# Rev 5639 | Blame | Compare with Previous | Last modification | View Log | RSS feed
from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting


class SulekhaScraper(BaseScraper):
    """Scraper for the mobile-phone offer listing on sulekha.com.

    Typical use, as far as this file shows: call setUrl(), then scrape()
    to download and parse the page, then getPhones() to extract the
    offers, and getNextUrl() to obtain the following listing page.
    """

    # Last page number that getNextUrl() will hand out.
    pageCount = 11
    # Not referenced in this file; presumably read by BaseScraper or by
    # the code driving the scraper -- TODO confirm.
    productCountPerScraping = 24
    # Class-level page counter: getNextUrl() advances it on the class, so
    # the position is shared by every SulekhaScraper instance.
    currentPage = 1

    def __init__(self):
        """Initialise the base scraper and clear the per-instance state."""
        BaseScraper.__init__(self)
        self.url = None
        self.id = None

    def setUrl(self, url):
        """Remember the listing URL that scrape() will download."""
        self.url = url

    def scrape(self):
        """Download self.url and parse it into self.soup.

        Also resets self.phones to None so stale results from an earlier
        page are not mistaken for the current one.
        """
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)
        self.phones = None

    def getPhones(self):
        """Extract the offers from the page parsed by scrape().

        Returns a list of dicts with the keys 'name', 'price', 'source',
        'product_url' and 'in_stock'; the same list is stored in
        self.phones.  Problems are printed, never raised: if the offer
        container is missing the result is an empty list, and an offer
        that cannot be parsed is skipped without affecting the others.
        """
        phones = []
        allPhoneUl = self.soup.find('ul', id="MMobBrandOffersListCont")

        try:
            items = allPhoneUl.findAll('li')
        except AttributeError as e:
            # find() returned None: the offer container is not on the page.
            print(e)
            items = []

        for li in items:
            # Each offer is parsed in its own try block so that one
            # malformed <li> does not discard the offers that follow it.
            try:
                anchor = li.find('div', {'class': 'dealtit'}).find('a')
                name = anchor.string.strip()
                priceSpan = li.find('span', {'class': 'deals-our-price'})
                # The price text is the second child node of the span.
                price = priceSpan.contents[1].strip()
                product_url = anchor['href'].strip()
                phones.append({
                    'name': str(name),
                    'price': removePriceFormatting(price),
                    'source': 'sulekha',
                    'product_url': str(product_url),
                    'in_stock': 1,
                })
            except Exception as e:
                # Best-effort scraping: report the offending item, move on.
                print(e)
                print(li)

        self.phones = phones
        return phones

    def setPageCount(self):
        """Advance the page counter by one on this instance.

        NOTE(review): this assigns an *instance* attribute, whereas
        getNextUrl() reads and writes the *class* attribute, so calling
        this method does not influence getNextUrl().  Left unchanged
        because external callers may rely on it -- confirm and unify.
        """
        self.currentPage = self.currentPage + 1

    def getNextUrl(self):
        """Return the URL of the next listing page, or None when done.

        The shared class-level counter is incremented before the URL is
        built, so with the initial value of 1 the first URL returned is
        for page 2 and the last one is for page `pageCount`.
        """
        if SulekhaScraper.currentPage < SulekhaScraper.pageCount:
            SulekhaScraper.currentPage += 1
            return 'http://mobiles.sulekha.com/common/common.aspx?type=mobileofferslist&makeId=0&modelId=0&pageNo=%s' % SulekhaScraper.currentPage
        return None

    def getDataFromProductPage(self, url):
        """Download a single product page and return its details.

        Returns a dict with the keys 'product_url', 'source', 'price',
        'in_stock' and 'name'.  Unlike getPhones(), lookup failures are
        not caught here and propagate to the caller.
        """
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)

        title = soup.find('h1', {'class': 'product-title'})
        # Calling a tag is BeautifulSoup shorthand for findAll().
        name = title('a')[0].contents[0].string.strip()

        priceText = soup.find('span', {'itemprop': 'price'}).string.strip()
        # Drop the last two characters and the thousands separators.
        # NOTE(review): the two characters are presumably a fixed trailing
        # suffix on the displayed price -- confirm against the live page.
        price = priceText[:-2].replace(",", "")

        return {
            "product_url": str(url),
            "source": "sulekha",
            "price": price,
            "in_stock": 1,
            "name": name,
        }


if __name__ == '__main__':
    s = SulekhaScraper()
    data = s.getDataFromProductPage('http://deals.sulekha.com/blackberry-curve-9360-white-17561')
    print(data)