Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 24-May-2012

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting
import json


class SnapdealScraper(BaseScraper):
    '''Scraper for Snapdeal's JSON search endpoint and HTML product pages.

    Typical listing flow: setUrl() -> scrape() -> getPhones() -> getNextUrl().
    getDataFromProductPage() is independent of that flow and reads a single
    product page.
    '''

    # Number of result pages per category, filled in lazily by setPageCount().
    # This is a class attribute, so the values are shared by all instances.
    pageCount = {}

    # Number of products requested per search call (also the offset step).
    productCountPerScraping = 20

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None
        self.currentPage = None
        self.category = None

    def setUrl(self, url):
        '''Remember the search URL and derive category and page number from it.

        The URL is split on '/'; chunk 7 is stored as the category and chunk 8
        is parsed as an integer and divided by productCountPerScraping to get
        the 1-based page number. For a URL shaped like the one built by
        getNextUrl() (.../search/<category>/<offset>/<count>?...), chunk 8 is
        the result offset.

        Raises:
            IndexError: if the URL has fewer than nine '/'-separated chunks.
            ValueError: if chunk 8 is not an integer.
        '''
        self.url = url
        urlChunks = url.split('/')
        self.category = urlChunks[7]
        offset = int(urlChunks[8])
        # Floor division keeps the page number an int on Python 2 and 3 alike.
        self.currentPage = 1 + offset // SnapdealScraper.productCountPerScraping

    def scrape(self):
        '''Read self.url, parse the response as JSON and update the page count.

        The parsed document is stored in self.json.
        '''
        # NOTE(review): BaseScraper.read is defined elsewhere; it presumably
        # returns the response body as a string - confirm.
        response = BaseScraper.read(self, self.url)
        self.json = json.loads(response)
        self.setPageCount()

    def getPhones(self):
        '''Return one dict per product found in the last scraped response.

        Each dict has the keys 'name', 'price', 'source', 'product_url' and
        'in_stock' (1 when the product's 'soldOut' flag is falsy, else 0).
        '''
        # NOTE(review): on Python 2, str() raises UnicodeEncodeError for
        # non-ASCII names or URLs - confirm the feed is ASCII-only.
        return [
            {
                'name': str(product['name']),
                'price': product['voucherPrice'],
                'source': 'snapdeal',
                'product_url': str(product['pageUrl']),
                'in_stock': int(not bool(product['soldOut'])),
            }
            for product in self.json['productResponseDTO']['productDtos']
        ]

    def getNextUrl(self):
        '''Return the search URL of the next page, or None on the last page.

        Raises:
            KeyError: if no page count has been recorded for self.category
                (setPageCount() records it, and scrape() calls that).
        '''
        if self.currentPage < SnapdealScraper.pageCount[self.category]:
            perPage = SnapdealScraper.productCountPerScraping
            # Pages are 1-based, so the next page starts at currentPage * perPage.
            return 'http://www.snapdeal.com/json/product/get/search/%s/%s/%s?q=&sort=plrty&keyword=' % (
                self.category, self.currentPage * perPage, perPage)
        return None

    def setPageCount(self):
        '''Record the number of result pages for self.category, once.

        The count is derived from 'numberFound' in the scraped JSON and is
        only computed when the category has no entry in pageCount yet.
        '''
        if self.category not in SnapdealScraper.pageCount:
            resultCount = int(self.json['productResponseDTO']['numberFound'])
            perPage = SnapdealScraper.productCountPerScraping
            # Ceiling division: an exact multiple of perPage must not produce
            # an extra, empty trailing page (e.g. 40 results -> 2 pages, not 3).
            SnapdealScraper.pageCount[self.category] = (resultCount + perPage - 1) // perPage

    def getDataFromProductPage(self, url):
        '''Read a product page and return its name and price as a dict.

        The name comes from the <h1> inside div.prodtitle-head and the price
        from span#selling-price-id, passed through removePriceFormatting().
        'in_stock' is always reported as 1; the page is not inspected for
        stock status.

        Raises:
            AttributeError: if either element is missing from the page or has
                no single string child.
        '''
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)

        titleBlock = soup.find('div', {'class': 'prodtitle-head'})
        name = str(titleBlock.find('h1').string.strip())

        priceTag = soup.find('span', {'id': 'selling-price-id'})
        price = removePriceFormatting(str(priceTag.string.strip()))

        return {
            "product_url": str(url),
            "source": "snapdeal",
            "price": price,
            "in_stock": 1,
            "name": name,
        }


if __name__ == '__main__':
    s = SnapdealScraper()
    # Listing-flow example, kept for manual testing:
    # s.setUrl('http://www.snapdeal.com/json/product/get/search/175/60/20?q=&sort=plrty&keyword=')
    # s.scrape()
    # print(s.getPhones())
    # print(s.getNextUrl())
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(s.getDataFromProductPage('http://www.snapdeal.com/product/mobiles-mobile-phones/sony-ericsson-xperia-mini-pro-sk17i-black-10052?pos=31;493'))