# Rev 4199 | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 24-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting
import json


class LetsBuyScraper(BaseScraper):
    '''Scraper for letsbuy.com listing (JSON) and product (HTML) pages.

    Typical use: call setUrl() with a ``filterResult`` URL, then scrape(),
    then read the results with getPhones() and follow getNextUrl() until
    it returns None.
    '''

    # Value sent as the "pp" query parameter of listing URLs and used as the
    # divisor when turning a result count into a page count.
    PAGE_SIZE = 192

    # Total page count per category id. This is a class attribute, so the
    # cache is shared by every LetsBuyScraper instance.
    pageCount = {}

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None
        self.currentPage = None
        self.category = None

    def setUrl(self, url):
        '''Remember *url* and extract the page number and category from it.

        The query parameter ``pg`` becomes ``self.currentPage`` (as an int)
        and ``c`` becomes ``self.category``. A URL without ``pg`` (or
        without any query string at all) is treated as page 1; a URL
        without ``c`` leaves the category as None.

        Raises:
            ValueError: if the ``pg`` value is not an integer.
        '''
        self.url = url

        # Start from a clean slate so that re-using the scraper for a second
        # URL never inherits the page/category parsed from the previous one.
        self.currentPage = None
        self.category = None

        # partition() yields an empty query for URLs that contain no '?',
        # where indexing the result of split('?') would raise IndexError.
        query = url.partition('?')[2]
        for param in query.split('&'):
            name, separator, value = param.partition('=')
            if not separator:
                # Bare token with no "=value" part: nothing to read.
                continue
            name = name.strip()
            if name == 'pg':
                self.currentPage = int(value)
            elif name == 'c':
                self.category = value

        if self.currentPage is None:
            self.currentPage = 1

    def scrape(self):
        '''Fetch ``self.url``, parse the body as JSON into ``self.json``
        and make sure the page count for the current category is cached.
        '''
        response = BaseScraper.read(self, self.url)
        self.json = json.loads(response)
        self.setPageCount()

    def getPhones(self):
        '''Return one dict per entry of ``self.json['result']``.

        Each dict has the keys ``name``, ``price``, ``source`` (always
        ``'letsbuy'``), ``product_url`` and ``in_stock`` (an int).
        '''
        # NOTE(review): under Python 2, str() on a unicode value containing
        # non-ASCII characters raises UnicodeEncodeError. Left unchanged
        # because callers may rely on byte strings -- confirm.
        return [
            {
                'name': str(product['products_name']),
                'price': product['products_price'],
                'source': 'letsbuy',
                'product_url': str(product['url']),
                'in_stock': int(product['product_status']),
            }
            for product in self.json['result']
        ]

    def getNextUrl(self):
        '''Return the listing URL of the page after the current one, or
        None when the current page is the last page of the category.

        Raises:
            KeyError: if no page count has been cached for the current
                category yet (setPageCount() has not run for it).
        '''
        if self.currentPage < LetsBuyScraper.pageCount[self.category]:
            return 'http://www.letsbuy.com/filterResult?c=%s&pp=%s&pg=%s' % (
                self.category, self.PAGE_SIZE, self.currentPage + 1)
        return None

    def setPageCount(self):
        '''Cache the number of pages for the current category.

        The count is derived from ``self.json['resultCount']['0']`` and is
        only computed the first time a category is seen.
        '''
        if self.category not in LetsBuyScraper.pageCount:
            resultCount = int(self.json['resultCount']['0'])
            # Ceiling division: an exact multiple of PAGE_SIZE must not add
            # an extra, empty page (1 + n / size over-counts in that case).
            # An empty category still counts as one page, as it did before.
            pages = (resultCount + self.PAGE_SIZE - 1) // self.PAGE_SIZE
            LetsBuyScraper.pageCount[self.category] = max(1, pages)

    def getDataFromProductPage(self, url):
        '''Fetch a single product page and return its details as a dict.

        The name is read from ``<h1 class="prod_name">`` and the price from
        ``<span class="offer_price">`` (passed through
        removePriceFormatting). ``in_stock`` is always reported as 1.

        Raises:
            AttributeError: if either element is missing from the page or
                has no single string child.
        '''
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('h1', {'class': 'prod_name'}).string.strip()
        priceText = soup.find('span', {'class': 'offer_price'}).string.strip()
        price = removePriceFormatting(priceText)
        return {
            "product_url": str(url),
            "source": "letsbuy",
            "price": price,
            "in_stock": 1,
            "name": name,
        }


if __name__ == '__main__':
    s = LetsBuyScraper()
    # print s.getDataFromProductPage('http://www.letsbuy.com/samsung-galaxy-pop-s5570-p-14143')
    # s.setUrl('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88')
    s.setUrl('http://www.letsbuy.com/filterResult?c=254_88&pp=192&pg=7')
    s.scrape()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(s.getPhones())
    print(s.getNextUrl())