# Rev 4039 | Rev 4199 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 24-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting


def _phone_record(name, price, url):
    '''
    Build the result dictionary for a single phone listing.

    Every text field is passed through str(). On Python 2 this raises
    UnicodeEncodeError when a value contains non-ASCII characters; the
    caller relies on that to trigger its ASCII fallback for the name.
    '''
    return {
        "name": str(name),
        "price": str(price),
        'source': 'letsbuy',
        "in_stock": 1,
        "product_url": str(url),
    }


class LetsBuyScraper(BaseScraper):
    '''
    Scraper for a letsbuy.com listing page.

    Usage: setUrl() -> scrape() -> getPhones() / getNextUrl().
    '''

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        # Not read anywhere in this file.
        # NOTE(review): presumably used by BaseScraper or callers - confirm.
        self.id = None

    def setUrl(self, url):
        '''Set the URL of the page that scrape() will read.'''
        self.url = url

    def scrape(self):
        '''
        Read self.url through BaseScraper.read and parse the result.

        The parsed document is stored in self.soup, which getPhones() and
        getNextUrl() require; call this method before either of them.
        '''
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)

    def getPhones(self):
        '''
        Extract one dictionary per <div class="detailbox"> in the parsed page.

        Each dictionary has the keys "name", "price", "source", "in_stock"
        and "product_url". The name comes from the first <a> inside the
        first <h2>, the price from the first <span class="text12_stb">
        (passed through removePriceFormatting), and the URL from the href
        of that same <a>.

        A listing that lacks any of those elements is not skipped: the
        [0] lookups and the .string.strip() calls will raise instead.
        '''
        phone_prices = []

        for div in self.soup.findAll('div', {'class': "detailbox"}):
            name_tag = div('h2')[0]('a')[0]
            name = name_tag.string.strip()
            price_tag = div.findAll('span', {'class': "text12_stb"})[0]
            price = removePriceFormatting(price_tag.string.strip())
            url = str(name_tag['href'])

            try:
                record = _phone_record(name, price, url)
            except UnicodeEncodeError as e:
                # Report the name via repr(): printing the raw value that
                # just failed ASCII encoding can raise a second
                # UnicodeEncodeError from inside this handler when stdout
                # is not Unicode-capable, which would abort the scrape.
                print('Unicode Error %s %r' % (e, name))
                # Fall back to an ASCII-only name, replacing every
                # non-ASCII character with a space.
                name_ascii = "".join(
                    [char if ord(char) < 128 else " " for char in name])
                print(name_ascii)
                record = _phone_record(name_ascii, price, url)

            phone_prices.append(record)

        return phone_prices

    def getNextUrl(self):
        '''
        Return the href of the anchor titled "Next Page", or None.

        All anchors are scanned, so if several match, the last one wins.
        Anchors without a title (or a matching anchor without an href)
        are ignored.
        '''
        next_url = None

        for anchor in self.soup.findAll('a'):
            try:
                if anchor['title'].strip() == "Next Page":
                    next_url = anchor['href'].strip()
            except KeyError:
                # Attribute lookup on a tag raises KeyError when the
                # attribute is absent; such anchors are simply not a match.
                pass

        return next_url


if __name__ == '__main__':
    s = LetsBuyScraper()
    s.setUrl('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192')
    s.scrape()
    phones = s.getPhones()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(phones)
    print(s.getNextUrl())