# Rev 4198 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 24-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting


class LetsBuyScraper(BaseScraper):
    '''Scraper for letsbuy.com phone listing pages and product pages.

    Typical listing-page usage: setUrl() -> scrape() -> getPhones() /
    getNextUrl(). getDataFromProductPage() works on its own and does not
    touch the state set by scrape().
    '''

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None

    def setUrl(self, url):
        '''Store the listing-page URL that the next scrape() call will read.'''
        self.url = url

    def scrape(self):
        '''Read self.url and parse the result into self.soup.

        NOTE(review): BaseScraper.read is defined elsewhere; presumably it
        downloads the page and returns its HTML -- confirm.
        '''
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)

    def getPhones(self):
        '''Collect one record per <div class="detailbox"> in the parsed page.

        Requires scrape() to have been called first (reads self.soup).

        Returns a list of dicts with the keys "name", "price", "source",
        "in_stock" and "product_url". Name, price and URL are converted
        with str(); a name that cannot be converted has every character
        with code point >= 128 replaced by a space.
        '''
        phone_prices = []

        for div in self.soup.findAll('div', {'class': "detailbox"}):
            # The product name and link live in the first <a> of the first <h2>.
            name_tag = div('h2')[0]('a')[0]
            name = name_tag.string.strip()
            price_tag = div.findAll('span', {'class': "text12_stb"})[0]
            price = removePriceFormatting(price_tag.string.strip())
            url = str(name_tag['href'])

            try:
                name_str = str(name)
            except UnicodeEncodeError as e:
                # Print the repr of the offending name: printing the raw
                # value could raise the same encoding error again on an
                # ASCII-only stdout, this time inside the handler.
                print('Unicode Error %s %r' % (e, name))
                name_ascii = "".join(
                    [char if ord(char) < 128 else " " for char in name])
                print(name_ascii)
                name_str = str(name_ascii)

            phone_prices.append({
                "name": name_str,
                "price": str(price),
                'source': 'letsbuy',
                "in_stock": 1,
                "product_url": str(url)
            })

        return phone_prices

    def getNextUrl(self):
        '''Return the href of the anchor titled "Next Page", or None.

        Requires scrape() to have been called first (reads self.soup).
        Every anchor is examined, so when several match, the last one wins.
        '''
        next_url = None

        for anchor in self.soup.findAll('a'):
            try:
                if anchor['title'].strip() == "Next Page":
                    next_url = anchor['href'].strip()
            except KeyError:
                # Anchor without a title (or href) attribute: not a pager link.
                pass

        return next_url

    def getDataFromProductPage(self, url):
        '''Read a single product page and return its details as a dict.

        The name comes from <h1 class="prod_name"> and the price from
        <span class="offer_price">. Unlike getPhones(), name and price are
        returned as extracted, without str() conversion.
        '''
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('h1', {'class': 'prod_name'}).string.strip()
        price = removePriceFormatting(
            soup.find('span', {'class': 'offer_price'}).string.strip())
        data = {
            "product_url": str(url),
            "source": "letsbuy",
            "price": price,
            "in_stock": 1,
            "name": name
        }
        return data


if __name__ == '__main__':
    s = LetsBuyScraper()
    print(s.getDataFromProductPage('http://www.letsbuy.com/samsung-galaxy-pop-s5570-p-14143'))
    # s.setUrl('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192')
    # s.scrape()
    # phones = s.getPhones()
    # print phones
    # print s.getNextUrl()