# Rev 5291 (repository viewer header: Blame | Compare with Previous | Last modification | View Log | RSS feed)
'''
Created on 24-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup

from BaseScraper import BaseScraper
from Utils import removePriceFormatting


class InfibeamScraper(BaseScraper):
    '''Scraper for infibeam.com search-result listings and product pages.

    Typical use: setUrl() -> scrape() -> getPhones() / getNextUrl().
    getDataFromProductPage() is independent of that sequence and fetches
    the URL it is given.
    '''

    # Divisor that turns the index of the last listed result into a page
    # number. NOTE(review): presumably the number of results Infibeam shows
    # per search page -- confirm against the live site.
    RESULTS_PER_PAGE = 20

    def __init__(self):
        self.url = None
        self.id = None

    def setUrl(self, url):
        '''Set the search-result URL used by scrape() and getNextUrl().'''
        self.url = url

    def scrape(self):
        '''Read self.url via BaseScraper.read() and parse it into self.soup.'''
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)

    def getPhones(self):
        '''Extract the products listed on the page parsed by scrape().

        Returns a list of dicts with the keys 'name', 'price', 'source'
        (always 'infibeam'), 'in_stock' (always 1 for listed items) and
        'product_url'.

        Requires scrape() to have been called first. Raises IndexError when
        the expected result list is not present in the page.
        '''
        phone_prices = []

        # URLs containing 'Cameras' use the landscape result layout.
        if 'Cameras' in self.url:
            ulTagClass = 'srch_result landscape'
        else:
            ulTagClass = 'srch_result portrait'

        result_list = self.soup.findAll('ul', {'class': ulTagClass})[0]

        for li in result_list('li'):
            name = li.find('span', {'class': 'title'}).contents[1].strip()

            try:
                price = li.find('div', {'class': 'price'}).find('span', {'class': 'normal'}).string
            except (IndexError, AttributeError):
                # No <div class="price"><span class="normal"> pair: fall back
                # to the last child of <span class="price">.
                price = removePriceFormatting(li.find('span', {'class': 'price'}).contents[-1].strip())

            url = li.findAll('a')[0]['href']

            try:
                product_name = str(name)
            except UnicodeEncodeError as e:
                # The name holds characters str() cannot encode; report it
                # and replace every non-ASCII character with a space.
                print('Unicode Error %s %s' % (e, name))
                name_ascii = "".join([char if ord(char) < 128 else " " for char in name])
                print(name_ascii)
                product_name = str(name_ascii)

            phone_prices.append({
                'name': product_name,
                'price': removePriceFormatting(str(price)),
                'source': 'infibeam',
                'in_stock': 1,
                'product_url': str(url),
            })

        return phone_prices

    def getNextUrl(self):
        '''Return the URL of the next result page, or None on the last page.

        Reads the <b> elements of the first <div class="resultsSummary">:
        the first is split on '-' and its second part taken as the index of
        the last product shown; the second is the total product count.

        Everything from 'page=' onwards in self.url is replaced by the new
        page number. If self.url has no 'page=' parameter, one is appended
        rather than slicing with the -1 that str.find() returns, which
        would drop the URL's last character.
        '''
        b = self.soup.findAll('div', {'class': 'resultsSummary'})[0].findAll('b')
        current_max = int(b[0].string.split('-')[1])
        total_products = int(b[1].string)

        if current_max >= total_products:
            return None

        # Floor division keeps the page number an integer on every Python version.
        next_page = 'page=%d' % (1 + current_max // self.RESULTS_PER_PAGE)

        urlDivider = self.url.find('page=')
        if urlDivider != -1:
            return self.url[:urlDivider] + next_page

        # No existing page parameter: append one with the right separator.
        if self.url.endswith('?') or self.url.endswith('&'):
            separator = ''
        elif '?' in self.url:
            separator = '&'
        else:
            separator = '?'
        return self.url + separator + next_page

    def getDataFromProductPage(self, url):
        '''Fetch a single product page and return its details as a dict.

        The dict has the keys 'product_url', 'source' (always 'infibeam'),
        'price', 'in_stock' (1 only when the status text is exactly
        'In Stock.', else 0) and 'name'.

        Raises AttributeError when any of the expected elements is missing
        from the page.
        '''
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)

        name = soup.find('div', {'id': 'ib_details'}).find('h1', {'class': "fn"}).find('span', {'class': "item"}).string.strip()
        price = removePriceFormatting(
            soup.find('div', {'id': 'priceDiv'}).find('span', {'class': 'infiPrice amount price'}).string)
        in_stock = soup.find('div', {'id': 'colors'}).find('span', {'class': "status"}).string.strip()

        data = {
            "product_url": str(url),
            "source": "infibeam",
            "price": price,
            "in_stock": 1 if in_stock == 'In Stock.' else 0,
            "name": name,
        }
        return data


if __name__ == '__main__':
    s = InfibeamScraper()
    # print s.getDataFromProductPage('http://www.infibeam.com/Mobiles/i-HTC-EVO-3D-Android-Smartphone/P-E-M-HTC-EVO-3D.html?id=Black')
    s.setUrl('http://www.infibeam.com/Mobiles/search?page=5')
    s.scrape()
    products = s.getPhones()
    print(products)
    print(s.getNextUrl())