Rev 4203 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 24-Aug-2011

@author: Varun Gupta
'''
from __future__ import print_function

from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting


class FlipcartScraper(BaseScraper):
    '''Scraper for Flipkart mobile listing pages and single product pages.

    Typical listing use: setUrl() -> scrape() -> getPhones() -> getNextUrl().
    '''

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None
        # Initialised here so that getNextUrl() called before scrape() or
        # getPhones() returns None instead of failing on a missing attribute.
        self.soup = None
        self.phones = None

    def setUrl(self, url):
        '''Set the listing page URL that scrape() will fetch.'''
        self.url = url

    def scrape(self):
        '''Fetch self.url, parse it into self.soup and reset self.phones.'''
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)
        self.phones = None

    def getPhones(self):
        '''Extract the products listed on the page loaded by scrape().

        Returns a list of dicts with the keys 'name', 'price', 'source',
        'product_url' and 'in_stock'. The same list is stored in
        self.phones. Tiles without a title anchor, without a price, or
        lacking a required attribute are skipped.
        '''
        phones = []
        for div in self.soup.findAll('div', {'class': 'fk-product-thumb fkp-medium'}):
            try:
                phone = self._parseProduct(div)
            except KeyError:
                # The anchor lacks a 'title' or 'href' attribute: skip tile.
                continue
            if phone is not None:
                phones.append(phone)
        self.phones = phones
        return phones

    def _parseProduct(self, div):
        '''Build the product dict for one listing tile, or None to skip it.'''
        anchor = div.find('a', {'class': 'title tpadding5 fk-anchor-link'})
        if anchor is None:
            # Previously this crashed with TypeError on None['title'].
            return None

        name = anchor['title'].strip()
        product_url = anchor['href'].strip()
        # Any <b> element inside the tile is treated as an out-of-stock marker.
        in_stock = 0 if len(div.findAll('b')) > 0 else 1

        price = self._findPrice(div)
        if price is None:
            return None

        try:
            name_str = str(name)
        except UnicodeEncodeError as e:
            # Non-ASCII title: log it and fall back to a space-padded
            # ASCII version of the name.
            print('Unicode Error', e, name)
            name_str = str(self._toAscii(name))
            print(name_str)

        try:
            price_value = removePriceFormatting(price)
        except UnboundLocalError as e:
            # Diagnostic kept from the original code: log the offending
            # tile and skip it.
            print(e, name)
            print(div)
            return None

        return {
            'name': name_str,
            'price': price_value,
            'source': 'flipkart',
            'product_url': str(product_url),
            'in_stock': in_stock,
        }

    def _findPrice(self, div):
        '''Return the stripped text of the last final-price span, or None.'''
        price = None
        for span in div.findAll('span'):
            try:
                css_class = span['class']
            except KeyError:
                continue
            if css_class.find('price final-price') > -1 and span.string is not None:
                # span.string is None when the span contains nested markup;
                # such spans are ignored rather than crashing on .strip().
                price = span.string.strip()
        return price

    def _toAscii(self, text):
        '''Return text with every non-ASCII character replaced by a space.'''
        return "".join([char if ord(char) < 128 else " " for char in text])

    def getNextUrl(self):
        '''Return the URL of the next listing page, or None if there is none.

        None is returned when no phones have been collected (getPhones() not
        called yet, or it found nothing) or when the pagination header shows
        that the last result is already displayed.
        '''
        if not self.phones:
            return None

        # The header's <b> elements hold the highest result index shown
        # (inside a <span>) and the total number of results.
        tab_info = self.soup.find('div', {'class': 'unit fk-lres-header-text'})('b')
        current_max = int(tab_info[0].find('span').string)
        total = int(tab_info[1].string)
        if current_max >= total:
            return None

        first_url = self.phones[0]['product_url']
        section = 'all/' if first_url.find('/tablets/') == -1 else 'tablet-20278/'
        base_url = 'http://www.flipkart.com/mobiles/%s' % section
        # Presumably 20 results per page -- TODO confirm. Floor division
        # keeps the page number an integer under true division as well.
        return base_url + str(1 + (current_max // 20))

    def getDataFromProductPage(self, url):
        '''Fetch a single product page and return its data as a dict.

        The dict has the keys 'product_url', 'source', 'price', 'in_stock'
        and 'name'.
        '''
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('h1', {'itemprop': 'name'}).string.strip()
        price = soup.find('span', {'id': 'fk-mprod-our-id'}).contents[2]

        # NOTE(review): the stock status is never read from the page. The
        # original compared a placeholder value (the integer 1) against
        # 'In Stock.', so 'in_stock' has always been 0. The emitted value is
        # preserved here; scrape the real status text into stock_text once
        # the page element that carries it is confirmed.
        stock_text = None

        data = {
            "product_url": str(url),
            "source": "flipkart",
            "price": price,
            "in_stock": 1 if stock_text == 'In Stock.' else 0,
            "name": name,
        }
        return data


if __name__ == '__main__':
    s = FlipcartScraper()
    data = s.getDataFromProductPage('http://www.flipkart.com/samsung-wave-ii-s8530-mobile-phone/p/itmctnexz3gyjfac?pid=MOBCTXB47XCP7Z9X&ref=eca2ea19-cde2-4bfd-a3d8-15cf737c88d3')
    print(data)
#    s.setUrl('http://www.flipkart.com/mobiles/all')
#    s.scrape()
#    phones = s.getPhones()
#    for p in phones: print p
#    print s.getNextUrl()