# Blame | Last modification | View Log | RSS feed
'''
Created on 24-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
import urllib


class FlipcartScraper:
    '''
    Downloads one Flipkart mobile-listing page and extracts the phones on it.

    Typical use: setUrl(), then scrape(), then getPhones() and/or
    getNextUrl().  The latter two read self.soup, which only scrape() sets.
    '''

    # URL pattern used by getNextUrl() to address a listing page by number.
    LISTING_URL_TEMPLATE = 'http://www.flipkart.com/mobiles/all/%d'
    # Divisor used by getNextUrl() to turn the highest listed item index into
    # a page number -- presumably the number of products per listing page.
    PAGE_SIZE = 20

    def __init__(self):
        self.url = None
        self.id = None
        # Parsed page; stays None until scrape() has been called.
        self.soup = None

    def setUrl(self, url):
        '''Set the URL that the next call to scrape() will download.'''
        self.url = url

    def scrape(self):
        '''
        Download self.url and parse the response into self.soup.

        Errors raised while opening or reading the URL propagate to the
        caller; the connection is closed either way.
        '''
        sock = urllib.urlopen(self.url)
        try:
            html = sock.read()
        finally:
            # Close the connection even when read() fails part-way through.
            sock.close()
        self.soup = BeautifulSoup(html)

    def getPhones(self):
        '''
        Return the products found on the page parsed by scrape().

        Each product is a dict with the keys 'name', 'price', 'product_url'
        and 'in_stock'.  'in_stock' is 0 when the product tile contains a <b>
        tag and 1 otherwise.  'price' is None when the tile has no usable
        price span.  Tiles without a title anchor, or whose anchor lacks a
        'title' or 'href' attribute, are skipped.
        '''
        phones = []
        for div in self.soup.findAll('div', {'class': 'fk-product-thumb fkp-medium'}):
            anchors = div.findAll('a', {'class': 'title fk-anchor-link'})
            if not anchors:
                # No title link in this tile: nothing to identify the product.
                continue
            anchor = anchors[0]
            try:
                name = anchor['title'].strip()
                product_url = anchor['href'].strip()
            except KeyError:
                continue

            # A <b> tag inside the tile is treated as the out-of-stock marker.
            in_stock = 0 if div.findAll('b') else 1

            # Reset for every product so that a tile without a price can
            # neither raise NameError nor inherit the previous tile's price.
            price = None
            for span in div.findAll('span'):
                try:
                    css_class = span['class']
                except KeyError:
                    continue
                # span.string is None when the span holds nested markup
                # instead of plain text.  No break: the last match wins.
                if 'price final-price' in css_class and span.string is not None:
                    price = span.string.strip()

            # NOTE(review): under Python 2, str() raises UnicodeEncodeError if
            # the scraped text contains non-ASCII characters -- confirm whether
            # callers need byte strings here.
            phones.append({
                'name': str(name),
                'price': None if price is None else str(price),
                'product_url': str(product_url),
                'in_stock': in_stock,
            })
        return phones

    def getNextUrl(self):
        '''
        Return the URL of the next listing page, or None on the last page.

        Reads the first two <b> tags of the results header: the first is
        split on '-' and its second part taken as the highest item index
        shown, the second is taken as the total number of items.

        Raises IndexError when the header or its <b> tags are missing, and
        ValueError when their text is not numeric.
        '''
        header = self.soup.findAll('div', {'class': 'unit fk-lres-header-text'})[0]
        tab_info = header.findAll('b')
        current_max = int(tab_info[0].string.split('-')[1])
        total = int(tab_info[1].string)
        if current_max < total:
            # Floor division keeps the page number an integer.
            return self.LISTING_URL_TEMPLATE % (1 + current_max // self.PAGE_SIZE)
        return None


if __name__ == '__main__':
    s = FlipcartScraper()
    s.setUrl('http://www.flipkart.com/mobiles/all/24')
    s.scrape()
    phones = s.getPhones()
    for p in phones:
        print(p)