# Rev 4198 (source-browser header: Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed)
"""Scraper for product listing pages (used below against letsbuy.com).

Created on 24-Aug-2011

@author: Varun Gupta
"""
import urllib

from BeautifulSoup import BeautifulSoup


def _to_str(text):
    """Return *text* as a ``str`` without failing on non-ASCII characters.

    On Python 2, ``str()`` of a unicode object containing non-ASCII
    characters raises ``UnicodeEncodeError``; in that case the text is
    encoded as UTF-8 instead. Pure-ASCII input gives the same result as
    plain ``str(text)``.
    """
    try:
        return str(text)
    except UnicodeEncodeError:
        return text.encode('utf-8')


class LetsBuyScraper:
    """Fetch one listing page and extract phone names, prices and the
    link to the next page.

    Typical use: ``setUrl(url)``, then ``scrape()``, then
    ``getPhonePrices()`` and/or ``getNextUrl()``.
    """

    def __init__(self):
        self.url = None
        self.id = None
        # Parsed page; stays None until scrape() has run successfully.
        self.soup = None

    def setUrl(self, url):
        """Set the URL that the next call to ``scrape()`` will download."""
        self.url = url

    def scrape(self):
        """Download ``self.url`` and parse the response into ``self.soup``.

        The connection is closed even if reading the response fails.
        """
        sock = urllib.urlopen(self.url)
        try:
            html = sock.read()
        finally:
            sock.close()
        self.soup = BeautifulSoup(html)

    def getPhonePrices(self):
        """Return a list of ``{'name': ..., 'price': ...}`` dicts.

        One dict is produced per ``<div class="detailbox">`` on the parsed
        page: the name is the text of the first ``<a>`` inside the first
        ``<h2>``, the price is the text of the first
        ``<span class="text12_stb">``. Each pair is also printed.

        Boxes that lack one of those elements, or whose element does not
        hold a single text node, are skipped rather than aborting the whole
        page. Returns an empty list if no page has been scraped yet.
        """
        if self.soup is None:
            return []

        phone_prices = []
        for div in self.soup.findAll('div', {'class': "detailbox"}):
            heading = div.find('h2')
            link = heading.find('a') if heading is not None else None
            price_tag = div.find('span', {'class': "text12_stb"})
            if link is None or price_tag is None:
                continue
            # .string is None when the tag does not wrap exactly one string.
            if link.string is None or price_tag.string is None:
                continue

            name = _to_str(link.string.strip())
            price = _to_str(price_tag.string.strip())
            # Single-argument form prints the same on Python 2 and 3.
            print("%s %s" % (name, price))
            phone_prices.append({'name': name, 'price': price})
        return phone_prices

    def getNextUrl(self):
        """Return the ``href`` of the anchor titled "Next Page", or None.

        If several anchors match, the last one on the page wins. Anchors
        without a ``title`` or ``href`` attribute are ignored. Returns None
        if no page has been scraped yet.
        """
        if self.soup is None:
            return None

        next_url = None
        for anchor in self.soup.findAll('a'):
            title = anchor.get('title')
            href = anchor.get('href')
            if title is None or href is None:
                continue
            if title.strip() == "Next Page":
                next_url = href.strip()
        return next_url


if __name__ == '__main__':
    s = LetsBuyScraper()
    s.setUrl('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192')
    s.scrape()
    print(s.getNextUrl())