Blame | Last modification | View Log | RSS feed
'''
Created on 29-Aug-2011

@author: Varun Gupta

Scraper for the saholic.com "all mobile phones" listing pages: downloads a
page, parses it with BeautifulSoup and extracts phone names/prices and the
link to the next listing page.
'''
from BeautifulSoup import BeautifulSoup
import urllib


class SaholicScraper:
    '''Download one listing page and pull phone data out of its markup.

    Typical use: setUrl(), then scrape(), then getPhonePrices() and/or
    getNextUrl(). The parsed page is kept in self.soup, which only exists
    after scrape() has run.
    '''

    # Base address of the listing; getNextUrl() appends the pager href to it.
    LISTING_URL = 'http://www.saholic.com/all-mobile-phones/10001'

    def __init__(self):
        self.url = None
        # Not used by any method in this class; kept for existing callers.
        self.id = None

    def setUrl(self, url):
        '''Set the address that the next scrape() call will download.'''
        self.url = url

    def scrape(self):
        '''Download self.url and store the parsed document in self.soup.

        Errors raised while opening or reading the URL propagate to the
        caller; the connection is closed either way.
        '''
        sock = urllib.urlopen(self.url)
        try:
            html = sock.read()
        finally:
            # Close the connection even when read() fails.
            sock.close()
        self.soup = BeautifulSoup(html)

    def getPhonePrices(self):
        '''Return a list of {'name': str, 'price': str} dicts for the page.

        For every <div class="productDetails"> the name is the text of the
        first link inside the first <div class="title">, and the price is
        the text of the second <span class="newPrice">. Entries that lack
        any of these pieces are skipped.
        '''
        phone_prices = []
        for div in self.soup.findAll('div', {'class': 'productDetails'}):
            try:
                name = div.findAll('div', {'class': 'title'})[0]('a')[0].string.strip()
                price = div.findAll('span', {'class': 'newPrice'})[1].string.strip()
            except (IndexError, KeyError, AttributeError):
                # IndexError: an expected tag is missing; AttributeError:
                # .string is None because the tag has no single text child.
                # Skip the malformed entry instead of aborting the page.
                continue
            phone_prices.append({'name': str(name), 'price': str(price)})
        return phone_prices

    def getNextUrl(self):
        '''Return the URL of the next listing page, or None if unavailable.

        The href of the first link inside the first <li class="pager-next">
        is appended to LISTING_URL (presumably the href is a query string
        such as a page parameter -- confirm against the live markup).
        '''
        try:
            anchors = self.soup.findAll('li', {'class': 'pager-next'})[0]('a')
            return self.LISTING_URL + str(anchors[0]['href'].strip())
        except Exception:
            # Deliberately broad: a missing pager, a missing href or a page
            # that was never scraped all mean "no next page" to callers.
            return None


if __name__ == '__main__':
    scraper = SaholicScraper()
    scraper.setUrl(SaholicScraper.LISTING_URL)
    scraper.scrape()
    # Parenthesised single argument prints identically under Python 2.
    print(scraper.getNextUrl())