Subversion Repositories SmartDukaan

Rev

Rev 4198 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

'''
Created on 24-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
import urllib

class LetsBuyScraper:
    '''
    Scraper for a LetsBuy product listing page.

    Usage: call setUrl() with a listing URL, scrape() to download and parse
    it, then getPhonePrices() and getNextUrl() to read the parsed page.
    '''

    def __init__(self):
        self.url = None
        self.id = None
        # Defined up front so the getters can tell "nothing scraped yet"
        # apart from a parsed page instead of failing on a missing attribute.
        self.soup = None

    def setUrl(self, url):
        '''Set the URL that the next call to scrape() will download.'''
        self.url = url

    def scrape(self):
        '''
        Download self.url and store the parsed document in self.soup.

        Errors raised while opening or reading the URL propagate to the
        caller; self.soup is only replaced after a successful read.
        '''
        sock = urllib.urlopen(self.url)
        try:
            html = sock.read()
        finally:
            # Close the connection even when read() fails.
            sock.close()
        self.soup = BeautifulSoup(html)

    def getPhonePrices(self):
        '''
        Return the name and price of every product box on the scraped page.

        Each div of class "detailbox" yields one dict of the form
        {'name': ..., 'price': ...}; both values are whitespace-stripped
        strings. The name is the text of the first link inside the box's
        first h2, the price is the text of the box's first span of class
        "text12_stb". Every pair is also printed to stdout.

        Returns an empty list when no page has been scraped yet.
        '''
        if self.soup is None:
            return []

        phone_prices = []
        for div in self.soup.findAll('div', {'class': "detailbox"}):
            # Calling a tag is BeautifulSoup shorthand for tag.findAll(...).
            name = div('h2')[0]('a')[0].string.strip()
            price = div.findAll('span', {'class': "text12_stb"})[0].string.strip()
            # A single pre-formatted argument prints "name price" whether
            # print is the statement or the function.
            print("%s %s" % (name, price))
            phone_prices.append({'name': str(name), 'price': str(price)})
        return phone_prices

    def getNextUrl(self):
        '''
        Return the href of the anchor titled "Next Page", or None.

        Anchors lacking a title or an href attribute are ignored. When more
        than one anchor qualifies, the last one in document order is used.
        Returns None when no page has been scraped yet.
        '''
        if self.soup is None:
            return None

        next_url = None
        for anchor in self.soup.findAll('a'):
            # Keep the try body down to the two lookups that can raise.
            try:
                title = anchor['title']
                href = anchor['href']
            except KeyError:
                continue
            if title.strip() == "Next Page":
                next_url = href.strip()
        return next_url

if __name__ == '__main__':
    # Manual smoke run: fetch one listing page and show where its
    # "Next Page" link points (prints None when there is no such link).
    listing_url = 'http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192'
    scraper = LetsBuyScraper()
    scraper.setUrl(listing_url)
    scraper.scrape()
    next_page = scraper.getNextUrl()
    print(next_page)