Subversion Repositories SmartDukaan

Rev

Blame | Last modification | View Log | RSS feed

'''
Created on 29-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
import urllib

class SaholicScraper:
    '''
    Scrapes phone names and prices from a Saholic listing page.

    Typical use: setUrl(), then scrape(), then getPhonePrices() and
    getNextUrl() to read the parsed page.
    '''

    def __init__(self):
        self.url = None
        self.id = None
        # Parsed page; stays None until scrape() has completed.
        self.soup = None

    def setUrl(self, url):
        '''Set the URL that the next scrape() call will fetch.'''
        self.url = url

    def scrape(self):
        '''Fetch self.url and store the parsed document in self.soup.'''
        sock = urllib.urlopen(self.url)
        try:
            html = sock.read()
        finally:
            # Release the connection even when the read fails.
            sock.close()
        self.soup = BeautifulSoup(html)

    def getPhonePrices(self):
        '''
        Return a list of {'name': ..., 'price': ...} dicts, one per
        'productDetails' div on the scraped page.

        The name is the text of the first anchor inside the first 'title'
        div; the price is the text of the second 'newPrice' span. Product
        divs that lack any of these pieces are skipped.
        '''
        phone_prices = []

        for div in self.soup.findAll('div', {'class': 'productDetails'}):
            try:
                name = div.findAll('div', {'class': 'title'})[0]('a')[0].string.strip()
                price = div.findAll('span', {'class': 'newPrice'})[1].string.strip()
            except (KeyError, IndexError, AttributeError):
                # IndexError: an expected element is missing from this div.
                # AttributeError: the element has no single text child, so
                # .string is None. Either way, skip only this product.
                continue

            # NOTE(review): under Python 2, str() raises UnicodeEncodeError on
            # non-ASCII text; as before, that error is left to propagate.
            phone_prices.append({'name': str(name), 'price': str(price)})

        return phone_prices

    def getNextUrl(self):
        '''
        Return the absolute URL of the next listing page, or None when the
        scraped page has no usable 'pager-next' link.
        '''
        try:
            anchors = self.soup.findAll('li', {'class': 'pager-next'})[0]('a')
            return 'http://www.saholic.com/all-mobile-phones/10001%s' % str(anchors[0]['href'].strip())
        except Exception:
            # Best effort: any failure to locate the link is reported as
            # "no next page" rather than raised.
            return None

if __name__ == '__main__':
    # Manual smoke run: fetch the first listing page and print the URL
    # that its pager points to next (None when there is no such link).
    start_url = 'http://www.saholic.com/all-mobile-phones/10001'
    saholic = SaholicScraper()
    saholic.setUrl(start_url)
    saholic.scrape()
    print(saholic.getNextUrl())