Subversion Repositories SmartDukaan

Rev

Rev 3232 | Rev 4198 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

'''
Created on 24-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper

class InfibeamScraper(BaseScraper):
    '''
    Scraper for the Infibeam mobile-phone search result pages.

    Usage: call setUrl() with a result-page URL, then scrape() to download
    and parse it, then getPhones() / getNextUrl() to read the parsed page.
    '''

    # Page size assumed when turning the "x-y of N" summary into a page number.
    PRODUCTS_PER_PAGE = 20
    # Template of a search result page; %d is the 1-based page number.
    SEARCH_URL = 'http://www.infibeam.com/Mobiles/search?page=%d'

    def __init__(self):
        self.url = None
        self.id = None      # not used anywhere in this class
        self.soup = None    # parsed page; populated by scrape()

    def setUrl(self, url):
        '''Set the URL of the result page that scrape() will download.'''
        self.url = url

    def scrape(self):
        '''Download self.url through BaseScraper.read and parse it into self.soup.'''
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)

    def _toAscii(self, value):
        '''
        Return value as a plain str, replacing every non-ASCII character
        with a space when str() cannot encode it.
        '''
        try:
            return str(value)
        except UnicodeEncodeError as error:
            sanitized = str("".join([char if ord(char) < 128 else " " for char in value]))
            # Only ASCII-safe text is printed here: printing the raw unicode
            # value can itself raise UnicodeEncodeError on an ASCII stdout.
            print('Unicode Error %s' % error)
            print(sanitized)
            return sanitized

    def getPhones(self):
        '''
        Extract the products listed on the page parsed by scrape().

        Returns a list of dicts with the keys 'name', 'price', 'in_stock'
        (always 1) and 'product_url'. All text values are plain str; any
        non-ASCII character in them is replaced by a space.

        Raises IndexError when the expected result markup is missing.
        '''
        phone_prices = []
        result_list = self.soup.findAll('ul', {'class': 'srch_result portrait'})[0]

        for li in result_list('li'):
            name = li.findAll('span', {'class': 'title'})[0].string
            try:
                price = li.findAll('div', {'class': 'price'})[0].findAll('span', {'class': 'normal'})[0].string
            except IndexError:
                # No <div class="price"> / <span class="normal"> pair: fall back
                # to the trailing text of <span class="price">.
                price = li.findAll('span', {'class': 'price'})[0].contents[-1].strip()

            url = li.findAll('a')[0]['href']

            # Every text field is sanitized independently, so a non-ASCII
            # character in the price or URL cannot abort the whole scrape.
            phone_prices.append({
                'name': self._toAscii(name),
                'price': self._toAscii(price),
                'in_stock': 1,
                'product_url': self._toAscii(url),
            })

        return phone_prices

    def getNextUrl(self):
        '''
        Return the URL of the next result page, or None when the current
        page already shows the last product.

        Reads the first two <b> tags of <div class="resultsSummary">: the
        first holds a "first-last" range, the second the total product count.
        '''
        b = self.soup.findAll('div', {'class': 'resultsSummary'})[0].findAll('b')
        current_max = int(b[0].string.split('-')[1])
        total_products = int(b[1].string)

        if current_max >= total_products:
            return None

        # Floor division keeps the page number an integer on every Python version.
        return self.SEARCH_URL % (1 + current_max // self.PRODUCTS_PER_PAGE)

if __name__ == '__main__':
    # Manual smoke test: scrape one fixed result page and dump what was parsed.
    scraper = InfibeamScraper()
    scraper.setUrl('http://www.infibeam.com/Mobiles/search?page=17')
    scraper.scrape()
    print(scraper.getPhones())