Subversion Repositories SmartDukaan

Rev

Rev 4199 | Blame | Compare with Previous | Last modification | View Log | RSS feed

'''
Created on 24-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting
import json

class LetsBuyScraper(BaseScraper):
    '''
    Scraper for the letsbuy.com "filterResult" JSON listing endpoint.

    Typical use: setUrl() with a listing URL, scrape() to load and parse it,
    getPhones() to read the products and getNextUrl() to get the URL of the
    following listing page (or None after the last one).
    getDataFromProductPage() independently reads a single product HTML page.
    '''

    # Products per listing page. Sent as the 'pp' query parameter and used to
    # derive the number of pages, so both always agree.
    PAGE_SIZE = 192

    # Number of listing pages per category, keyed by the 'c' query parameter.
    # Stored on the class so it is computed only once per category and shared
    # by all instances.
    pageCount = {}

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None
        self.currentPage = None
        self.category = None

    def setUrl(self, url):
        '''
        Store the URL to scrape and extract the page number ('pg') and the
        category ('c') from its query string.

        The page defaults to 1 when 'pg' is absent; the category is None when
        'c' is absent. A URL without a query string is accepted.
        Raises ValueError if the 'pg' value is not an integer.
        '''
        self.url = url

        # Start from a clean state so values parsed from a previously set URL
        # do not leak into this one when the instance is reused.
        self.currentPage = None
        self.category = None

        # partition() yields an empty query for URLs without '?', where
        # indexing the result of split('?') would raise IndexError.
        query = url.partition('?')[2]

        for param in query.split('&'):
            paramName, separator, paramValue = param.partition('=')

            if not separator:
                # Empty chunk or a bare flag without a value: nothing to read.
                continue

            paramName = paramName.strip()

            if paramName == 'pg':
                self.currentPage = int(paramValue)

            elif paramName == 'c':
                self.category = paramValue

        if self.currentPage is None:
            self.currentPage = 1

    def scrape(self):
        '''
        Read self.url through BaseScraper.read, parse the response as JSON
        into self.json and record the page count of the current category.
        '''
        response = BaseScraper.read(self, self.url)
        self.json = json.loads(response)
        self.setPageCount()

    def getPhones(self):
        '''
        Return one dict per entry of self.json['result'] with the keys
        'name', 'price', 'source', 'product_url' and 'in_stock'.

        scrape() must have been called first so that self.json exists.
        '''
        # NOTE(review): under Python 2, str() raises UnicodeEncodeError for a
        # unicode value holding non-ASCII characters -- confirm that product
        # names and URLs are always ASCII.
        return [
            {
                'name': str(product['products_name']),
                'price': product['products_price'],
                'source': 'letsbuy',
                'product_url': str(product['url']),
                'in_stock': int(product['product_status'])
            }
            for product in self.json['result']
        ]

    def getNextUrl(self):
        '''
        Return the listing URL of the page after the current one, or None
        when the current page is the last page of the category.

        Raises KeyError if no page count has been recorded for the current
        category yet (it is recorded by scrape()).
        '''
        if self.currentPage < LetsBuyScraper.pageCount[self.category]:
            return 'http://www.letsbuy.com/filterResult?c=%s&pp=%s&pg=%s' % (
                self.category, self.PAGE_SIZE, self.currentPage + 1)

        return None

    def setPageCount(self):
        '''
        Record the number of listing pages of the current category, unless it
        is already known. The count is derived from
        self.json['resultCount']['0'] and PAGE_SIZE.
        '''
        if LetsBuyScraper.pageCount is None:
            # Recreate the shared cache if it was cleared by assigning None.
            LetsBuyScraper.pageCount = {}

        if self.category not in LetsBuyScraper.pageCount:
            resultCount = int(self.json['resultCount']['0'])
            # Ceiling division: an exact multiple of PAGE_SIZE must not yield
            # an extra, empty page. An empty category still counts as 1 page.
            pages = (resultCount + self.PAGE_SIZE - 1) // self.PAGE_SIZE
            LetsBuyScraper.pageCount[self.category] = max(1, pages)

    def getDataFromProductPage(self, url):
        '''
        Read a single product HTML page and return a dict with the keys
        'product_url', 'source', 'price', 'in_stock' and 'name'.

        The name is taken from <h1 class="prod_name"> and the price from
        <span class="offer_price">, passed through removePriceFormatting.
        'in_stock' is always reported as 1.
        AttributeError is raised when either element is missing, because
        find() then returns None.
        '''
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('h1', {'class': 'prod_name'}).string.strip()
        priceText = soup.find('span', {'class': 'offer_price'}).string.strip()
        price = removePriceFormatting(priceText)

        return {
            "product_url": str(url),
            "source": "letsbuy",
            "price": price,
            "in_stock": 1,
            "name": name
        }


if __name__ == '__main__':
    # Manual smoke test: scrape one listing page, then print the products
    # found on it followed by the URL of the next listing page.
    # A single product page can be checked instead with, e.g.:
    #    scraper.getDataFromProductPage('http://www.letsbuy.com/samsung-galaxy-pop-s5570-p-14143')
    scraper = LetsBuyScraper()
    listingUrl = 'http://www.letsbuy.com/filterResult?c=254_88&pp=192&pg=7'
    scraper.setUrl(listingUrl)
    scraper.scrape()

    phones = scraper.getPhones()
    print(phones)
    nextUrl = scraper.getNextUrl()
    print(nextUrl)