Subversion Repositories SmartDukaan

Rev

Rev 4199 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 24-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
6
from BeautifulSoup import BeautifulSoup
4039 varun.gupt 7
from BaseScraper import BaseScraper
4198 varun.gupt 8
from Utils import removePriceFormatting
5291 varun.gupt 9
import json
3232 varun.gupt 10
 
4039 varun.gupt 11
class LetsBuyScraper(BaseScraper):
    '''
    Scraper for letsbuy.com.

    Listing pages are fetched as JSON from the "filterResult" URL and exposed
    through getPhones(); getNextUrl() walks the remaining pages of the same
    category. Individual HTML product pages are handled separately by
    getDataFromProductPage().

    Expected call order for listings: setUrl() -> scrape() -> getPhones() /
    getNextUrl().
    '''

    # Page size used both in the generated listing URLs ('pp' parameter) and
    # when converting the reported result count into a number of pages.
    PAGE_SIZE = 192

    # category -> total number of listing pages. Stored on the class so the
    # value is computed once per category and shared by every instance.
    pageCount = {}

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None
        self.currentPage = None
        self.category = None
        # Decoded JSON of the most recently scraped listing page.
        self.json = None

    def setUrl(self, url):
        '''
        Remember the URL to scrape and extract the page number ('pg') and
        category ('c') from its query string.

        The page number defaults to 1 when the URL carries no 'pg' value. The
        category stays None when the URL carries no 'c' parameter (for
        example a URL without any query string).

        Raises ValueError if 'pg' is present but not an integer.
        '''
        self.url = url

        # Start from a clean slate so values parsed from a previously set URL
        # cannot leak into this one.
        self.currentPage = None
        self.category = None

        # partition() yields an empty query when there is no '?', instead of
        # the IndexError that split('?')[1] would raise.
        query = url.partition('?')[2]

        for param in query.split('&'):
            paramName, _, paramValue = param.partition('=')
            paramName = paramName.strip()

            if paramName == 'pg' and paramValue:
                self.currentPage = int(paramValue)

            elif paramName == 'c':
                self.category = paramValue

        if self.currentPage is None:
            self.currentPage = 1

    def scrape(self):
        '''
        Download the current URL, decode the response as JSON into self.json
        and make sure the page count for the current category is known.
        '''
        response = BaseScraper.read(self, self.url)
        self.json = json.loads(response)
        self.setPageCount()

    def getPhones(self):
        '''
        Return one dict per entry of the scraped 'result' list, with the keys
        'name', 'price', 'source', 'product_url' and 'in_stock'.

        scrape() must have been called first.
        '''
        # NOTE(review): str() on a non-ASCII unicode value raises
        # UnicodeEncodeError under Python 2 - confirm that product names and
        # URLs are always ASCII.
        return [
            {
                'name': str(product['products_name']),
                'price': product['products_price'],
                'source': 'letsbuy',
                'product_url': str(product['url']),
                'in_stock': int(product['product_status'])
            }
            for product in self.json['result']
        ]

    def getNextUrl(self):
        '''
        Return the listing URL of the page after the current one, or None
        when the current page is the last one or when the page count of the
        current category is not known yet (scrape() has not run).
        '''
        totalPages = (LetsBuyScraper.pageCount or {}).get(self.category)

        if totalPages is None or self.currentPage is None:
            return None

        if self.currentPage < totalPages:
            return 'http://www.letsbuy.com/filterResult?c=%s&pp=%s&pg=%s' % (
                self.category, LetsBuyScraper.PAGE_SIZE, self.currentPage + 1)

        return None

    def setPageCount(self):
        '''
        Compute and cache the number of listing pages for the current
        category from the result count reported in self.json. Does nothing
        when the category has already been cached.
        '''
        if LetsBuyScraper.pageCount is None:
            LetsBuyScraper.pageCount = {}

        if self.category not in LetsBuyScraper.pageCount:
            resultCount = int(self.json['resultCount']['0'])
            pageSize = LetsBuyScraper.PAGE_SIZE
            # Ceiling division: an exact multiple of the page size must not
            # produce an extra, empty trailing page. Always report at least
            # one page, as before.
            pages = (resultCount + pageSize - 1) // pageSize
            LetsBuyScraper.pageCount[self.category] = max(1, pages)

    def getDataFromProductPage(self, url):
        '''
        Scrape a single HTML product page and return a dict with the keys
        'product_url', 'source', 'price', 'in_stock' and 'name'.

        The name is read from the <h1 class="prod_name"> element and the
        price from the <span class="offer_price"> element (passed through
        removePriceFormatting). 'in_stock' is always reported as 1.

        Raises AttributeError if either element is missing from the page or
        has no plain string content.
        '''
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('h1', {'class': 'prod_name'}).string.strip()
        priceText = soup.find('span', {'class': 'offer_price'}).string.strip()
        price = removePriceFormatting(priceText)

        return {
            "product_url": str(url),
            "source": "letsbuy",
            "price": price,
            "in_stock": 1,
            "name": name
        }
80
 
81
 
3232 varun.gupt 82
if __name__ == '__main__':
    # Manual smoke test: fetch one listing page and dump what was parsed
    # from it, followed by the URL of the next page (or None).
    scraper = LetsBuyScraper()

    # Alternative invocations kept for reference:
    #   print scraper.getDataFromProductPage('http://www.letsbuy.com/samsung-galaxy-pop-s5570-p-14143')
    #   scraper.setUrl('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88')
    scraper.setUrl('http://www.letsbuy.com/filterResult?c=254_88&pp=192&pg=7')
    scraper.scrape()

    # A single parenthesised argument prints exactly like the bare print
    # statement does.
    print(scraper.getPhones())
    print(scraper.getNextUrl())