Subversion Repositories SmartDukaan

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5639 amar.kumar 1
from BeautifulSoup import BeautifulSoup
2
from BaseScraper import BaseScraper
3
from Utils import removePriceFormatting
4
 
5
class SulekhaScraper(BaseScraper):
    """Scraper for the mobile-phone deal listings on sulekha.com.

    Paging state (``currentPage``) is kept on the class rather than on the
    instance, so it is shared by every ``SulekhaScraper`` object in the
    process; ``getNextUrl`` is the method that advances it.
    """

    # Highest page number that getNextUrl will hand out.
    pageCount = 11
    # Not read anywhere in this class.
    # NOTE(review): presumably the number of listings per page - confirm.
    productCountPerScraping = 24
    # Page number most recently handed out by getNextUrl (class-level state).
    currentPage = 1

    def __init__(self):
        """Initialise the base scraper and clear the target URL and id."""
        BaseScraper.__init__(self)
        self.url = None
        self.id = None
        # currentPage is not initialised per instance: getNextUrl tracks
        # paging on the class attribute.

    def setUrl(self, url):
        """Set the URL that the next call to ``scrape`` will fetch."""
        self.url = url

    def scrape(self):
        """Fetch ``self.url`` and parse the response into ``self.soup``.

        Also resets ``self.phones`` to ``None``; call ``getPhones``
        afterwards to populate it.
        """
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)
        self.phones = None

    def getPhones(self):
        """Extract the deals listed on the page parsed by ``scrape``.

        Each ``<li>`` inside the ``<ul id="MMobBrandOffersListCont">``
        element is turned into a dict with the keys ``name``, ``price``,
        ``source``, ``product_url`` and ``in_stock``.

        Parsing is best-effort: a list item that cannot be parsed is
        reported on stdout and skipped, and the remaining items are still
        processed. A list item without a price element is skipped silently.

        Returns:
            The list of dicts (possibly empty). The same list is stored in
            ``self.phones``.
        """
        phones = []
        listing = self.soup.find('ul', id="MMobBrandOffersListCont")
        if listing is None:
            # The page has no listing container, so there is nothing to parse.
            print('sulekha: listing element MMobBrandOffersListCont not found')
            self.phones = phones
            return phones

        for li in listing.findAll('li'):
            # The try is per item so that one malformed entry does not
            # discard every listing that follows it.
            try:
                anchor = li.find('div', {'class': 'dealtit'}).find('a')
                priceSpan = li.find('span', {'class': 'deals-our-price'})
                if priceSpan is None:
                    # No price on this entry: skip it.
                    continue
                name = anchor.string.strip()
                # The price text is the second child node of the price span.
                price = priceSpan.contents[1].strip()
                product_url = anchor['href'].strip()
                phones.append({
                    'name': str(name),
                    'price': removePriceFormatting(price),
                    'source': 'sulekha',
                    'product_url': str(product_url),
                    'in_stock': 1
                })
            except Exception as e:
                # Best-effort scraping: report the offending entry and go on.
                print(e)
                print(li)

        self.phones = phones
        return phones

    def setPageCount(self):
        """Increment ``currentPage`` by one through the instance.

        NOTE(review): assigning through ``self`` creates or updates an
        instance attribute, whereas ``getNextUrl`` reads and writes
        ``SulekhaScraper.currentPage``. Calling this method therefore does
        not change the URLs ``getNextUrl`` returns. It is not called
        anywhere in this class.
        """
        self.currentPage = self.currentPage + 1

    def getNextUrl(self):
        """Advance the class-level page counter and return that page's URL.

        Returns:
            The listing URL for the next page, or ``None`` once
            ``currentPage`` has reached ``pageCount``.
        """
        if SulekhaScraper.currentPage >= SulekhaScraper.pageCount:
            return None
        SulekhaScraper.currentPage += 1
        return 'http://mobiles.sulekha.com/common/common.aspx?type=mobileofferslist&makeId=0&modelId=0&pageNo=%s' % SulekhaScraper.currentPage

    def getDataFromProductPage(self, url):
        """Fetch a single product page and extract its name and price.

        Args:
            url: Address of the product page.

        Returns:
            A dict with the keys ``product_url``, ``source``, ``price``,
            ``in_stock`` and ``name``.

        Raises:
            AttributeError, IndexError: if the expected title or price
                elements are not present in the page.
        """
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        # Calling a BeautifulSoup tag is shorthand for findAll(); spelled out
        # here for readability.
        titleLink = soup.find('h1', {'class': 'product-title'}).findAll('a')[0]
        name = titleLink.contents[0].string.strip()
        # NOTE(review): unlike getPhones, the price is returned as scraped,
        # without removePriceFormatting - confirm this is intended.
        price = soup.find('span', {'itemprop': 'price'}).string.strip()
        in_stock = 1

        data = {
            "product_url": str(url),
            "source": "sulekha",
            "price": price,
            "in_stock": in_stock,
            "name": name
        }
        return data
85
 
86
if __name__ == '__main__':
    # Manual smoke test: scrape one known product page and show the result.
    scraper = SulekhaScraper()
    print(scraper.getDataFromProductPage('http://deals.sulekha.com/blackberry-curve-9360-white-17561'))