Subversion Repositories SmartDukaan

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 24-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
6
from BeautifulSoup import BeautifulSoup
7
import urllib
8
 
9
class FlipcartScraper:
    '''
    Scrapes a Flipkart mobile-phone listing page.

    Typical use: setUrl() -> scrape() -> getPhones() / getNextUrl().
    getPhones() and getNextUrl() read the document parsed by the most
    recent scrape() call, so scrape() has to be called first.
    '''

    # URL template used by getNextUrl() to address a listing page by number.
    _LISTING_URL = 'http://www.flipkart.com/mobiles/all/%d'

    # Presumably the number of products shown per listing page; used to turn
    # the index of the last product shown into a page number. TODO confirm.
    _PAGE_SIZE = 20

    def __init__(self):
        self.url = None
        self.id = None
        # Parsed document; stays None until scrape() has been called.
        self.soup = None

    def setUrl(self, url):
        '''Set the URL that the next scrape() call will download.'''
        self.url = url

    def scrape(self):
        '''
        Download self.url and parse the response body into self.soup.

        The connection is closed even when reading the response fails.
        '''
        sock = urllib.urlopen(self.url)
        try:
            html = sock.read()
        finally:
            sock.close()
        self.soup = BeautifulSoup(html)

    def getPhones(self):
        '''
        Return the products found on the scraped page.

        Each product is a dict with the keys 'name', 'price' and
        'product_url' (strings) and 'in_stock' (0 when the product tile
        contains a <b> element, 1 otherwise).

        Tiles that lack a title anchor, a 'title'/'href' attribute on that
        anchor, or a final-price span are skipped, so a single malformed
        tile neither aborts the scrape nor gets reported with the price of
        the tile before it.
        '''
        phones = []

        for div in self.soup.findAll('div', {'class': 'fk-product-thumb fkp-medium'}):
            try:
                anchor = div.findAll('a', {'class': 'title fk-anchor-link'})[0]
                name = anchor['title'].strip()
                product_url = anchor['href'].strip()
            except (KeyError, IndexError):
                # No title anchor, or the anchor has no title/href attribute.
                continue

            price = self._extractPrice(div)
            if price is None:
                continue

            in_stock = 0 if div.findAll('b') else 1

            # NOTE(review): under Python 2, str() raises UnicodeEncodeError
            # for non-ASCII text; kept because callers receive str values.
            phones.append({'name': str(name), 'price': str(price), 'product_url': str(product_url), 'in_stock': in_stock})

        return phones

    def _extractPrice(self, div):
        '''Return the stripped text of the last final-price span in div, or None.'''
        price = None

        for span in div.findAll('span'):
            try:
                css_class = span['class']
            except KeyError:
                # Span without a class attribute cannot be the price.
                continue

            # span.string is None when the span has no single text child.
            if 'price final-price' in css_class and span.string is not None:
                price = span.string.strip()

        return price

    def getNextUrl(self):
        '''
        Return the URL of the next listing page, or None on the last page.

        Reads the first two <b> elements of the results header: the text
        after the hyphen in the first one is taken as the index of the last
        product shown, the second one as the total number of products.

        Raises IndexError when the header or either <b> element is missing,
        and ValueError when their text is not numeric.
        '''
        tab_info = self.soup.findAll('div', {'class': 'unit fk-lres-header-text'})[0]('b')

        current_max = int(tab_info[0].string.split('-')[1])
        total = int(tab_info[1].string)

        if current_max < total:
            # Floor division keeps the page number integral on any Python version.
            return self._LISTING_URL % (1 + (current_max // self._PAGE_SIZE))
        return None
57
 
58
if __name__ == '__main__':
    # Manual smoke run: download one listing page and print every product
    # dict that was extracted from it, one per line.
    scraper = FlipcartScraper()
    scraper.setUrl('http://www.flipkart.com/mobiles/all/24')
    scraper.scrape()
    for phone in scraper.getPhones():
        print(phone)