Subversion Repositories SmartDukaan

Rev

Rev 4039 | Rev 4203 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4039 Rev 4198
Line 4... Line 4...
4
@author: Varun Gupta
4
@author: Varun Gupta
5
'''
5
'''
6
 
6
 
7
from BeautifulSoup import BeautifulSoup
7
from BeautifulSoup import BeautifulSoup
8
from BaseScraper import BaseScraper
8
from BaseScraper import BaseScraper
-
 
9
from Utils import removePriceFormatting
9
 
10
 
10
class FlipcartScraper(BaseScraper):
11
class FlipcartScraper(BaseScraper):
11
    
12
    
12
    def __init__(self):
13
    def __init__(self):
13
        BaseScraper.__init__(self)
14
        BaseScraper.__init__(self)
Line 41... Line 42...
41
                        pass
42
                        pass
42
                try:
43
                try:
43
                    if price is None:
44
                    if price is None:
44
                        continue
45
                        continue
45
                    else:
46
                    else:
-
 
47
                        phones.append({
-
 
48
                                'name': str(name), 
-
 
49
                                'price': removePriceFormatting(price),
-
 
50
                                'source': 'flipkart', 
46
                        phones.append({'name': str(name), 'price': str(price), 'product_url': str(product_url), 'in_stock': in_stock})
51
                                'product_url': str(product_url), 
-
 
52
                                'in_stock': in_stock
-
 
53
                            })
47
                
54
                
48
                except UnboundLocalError as e:
55
                except UnboundLocalError as e:
49
                    print e, name
56
                    print e, name
50
                    print div
57
                    print div
51
                    
58
                    
52
                except UnicodeEncodeError as e:
59
                except UnicodeEncodeError as e:
53
                    print 'Unicode Error', e, name
60
                    print 'Unicode Error', e, name
54
                    name_ascii = "".join([char if ord(char) < 128 else " " for char in name])
61
                    name_ascii = "".join([char if ord(char) < 128 else " " for char in name])
55
                    print name_ascii
62
                    print name_ascii
-
 
63
                    phones.append({
-
 
64
                            "name": str(name_ascii), 
-
 
65
                            "price": str(price),
-
 
66
                            'source': 'flipkart',  
-
 
67
                            "in_stock": in_stock, 
56
                    phones.append({"name": str(name_ascii), "price": str(price), "in_stock": in_stock, "product_url": str(product_url)})
68
                            "product_url": str(product_url)
-
 
69
                        })
57
            except KeyError:
70
            except KeyError:
58
                pass
71
                pass
59
        self.phones = phones
72
        self.phones = phones
60
        return phones
73
        return phones
61
    
74
    
Line 72... Line 85...
72
            else:
85
            else:
73
                return None
86
                return None
74
        else:
87
        else:
75
            return None
88
            return None
76
 
89
 
-
 
90
    def getDataFromProductPage(self, url):
-
 
91
        html = BaseScraper.read(self, url)
-
 
92
        soup = BeautifulSoup(html)
-
 
93
        name = soup.find('h1', {'itemprop': 'name'}).string.strip()
-
 
94
        price = soup.find('span',{'id': 'fk-mprod-our-id'}).contents[2]
-
 
95
        in_stock = soup.find('div', {'id': 'fk-stock-info-id'}).string.strip()
-
 
96
        
-
 
97
        data = {
-
 
98
            "product_url": str(url), 
-
 
99
            "source": "flipkart", 
-
 
100
            "price": price, 
-
 
101
            "in_stock": 1 if in_stock == 'In Stock.' else 0, 
-
 
102
            "name": name
-
 
103
        }
-
 
104
        return data
77
 
105
 
78
if __name__ == '__main__':
106
if __name__ == '__main__':
79
    s = FlipcartScraper()
107
    s = FlipcartScraper()
80
    s.setUrl('http://www.flipkart.com/mobiles/all/27')
-
 
81
    s.scrape()
-
 
82
    phones = s.getPhones()
-
 
83
    for p in phones: print p
-
 
84
    print s.getNextUrl()
-
 
85
108
    data = s.getDataFromProductPage('http://www.flipkart.com/mobiles/micromax/itmd4nf8p5rfhk2y?pid=mobd4nf7rcrckjhn')
-
 
109
    print data
-
 
110
    
-
 
111
#    s.setUrl('http://www.flipkart.com/mobiles/all/27')
-
 
112
#    s.scrape()
-
 
113
#    phones = s.getPhones()
-
 
114
#    for p in phones: print p
-
 
115
#    print s.getNextUrl()
-
 
116
86
117