Subversion Repositories SmartDukaan

Rev

Rev 4203 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4203 Rev 5291
Line 23... Line 23...
23
        self.soup = BeautifulSoup(html)
23
        self.soup = BeautifulSoup(html)
24
        self.phones = None
24
        self.phones = None
25
    
25
    
26
    def getPhones(self):
26
    def getPhones(self):
27
        phones = []
27
        phones = []
28
        
-
 
29
        for div in self.soup.findAll('div', {'class': 'fk-product-thumb fkp-medium'}):
28
        for div in self.soup.findAll('div', {'class': 'fk-product-thumb fkp-medium'}):
30
            try:
29
            try:
31
                anchor = div.findAll('a', {'class': 'title fk-anchor-link'})[0]
30
                anchor = div.find('a', {'class': 'title tpadding5 fk-anchor-link'})
32
                name = anchor['title'].strip()
31
                name = anchor['title'].strip()
33
                price = None
32
                price = None
34
                product_url = anchor['href'].strip()
33
                product_url = anchor['href'].strip()
35
                in_stock = 0 if div.findAll('b').__len__() > 0 else 1
34
                in_stock = 0 if div.findAll('b').__len__() > 0 else 1
36
                
35
                
Line 91... Line 90...
91
    def getDataFromProductPage(self, url):
90
    def getDataFromProductPage(self, url):
92
        html = BaseScraper.read(self, url)
91
        html = BaseScraper.read(self, url)
93
        soup = BeautifulSoup(html)
92
        soup = BeautifulSoup(html)
94
        name = soup.find('h1', {'itemprop': 'name'}).string.strip()
93
        name = soup.find('h1', {'itemprop': 'name'}).string.strip()
95
        price = soup.find('span',{'id': 'fk-mprod-our-id'}).contents[2]
94
        price = soup.find('span',{'id': 'fk-mprod-our-id'}).contents[2]
96
        in_stock = soup.find('div', {'id': 'fk-stock-info-id'}).string.strip()
95
        in_stock = 1
97
        
96
        
98
        data = {
97
        data = {
99
            "product_url": str(url), 
98
            "product_url": str(url), 
100
            "source": "flipkart", 
99
            "source": "flipkart", 
101
            "price": price, 
100
            "price": price, 
Line 104... Line 103...
104
        }
103
        }
105
        return data
104
        return data
106
 
105
 
107
if __name__ == '__main__':
106
if __name__ == '__main__':
108
    s = FlipcartScraper()
107
    s = FlipcartScraper()
109
#    data = s.getDataFromProductPage('http://www.flipkart.com/mobiles/micromax/itmd4nf8p5rfhk2y?pid=mobd4nf7rcrckjhn')
108
    data = s.getDataFromProductPage('http://www.flipkart.com/samsung-wave-ii-s8530-mobile-phone/p/itmctnexz3gyjfac?pid=MOBCTXB47XCP7Z9X&ref=eca2ea19-cde2-4bfd-a3d8-15cf737c88d3')
110
#    print data
109
    print data
111
    
110
    
112
    s.setUrl('http://www.flipkart.com/mobiles/all/24')
-
 
113
    s.scrape()
-
 
114
    phones = s.getPhones()
-
 
115
    for p in phones: print p
-
 
116
    print s.getNextUrl()
-
 
117
111
#    s.setUrl('http://www.flipkart.com/mobiles/all')
-
 
112
#    s.scrape()
-
 
113
#    phones = s.getPhones()
-
 
114
#    for p in phones: print p
-
 
115
#    print s.getNextUrl()
-
 
116
118
117