Subversion Repositories SmartDukaan

Rev

Rev 4199 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4199 Rev 5291
Line 21... Line 21...
21
        self.soup = BeautifulSoup(html)
21
        self.soup = BeautifulSoup(html)
22
    
22
    
23
    def getPhones(self):
23
    def getPhones(self):
24
        phone_prices = []
24
        phone_prices = []
25
        for li in self.soup.findAll('ul', {'class': 'srch_result portrait'})[0]('li'):
25
        for li in self.soup.findAll('ul', {'class': 'srch_result portrait'})[0]('li'):
26
 
26
            
27
            name = li.findAll('span', {'class': 'title'})[0].string
27
            name = li.find('span', {'class': 'title'}).contents[1].strip()
28
            try:
28
            try:
29
                price = li.findAll('div', {'class': 'price'})[0].findAll('span', {'class': 'normal'})[0].string
29
                price = li.find('div', {'class': 'price'}).find('span', {'class': 'normal'}).string
30
            except IndexError:
30
            except IndexError:
-
 
31
                price = removePriceFormatting(li.find('span', {'class': 'price'}).contents[-1].strip())
-
 
32
            except AttributeError:
31
                price = removePriceFormatting(li.findAll('span', {'class': 'price'})[0].contents[-1].strip())
33
                price = removePriceFormatting(li.find('span', {'class': 'price'}).contents[-1].strip())
32
            
34
            
33
            url = li.findAll('a')[0]['href']
35
            url = li.findAll('a')[0]['href']
34
            
36
            
35
            try:
37
            try:
36
                phone_prices.append({
38
                phone_prices.append({
37
                        'name': str(name), 
39
                        'name': str(name), 
38
                        'price': str(price),
40
                        'price': removePriceFormatting(str(price)),
39
                        'source': 'infibeam', 
41
                        'source': 'infibeam', 
40
                        'in_stock': 1, 
42
                        'in_stock': 1, 
41
                        'product_url': str(url)
43
                        'product_url': str(url)
42
                    })
44
                    })
43
                
45
                
Line 45... Line 47...
45
                print 'Unicode Error', e, name
47
                print 'Unicode Error', e, name
46
                name_ascii = "".join([char if ord(char) < 128 else " " for char in name])
48
                name_ascii = "".join([char if ord(char) < 128 else " " for char in name])
47
                print name_ascii
49
                print name_ascii
48
                phone_prices.append({
50
                phone_prices.append({
49
                        "name": str(name_ascii), 
51
                        "name": str(name_ascii), 
50
                        "price": str(price),
52
                        "price": removePriceFormatting(str(price)),
51
                        'source': 'infibeam', 
53
                        'source': 'infibeam', 
52
                        "in_stock": 1, 
54
                        "in_stock": 1, 
53
                        "product_url": str(url)
55
                        "product_url": str(url)
54
                    })
56
                    })
55
            
57
            
Line 78... Line 80...
78
        }
80
        }
79
        return data
81
        return data
80
 
82
 
81
if __name__ == '__main__':
83
if __name__ == '__main__':
82
    s = InfibeamScraper()
84
    s = InfibeamScraper()
83
    print s.getDataFromProductPage('http://www.infibeam.com/Mobiles/i-HTC-EVO-3D-Android-Smartphone/P-E-M-HTC-EVO-3D.html?id=Black')
-
 
84
#    s.setUrl('http://www.infibeam.com/Mobiles/search?page=17')
-
 
85
#    s.scrape()
-
 
86
#    products = s.getPhones()
-
 
87
#    print products
-
 
88
85
#    print s.getDataFromProductPage('http://www.infibeam.com/Mobiles/i-HTC-EVO-3D-Android-Smartphone/P-E-M-HTC-EVO-3D.html?id=Black')
-
 
86
    s.setUrl('http://www.infibeam.com/Mobiles/search?page=5')
-
 
87
    s.scrape()
-
 
88
    products = s.getPhones()
-
 
89
    
-
 
90
    print products
-
 
91
    print s.getNextUrl()
-
 
92
89
93