Subversion Repositories SmartDukaan

Rev

Rev 3232 | Rev 4198 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 3232 Rev 4039
Line 2... Line 2...
2
Created on 24-Aug-2011
2
Created on 24-Aug-2011
3
 
3
 
4
@author: Varun Gupta
4
@author: Varun Gupta
5
'''
5
'''
6
from BeautifulSoup import BeautifulSoup
6
from BeautifulSoup import BeautifulSoup
7
import urllib
7
from BaseScraper import BaseScraper
8
 
8
 
9
class InfibeamScraper(BaseScraper):
    """Scraper for Infibeam mobile-phone search-result pages.

    Usage: call setUrl() with a results-page URL, then scrape() to fetch and
    parse it, then getPhones() for the listed products and getNextUrl() for
    the following results page.
    """

    # URL template of the paginated mobile search; filled with a page number.
    SEARCH_URL = 'http://www.infibeam.com/Mobiles/search?page=%d'
    # Number of products per results page, used to derive the next page number.
    RESULTS_PER_PAGE = 20

    def __init__(self):
        # NOTE(review): BaseScraper.__init__ is not invoked here, matching the
        # existing behaviour -- confirm the base class needs no initialisation.
        self.url = None
        self.id = None
        # Parsed page; populated by scrape().
        self.soup = None

    def setUrl(self, url):
        """Set the URL of the results page that scrape() will fetch."""
        self.url = url

    def scrape(self):
        """Fetch self.url through BaseScraper.read and parse it into self.soup."""
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)

    @staticmethod
    def _toAscii(text):
        """Return text as a plain str with every non-ASCII character replaced by a space."""
        return str("".join([ch if ord(ch) < 128 else " " for ch in text]))

    @staticmethod
    def _extractPrice(li):
        """Return the price text of one result item, or None if no price markup is found."""
        try:
            return li.findAll('div', {'class': 'price'})[0].findAll('span', {'class': 'normal'})[0].string
        except IndexError:
            # Alternate layout: the price is the trailing text of a <span class="price">.
            spans = li.findAll('span', {'class': 'price'})
            if not spans or not spans[0].contents:
                return None
            return spans[0].contents[-1].strip()

    def getPhones(self):
        """Return the products listed on the scraped page.

        Returns:
            A list of dicts with keys 'name', 'price', 'in_stock' (always 1)
            and 'product_url'. All text values are ASCII-only str objects.
            The list is empty when the page has no results list. Items that
            lack a title, a price or a link are skipped.
        """
        result_lists = self.soup.findAll('ul', {'class': 'srch_result portrait'})
        if not result_lists:
            return []

        phones = []
        for li in result_lists[0]('li'):
            titles = li.findAll('span', {'class': 'title'})
            links = li.findAll('a')
            if not titles or not links:
                continue

            name = titles[0].string
            price = self._extractPrice(li)
            url = links[0].get('href')
            if name is None or price is None or url is None:
                continue

            # Sanitise every text field, not just the name: a non-ASCII price
            # or URL would otherwise fail the str() conversion as well.
            phones.append({
                'name': self._toAscii(name),
                'price': self._toAscii(price),
                'in_stock': 1,
                'product_url': self._toAscii(url),
            })
        return phones

    def getNextUrl(self):
        """Return the URL of the next results page, or None when there is none.

        The results summary is read as two <b> elements: the first holding a
        'first-last' item range and the second the total product count.
        None is returned when the summary is missing or the last item shown
        is already the final product.
        """
        summaries = self.soup.findAll('div', {'class': 'resultsSummary'})
        if not summaries:
            return None

        counters = summaries[0].findAll('b')
        if len(counters) < 2:
            return None

        current_max = int(counters[0].string.split('-')[1])
        total_products = int(counters[1].string)
        if current_max >= total_products:
            return None

        # Floor division keeps the page number an integer on every Python version.
        return self.SEARCH_URL % (1 + current_max // self.RESULTS_PER_PAGE)
31
 
51
 
32
if __name__ == '__main__':
    # Manual smoke test: fetch one results page and dump the parsed products.
    s = InfibeamScraper()
    s.setUrl('http://www.infibeam.com/Mobiles/search?page=17')
    s.scrape()
    products = s.getPhones()
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(products)
-
 
58
38
59