Subversion Repositories SmartDukaan

Rev

Rev 3232 | Rev 4198 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 3232 Rev 4039
Line 2... Line 2...
2
Created on 24-Aug-2011
2
Created on 24-Aug-2011
3
 
3
 
4
@author: Varun Gupta
4
@author: Varun Gupta
5
'''
5
'''
6
from BeautifulSoup import BeautifulSoup
6
from BeautifulSoup import BeautifulSoup
7
import urllib
7
from BaseScraper import BaseScraper
8
 
8
 
9
class LetsBuyScraper:
9
class LetsBuyScraper(BaseScraper):
10
    
10
    
11
    def __init__(self):
11
    def __init__(self):
-
 
12
        BaseScraper.__init__(self)
12
        self.url = None
13
        self.url = None
13
        self.id = None
14
        self.id = None
14
        
-
 
15
    
15
    
16
    def setUrl(self, url):
16
    def setUrl(self, url):
17
        self.url = url
17
        self.url = url
18
    
18
    
19
    def scrape(self):
19
    def scrape(self):
20
        sock = urllib.urlopen(self.url)
20
        html = BaseScraper.read(self, self.url)
21
        html = sock.read()
-
 
22
        sock.close()
-
 
23
        self.soup = BeautifulSoup(html)
21
        self.soup = BeautifulSoup(html)
24
    
22
    
25
    def getPhonePrices(self):
23
    def getPhones(self):
26
        phone_prices = []
24
        phone_prices = []
27
        
25
 
28
        for div in self.soup.findAll('div', {'class': "detailbox"}):
26
        for div in self.soup.findAll('div', {'class': "detailbox"}):
29
            name = div('h2')[0]('a')[0].string.strip()
27
            name_tag = div('h2')[0]('a')[0]
-
 
28
            name = name_tag.string.strip()
30
            price = div.findAll('span', {'class': "text12_stb"})[0].string.strip()
29
            price = div.findAll('span', {'class': "text12_stb"})[0].string.strip()
-
 
30
            url = str(name_tag['href'])
-
 
31
            try:
-
 
32
                phone_prices.append({"name": str(name), "price": str(price), "in_stock": 1, "product_url": str(url)})
-
 
33
            except UnicodeEncodeError as e:
-
 
34
                print 'Unicode Error', e, name
-
 
35
                name_ascii = "".join([char if ord(char) < 128 else " " for char in name])
31
            print name, price
36
                print name_ascii
32
            phone_prices.append({'name': str(name), 'price': str(price)})
37
                phone_prices.append({"name": str(name_ascii), "price": str(price), "in_stock": 1, "product_url": str(url)})
-
 
38
            
33
        return phone_prices
39
        return phone_prices
34
    
40
    
35
    def getNextUrl(self):
41
    def getNextUrl(self):
36
        next_url = None
42
        next_url = None
37
        
43
        
Line 46... Line 52...
46
 
52
 
47
if __name__ == '__main__':
53
if __name__ == '__main__':
48
    s = LetsBuyScraper()
54
    s = LetsBuyScraper()
49
    s.setUrl('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192')
55
    s.setUrl('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192')
50
    s.scrape()
56
    s.scrape()
-
 
57
    phones = s.getPhones()
-
 
58
    print phones
51
    print s.getNextUrl()
59
    print s.getNextUrl()
52
60