Subversion Repositories SmartDukaan

Rev

Rev 3232 | Rev 4198 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 24-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
6
from BeautifulSoup import BeautifulSoup
4039 varun.gupt 7
from BaseScraper import BaseScraper
3232 varun.gupt 8
 
4039 varun.gupt 9
class LetsBuyScraper(BaseScraper):
    '''
    Scraper for a LetsBuy product-listing page.

    Typical use: setUrl() with a listing URL, scrape() to fetch and parse
    it, then getPhones() and getNextUrl() to read the parsed page.
    '''

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None

    def setUrl(self, url):
        '''Store the URL that the next scrape() call will fetch.'''
        self.url = url

    def scrape(self):
        '''
        Fetch self.url through BaseScraper.read and parse the result into
        self.soup. Must be called before getPhones() or getNextUrl().
        '''
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)

    @staticmethod
    def _toAsciiStr(text):
        '''
        Return text as a plain str.

        When the direct str() conversion raises UnicodeEncodeError, the
        problem is reported on stdout and every character with an ordinal
        of 128 or above is replaced by a single space.
        '''
        try:
            return str(text)
        except UnicodeEncodeError as e:
            # Report the offending value through repr(): printing the raw
            # text could raise the same error again on an ASCII-only stdout
            # and abort the whole scrape from inside this handler.
            print('Unicode Error %s %r' % (e, text))
            text_ascii = "".join([char if ord(char) < 128 else " " for char in text])
            print(text_ascii)
            return str(text_ascii)

    def getPhones(self):
        '''
        Collect the products listed on the parsed page.

        Returns a list with one dict per <div class="detailbox">, holding
        the keys "name", "price", "in_stock" (always 1) and "product_url".
        Name and price are converted with _toAsciiStr, so non-ASCII
        characters in either are replaced by spaces instead of failing.

        There is no guard for a detail box that lacks the expected <h2><a>
        or <span class="text12_stb"> elements; such markup raises.
        '''
        phone_prices = []

        for div in self.soup.findAll('div', {'class': "detailbox"}):
            name_tag = div('h2')[0]('a')[0]
            name = name_tag.string.strip()
            price = div.findAll('span', {'class': "text12_stb"})[0].string.strip()
            # The URL is deliberately not sanitised: replacing characters
            # would silently produce a different, broken link.
            url = str(name_tag['href'])

            phone_prices.append({
                "name": self._toAsciiStr(name),
                "price": self._toAsciiStr(price),
                "in_stock": 1,
                "product_url": url,
            })

        return phone_prices

    def getNextUrl(self):
        '''
        Return the stripped href of the anchor titled "Next Page", or None
        when the parsed page has no such anchor. If several anchors match,
        the last one in document order wins.
        '''
        next_url = None

        for anchor in self.soup.findAll('a'):
            title = anchor.get('title')
            href = anchor.get('href')
            # Anchors missing either attribute cannot be the pagination link.
            if title is None or href is None:
                continue
            if title.strip() == "Next Page":
                next_url = href.strip()

        return next_url
52
 
53
if __name__ == '__main__':
    # Manual smoke test: fetch one listing page, then dump the parsed
    # products followed by the link to the next page.
    scraper = LetsBuyScraper()
    scraper.setUrl('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192')
    scraper.scrape()
    # Single-argument parenthesised print emits the same text as the bare
    # print statement.
    print(scraper.getPhones())
    print(scraper.getNextUrl())