'''
Created on 24-Aug-2011

@author: Varun Gupta
'''
from BeautifulSoup import BeautifulSoup
from BaseScraper import BaseScraper
from Utils import removePriceFormatting
class LetsBuyScraper(BaseScraper):
3232 varun.gupt 11
 
12
    def __init__(self):
4039 varun.gupt 13
        BaseScraper.__init__(self)
3232 varun.gupt 14
        self.url = None
15
        self.id = None
16
 
17
    def setUrl(self, url):
18
        self.url = url
19
 
20
    def scrape(self):
4039 varun.gupt 21
        html = BaseScraper.read(self, self.url)
3232 varun.gupt 22
        self.soup = BeautifulSoup(html)
23
 
4039 varun.gupt 24
    def getPhones(self):
3232 varun.gupt 25
        phone_prices = []
4039 varun.gupt 26
 
3232 varun.gupt 27
        for div in self.soup.findAll('div', {'class': "detailbox"}):
4039 varun.gupt 28
            name_tag = div('h2')[0]('a')[0]
29
            name = name_tag.string.strip()
4198 varun.gupt 30
            price = removePriceFormatting(div.findAll('span', {'class': "text12_stb"})[0].string.strip())
4039 varun.gupt 31
            url = str(name_tag['href'])
32
            try:
4198 varun.gupt 33
                phone_prices.append({
34
                        "name": str(name), 
35
                        "price": str(price),
36
                        'source': 'letsbuy', 
37
                        "in_stock": 1, 
38
                        "product_url": str(url)
39
                    })
4039 varun.gupt 40
            except UnicodeEncodeError as e:
41
                print 'Unicode Error', e, name
42
                name_ascii = "".join([char if ord(char) < 128 else " " for char in name])
43
                print name_ascii
4198 varun.gupt 44
                phone_prices.append({
45
                        "name": str(name_ascii), 
46
                        "price": str(price),
47
                        'source': 'letsbuy', 
48
                        "in_stock": 1, 
49
                        "product_url": str(url)
50
                    })
4039 varun.gupt 51
 
3232 varun.gupt 52
        return phone_prices
53
 
54
    def getNextUrl(self):
55
        next_url = None
56
 
57
        for anchor in self.soup.findAll('a'):
58
            try:
59
                if anchor['title'].strip() == "Next Page":
60
                    next_url = anchor['href'].strip()
61
            except KeyError:
62
                pass
63
 
64
        return next_url
65
 
4199 varun.gupt 66
    def getDataFromProductPage(self, url):
67
        html = BaseScraper.read(self, url)
68
        soup = BeautifulSoup(html)
69
        name = soup.find('h1', {'class': 'prod_name'}).string.strip()
70
        price = removePriceFormatting(soup.find('span',{'class': 'offer_price'}).string.strip())
71
 
72
        data = {
73
            "product_url": str(url),
74
            "source": "letsbuy",
75
            "price": price,
76
            "in_stock": 1,
77
            "name": name
78
        }
79
        return data
if __name__ == '__main__':
    # Manual smoke test: fetch one known product page and dump the result dict.
    s = LetsBuyScraper()
    print s.getDataFromProductPage('http://www.letsbuy.com/samsung-galaxy-pop-s5570-p-14143')

    # Listing-page flow (kept for manual testing):
#    s.setUrl('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192')
#    s.scrape()
#    phones = s.getPhones()
#    print phones
#    print s.getNextUrl()