Subversion Repositories SmartDukaan

Rev

Rev 5761 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5639 amar.kumar 1
from BeautifulSoup import BeautifulSoup
2
from BaseScraper import BaseScraper
3
from Utils import removePriceFormatting
4
 
5
import time
6
 
7
class TradusScraper(BaseScraper):
    """Scraper for mobile-phone search listings and product pages on tradus.com.

    Typical listing flow (as far as this class shows): obtain a URL from
    ``getNextUrl``, pass it to ``setUrl``, call ``scrape`` to download and
    parse the page, then call ``getPhones`` to extract the products.
    """

    # Number of search-result pages getNextUrl hands out before returning None.
    pageCount = 67
    # Not read anywhere in this class; presumably consumed by the code that
    # drives the scrapers -- TODO confirm.
    productCountPerScraping = 20
    # Last page number handed out by getNextUrl. Class-level, so the counter
    # is shared by every TradusScraper instance.
    currentPage = 0

    def __init__(self):
        """Initialise the base scraper and clear the target URL and id."""
        BaseScraper.__init__(self)
        self.url = None
        self.id = None

    def setUrl(self, url):
        """Set the URL that the next call to ``scrape`` will download."""
        self.url = url

    def scrape(self):
        """Download ``self.url`` and parse it into ``self.soup``.

        Also resets ``self.phones`` to None so results from a previously
        scraped page are not mistaken for results of this one.
        """
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)
        self.phones = None

    def getPhones(self):
        """Extract every product listed on the page parsed by ``scrape``.

        Returns a list with one dict per listing (keys ``name``, ``price``,
        ``source``, ``product_url``, ``in_stock``) and stores the same list
        in ``self.phones``. A listing that cannot be parsed is reported on
        stdout and skipped, so one malformed entry does not lose the page.
        An empty page yields an empty list.
        """
        phones = []
        for div in self.soup.findAll('div', {'class': 'mainresult-show-right'}):
            try:
                phone = self._parseListing(div)
            except Exception as e:
                # Deliberate best effort: log and move on to the next listing.
                print(e)
                continue
            phones.append(phone)

        # Assign and return only after ALL listings were visited; doing this
        # inside the loop would stop after the first listing.
        self.phones = phones
        return phones

    def _parseListing(self, div):
        """Build the product dict for a single search-result ``div``."""
        anchor = div.find('a')
        # The product name is normally the third child of the anchor; when
        # that node is blank the first child carries the name instead.
        name = anchor.contents[2].strip()
        if len(name) == 0:
            name = anchor.contents[0].strip()
        product_url = anchor['href'].strip()
        price = self._extractPrice(div)

        return {
            'name': str(name),
            'price': removePriceFormatting(price),
            'source': 'tradus',
            'product_url': str(product_url),
            'in_stock': 1,
        }

    def _extractPrice(self, div):
        """Return the raw price text of a search-result ``div``.

        Two markups are handled: the price inside a span nested in the first
        span of the 'startrate' div, or directly as the text of that first
        span.
        """
        rate = div.find('div', {'class': 'mainresult-show-right-startrate'})
        outer = rate.findAll('span')[0]
        try:
            # Skips a 3-character prefix -- presumably the currency marker;
            # TODO confirm against live markup.
            return outer.findAll('span')[0].contents[0].strip()[3:]
        except IndexError:
            # No usable nested span: the price follows an 18-character
            # prefix in the outer span's own text (presumably a label plus
            # currency marker -- TODO confirm).
            return outer.contents[0].strip()[18:]

    def setPageCount(self):
        """Increment ``currentPage`` as seen through this instance.

        NOTE(review): this assigns an *instance* attribute, whereas
        ``getNextUrl`` reads and advances the *class* attribute, so calling
        this method does not influence ``getNextUrl``. Left unchanged because
        callers outside this file may rely on it.
        """
        self.currentPage = self.currentPage + 1

    def getNextUrl(self):
        """Return the URL of the next search-result page, or None when done.

        Sleeps for one second on every call before doing anything else, then
        advances the class-wide page counter. Once ``pageCount`` pages have
        been handed out, None is returned.
        """
        time.sleep(1)
        if TradusScraper.currentPage < TradusScraper.pageCount:
            TradusScraper.currentPage += 1
            return 'http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756&page=%s' % (TradusScraper.currentPage)
        else:
            return None

    def getDataFromProductPage(self, url):
        """Download a single product page and return its details as a dict.

        The returned ``price`` is the stripped page text as-is; unlike
        ``getPhones`` it is NOT passed through ``removePriceFormatting``.
        If the heading or price element is missing, the attribute access on
        the lookup result fails (expected to surface as AttributeError).
        """
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('h1', {'class': 'left-content-product-heading'}).string.strip()
        price = soup.find('b', {'id': 'tPrice'}).string.strip()

        data = {
            "product_url": str(url),
            "source": "tradus",
            "price": price,
            "in_stock": 1,
            "name": name
        }
        return data
103
 
104
if __name__ == '__main__':
    # Manual smoke test: fetch one live product page and print what was parsed.
    # To exercise the search-listing path instead, run:
    #     s.setUrl(s.getNextUrl()); s.scrape(); print(s.getPhones())
    s = TradusScraper()
    data = s.getDataFromProductPage('http://www.tradus.com/samsung-galaxy-y-pro-duos-b5512-mobile-phone/p/MOB0000004549294')
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(data)