Subversion Repositories SmartDukaan

Rev

Rev 5761 | Rev 6022 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5639 amar.kumar 1
from BeautifulSoup import BeautifulSoup
2
from BaseScraper import BaseScraper
3
from Utils import removePriceFormatting
4
 
5
import time
6
 
7
class TradusScraper(BaseScraper):
    """Scraper for tradus.com mobile and tablet search result pages.

    Typical use: call setUrl() with a search URL, scrape() to download and
    parse it, getPhones() to extract the listed products, and getNextUrl()
    to obtain the URL of the following result page.
    """

    # Highest page number getNextUrl() will hand out for each search.
    mobilePageCount = 32
    tabletPageCount = 21
    # Not referenced anywhere inside this class.
    productCountPerScraping = 20
    # Page cursors are stored on the class (see getNextUrl), so they are
    # shared by every TradusScraper instance in the process.
    mobileCurrentPage = 0
    tabletCurrentPage = 0

    def __init__(self):
        BaseScraper.__init__(self)
        self.url = None
        self.id = None
        # Populated by scrape() / getPhones(); defined here so the
        # attributes always exist.
        self.soup = None
        self.phones = None

    def setUrl(self, url):
        """Set the URL that the next scrape() call will download."""
        self.url = url

    def scrape(self):
        """Download self.url and parse it into self.soup.

        Also clears any product list cached by a previous getPhones() call.
        """
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)
        self.phones = None

    def _extractName(self, anchor):
        """Return the product name held in a result's title anchor.

        The anchor's children come in three layouts, told apart by the
        number of children and by whether the second child's text is
        "Tablet". Raises IndexError when the anchor has too few children
        for the layout it appears to use.
        """
        contents = anchor.contents
        if len(contents) == 1:
            return contents[0].strip()
        if contents[1].string == "Tablet":
            # The word "Tablet" sits in its own child node; stitch the
            # surrounding text pieces back together around it.
            name = contents[0] + "Tablet"
            if len(contents) > 2:
                name = name + contents[2]
            return name
        name = contents[2].strip()
        if len(name) == 0:
            name = contents[0].strip()
        return name

    def _extractPrice(self, div):
        """Return the raw price text of one search result block.

        Tries the nested-span layout first and falls back to reading the
        outer span directly when the nested lookup raises IndexError.
        """
        rate = div.find('div', {'class': 'mainresult-show-right-startrate'})
        outer = rate('span')[0]
        try:
            # Nested layout: drop the first 3 characters in front of the
            # amount (presumably a currency prefix -- confirm on live markup).
            return outer('span')[0].contents[0].strip()[3:]
        except IndexError:
            # Flat layout: the amount follows an 18-character lead-in.
            return outer.contents[0].strip()[18:]

    def getPhones(self):
        """Extract every product listed on the page parsed by scrape().

        Returns a list of dicts with the keys 'name', 'price', 'source',
        'product_url' and 'in_stock'; the same list is stored in
        self.phones. A result that cannot be parsed is reported with print
        and skipped, so one malformed entry never aborts the page.
        """
        phones = []
        for div in self.soup.findAll('div', {'class': 'mainresult-show-right'}):
            try:
                anchor = div.find('a')
                # Name and URL are resolved before the price so that the
                # price fallback can never pair this result's price with a
                # name or URL left over from the previous result.
                name = self._extractName(anchor)
                product_url = anchor['href'].strip()
                price = self._extractPrice(div)
                phones.append({
                    'name': str(name),
                    'price': removePriceFormatting(price),
                    'source': 'tradus',
                    'product_url': str(product_url),
                    'in_stock': 1
                })
            except Exception as e:
                # Best effort: report the problem and move to the next result.
                print(e)
        self.phones = phones
        return phones

    def setPageCount(self):
        """Advance this instance's currentPage counter by one.

        Starts from 0 when the attribute has not been set yet; this class
        itself never assigns it elsewhere.
        """
        self.currentPage = getattr(self, 'currentPage', 0) + 1

    def getNextUrl(self):
        """Return the URL of the next result page, or None when finished.

        Which search to continue is decided from the current self.url
        ("mobile" is checked before "tablets"). Advances the matching
        class-level page cursor. Sleeps for one second on every call
        before doing anything else.
        """
        time.sleep(1)
        if not self.url:
            # No URL has been set, so there is no search to continue.
            return None
        if "mobile" in self.url:
            if TradusScraper.mobileCurrentPage < TradusScraper.mobilePageCount:
                TradusScraper.mobileCurrentPage += 1
                return 'http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756&page=%s' % (TradusScraper.mobileCurrentPage)
            return None
        if "tablets" in self.url:
            if TradusScraper.tabletCurrentPage < TradusScraper.tabletPageCount:
                TradusScraper.tabletCurrentPage += 1
                return 'http://www.tradus.com/search/tradus_search/?query=tablets&filters=cat:7756&cat:7762&page=%s' % (TradusScraper.tabletCurrentPage)
            return None
        return None

    def getDataFromProductPage(self, url):
        """Download a single product page and return its details as a dict.

        The dict has the keys 'product_url', 'source', 'price', 'in_stock'
        and 'name'. Unlike getPhones(), the price is returned as page text
        and is not passed through removePriceFormatting. Raises
        AttributeError when the heading or price element is not found.
        """
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('h1', {'class': 'left-content-product-heading'}).string.strip()
        price = soup.find('b', {'id': 'tPrice'}).string.strip()
        if "Rs." in price:
            # Drops the first four characters; assumes the text starts
            # with "Rs." plus one separator character -- TODO confirm.
            price = price[4:]

        data = {
            "product_url": str(url),
            "source": "tradus",
            "price": price,
            "in_stock": 1,
            "name": name
        }
        return data
124
 
125
if __name__ == '__main__':
    # Manual smoke test: fetch one known product page and print the record
    # that getDataFromProductPage() builds from it.
    s = TradusScraper()
    data = s.getDataFromProductPage('http://www.tradus.com/samsung-galaxy-y-pro-duos-b5512-mobile-phone/p/MOB0000004549294')
    print(data)