Subversion Repositories SmartDukaan

Rev

Rev 5639 | Rev 5770 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5639 amar.kumar 1
from BeautifulSoup import BeautifulSoup
2
from BaseScraper import BaseScraper
3
from Utils import removePriceFormatting
4
 
5
import time
6
 
7
class TradusScraper(BaseScraper):
    """Scraper for mobile-phone and tablet search listings on tradus.com.

    Pages are downloaded through ``BaseScraper.read`` and parsed with
    BeautifulSoup.  The page counters below are class attributes, so they
    are shared by every instance of this scraper.
    """

    # Highest listing page number getNextUrl() will hand out per category.
    mobilePageCount = 32
    tabletPageCount = 21
    # Not referenced inside this class; presumably read by the code that
    # drives the scrapers -- verify before changing.
    productCountPerScraping = 20
    # Last page number handed out by getNextUrl() for each category.
    mobileCurrentPage = 0
    tabletCurrentPage = 0

    def __init__(self):
        """Initialise the base scraper and clear the target URL and id."""
        BaseScraper.__init__(self)
        self.url = None
        self.id = None

    def setUrl(self, url):
        """Set the listing URL that scrape() and getNextUrl() work from."""
        self.url = url

    def scrape(self):
        """Download ``self.url`` and parse it into ``self.soup``.

        ``self.phones`` is reset to None so that results from a previously
        scraped page are not mistaken for results of this one.
        """
        html = BaseScraper.read(self, self.url)
        self.soup = BeautifulSoup(html)
        self.phones = None

    def _extractName(self, anchor):
        """Return the product name held in a listing's title anchor.

        Raises IndexError when the anchor has fewer children than the
        layout being handled needs.
        """
        contents = anchor.contents
        if len(contents) == 1:
            return contents[0].strip()
        if contents[1].string == "Tablet":
            # The second child reads "Tablet"; rebuild the name around it.
            name = contents[0] + "Tablet"
            if len(contents) > 2:
                name = name + contents[2]
            return name
        # Otherwise prefer the third child and fall back to the first one
        # when the third is blank.
        name = contents[2].strip()
        if not name:
            name = contents[0].strip()
        return name

    def getPhones(self):
        """Extract every product listed on the page parsed by scrape().

        Returns a list of dicts with the keys ``name``, ``price``,
        ``source``, ``product_url`` and ``in_stock``; the same list is also
        stored on ``self.phones``.  Listings that cannot be parsed are
        reported on stdout and skipped rather than aborting the page.
        """
        phones = []
        for div in self.soup.findAll('div', {'class': 'mainresult-show-right'}):
            # Reset per listing so the IndexError fallback below can never
            # pair this listing's price with the previous listing's name/URL.
            name = None
            product_url = None
            try:
                anchor = div.find('a')
                name = self._extractName(anchor)
                product_url = anchor['href'].strip()
                rate = div.find('div', {'class': 'mainresult-show-right-startrate'})
                # Usual layout: price text sits in a nested span; the first
                # three characters are a prefix that is dropped.
                price = rate('span')[0]('span')[0].contents[0].strip()[3:]
            except IndexError:
                if name is None or product_url is None:
                    # The IndexError came from reading the name, so there is
                    # nothing trustworthy to attach a price to.
                    print('tradus: skipping listing, could not read name/url')
                    continue
                try:
                    # Alternate layout without the nested span: the price
                    # follows an 18-character prefix in the outer span.
                    rate = div.find('div', {'class': 'mainresult-show-right-startrate'})
                    price = rate('span')[0].contents[0].strip()[18:]
                except Exception as ex:
                    print(ex)
                    continue
            except Exception as e:
                print(e)
                continue

            try:
                phones.append({
                    'name': str(name),
                    'price': removePriceFormatting(price),
                    'source': 'tradus',
                    'product_url': str(product_url),
                    'in_stock': 1,
                })
            except Exception as e:
                # Conversion failures (e.g. str() on the name or price
                # clean-up) only cost this one listing.
                print(e)

        self.phones = phones
        return phones

    def setPageCount(self):
        """Advance ``self.currentPage`` by one.

        ``currentPage`` is not initialised anywhere in this class, so it is
        treated as 0 when the attribute is missing.
        """
        self.currentPage = getattr(self, 'currentPage', 0) + 1

    def getNextUrl(self):
        """Return the URL of the next listing page, or None when finished.

        The category is chosen by looking for "mobile" or "tablets" in
        ``self.url``.  Each call that yields a URL bumps the matching
        class-level page counter.  None is returned when no URL has been
        set, the URL matches neither category, or the category's page
        limit has been reached.
        """
        if not self.url:
            return None

        # Pause between page requests.
        time.sleep(1)

        if "mobile" in self.url:
            if TradusScraper.mobileCurrentPage >= TradusScraper.mobilePageCount:
                return None
            TradusScraper.mobileCurrentPage += 1
            return 'http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756&page=%s' % (TradusScraper.mobileCurrentPage)

        if "tablets" in self.url:
            if TradusScraper.tabletCurrentPage >= TradusScraper.tabletPageCount:
                return None
            TradusScraper.tabletCurrentPage += 1
            # NOTE(review): "&cat:7762" has no "filters=" key of its own;
            # kept exactly as it was -- confirm against the site.
            return 'http://www.tradus.com/search/tradus_search/?query=tablets&filters=cat:7756&cat:7762&page=%s' % (TradusScraper.tabletCurrentPage)

        return None

    def getDataFromProductPage(self, url):
        """Scrape a single product page and return its details as a dict.

        The returned dict has the keys ``product_url``, ``source``,
        ``price``, ``in_stock`` and ``name``.  The price is the stripped
        page text; unlike getPhones() it is not passed through
        removePriceFormatting.  An AttributeError propagates when the
        heading or price element is missing from the page.
        """
        html = BaseScraper.read(self, url)
        soup = BeautifulSoup(html)
        name = soup.find('h1', {'class': 'left-content-product-heading'}).string.strip()
        price = soup.find('b', {'id': 'tPrice'}).string.strip()
        # Availability is not read from the page; always reported as 1.
        in_stock = 1

        data = {
            "product_url": str(url),
            "source": "tradus",
            "price": price,
            "in_stock": in_stock,
            "name": name
        }
        return data
122
 
123
if __name__ == '__main__':
    # Manual smoke test: fetch one product page and print the scraped fields.
    s = TradusScraper()
    data = s.getDataFromProductPage('http://www.tradus.com/samsung-galaxy-y-pro-duos-b5512-mobile-phone/p/MOB0000004549294')
    print(data)