Subversion Repositories SmartDukaan

Rev

Rev 5770 | Rev 6024 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 5770 Rev 6022
Line 27... Line 27...
27
        self.phones = None
27
        self.phones = None
28
        #self.setPageCount()
28
        #self.setPageCount()
29
    
29
    
30
    def getPhones(self):
30
    def getPhones(self):
31
        phones = []
31
        phones = []
32
        for div in self.soup.findAll('div', {'class': 'mainresult-show-right'}):
32
        for div in self.soup.findAll('div', {'class': 'prod_main_div'}):
33
            try:
33
            try:
34
                anchor = div.find('a')
34
                productUrlContainer = div.find('div', {'class': 'product_name search-product-block'})
35
                if(len(anchor.contents)==1):
-
 
36
                    name = anchor.contents[0].strip()
35
                name = productUrlContainer.contents[1].string
37
                elif(anchor.contents[1].string =="Tablet"):
-
 
38
                    name = anchor.contents[0] + "Tablet"
-
 
39
                    if(len(anchor.contents)>2):
-
 
40
                        name = name + anchor.contents[2];
36
                product_url = "www.tradus.com" +productUrlContainer.contents[1]['href']
41
                else:
-
 
42
                    name = anchor.contents[2].strip()
-
 
43
                    if(len(name)== 0):
-
 
44
                        name = anchor.contents[0].strip()
37
                price = div.find('span', {'class':'numDiv_left'}).string.strip()
45
                product_url = anchor['href'].strip()
38
                price = removePriceFormatting(price)
46
                price = div.find('div', {'class': 'mainresult-show-right-startrate'})('span')[0]('span')[0].contents[0].strip()[3:]
-
 
47
                in_stock = 1
39
                in_stock = 1
48
                
40
                
49
                try:
41
                try:
50
                    if price is None:
42
                    if price is None:
51
                        continue
43
                        continue
Line 105... Line 97...
105
            
97
            
106
 
98
 
107
    def getDataFromProductPage(self, url):
99
    def getDataFromProductPage(self, url):
108
        html = BaseScraper.read(self, url)
100
        html = BaseScraper.read(self, url)
109
        soup = BeautifulSoup(html)
101
        soup = BeautifulSoup(html)
110
        name = soup.find('h1',{'class': 'left-content-product-heading'}).string.strip()
102
        name = soup.find('span',{'itemprop':'name'}).string.strip()
111
        price = soup.find('b', {'id': 'tPrice'}).string.strip()
103
        price= soup.find('span',{'class':'mrp3'}).contents[0].strip()
112
        if("Rs." in price):
-
 
113
            price = price[4:]
104
        price = removePriceFormatting(price)
114
        in_stock = 1
105
        in_stock = 1
115
        
106
        
116
        data = {
107
        data = {
117
            "product_url": str(url), 
108
            "product_url": str(url), 
118
            "source": "tradus", 
109
            "source": "tradus", 
Line 120... Line 111...
120
            "in_stock": 1, 
111
            "in_stock": 1, 
121
            "name": name
112
            "name": name
122
        }
113
        }
123
        return data
114
        return data
124
 
115
 
-
 
116
def removePriceFormatting(price_string):
    """Reduce a scraped price string to its whole-number digits.

    Removes the 'Rs.' / 'Rs' currency marker, comma thousands separators
    and all whitespace, then discards everything from the first decimal
    point onwards.

    price_string -- raw price text, e.g. 'Rs. 1,299.00'
    Returns the cleaned text, e.g. '1299' (a string, not a number).
    """
    without_marker = price_string.strip().replace('Rs.', '').replace('Rs', '').replace(',', '')
    # split()/join drops every whitespace run in one pass. The previous
    # repeated replace(' ', '') only removed plain spaces, so tabs, newlines
    # and (for unicode text) non-breaking spaces leaked into the result.
    compact = ''.join(without_marker.split())
    # Keep only the part before the first '.', truncating any fraction.
    return compact.split('.')[0]
-
 
118
 
125
if __name__ == '__main__':
119
if __name__ == '__main__':
126
    s = TradusScraper()
120
    s = TradusScraper()
127
    '''html = BaseScraper.read(s,'http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756')
-
 
128
    soup = BeautifulSoup(html)
-
 
129
    
-
 
130
    phones = []
-
 
131
    for div in soup.findAll('div', {'class': 'mainresult-show-right'}):
-
 
132
        try:
-
 
133
            anchor = div.find('a')
-
 
134
            name = anchor.contents[2].strip()
-
 
135
            if(len(name)== 0):
-
 
136
                name = anchor.contents[0].strip()
-
 
137
            product_url = anchor['href'].strip()
-
 
138
            price = div.find('div', {'class': 'mainresult-show-right-startrate'})('span')[0]('span')[0].contents[0].strip()[3:]
-
 
139
            in_stock = 1
-
 
140
            
-
 
141
            try:
-
 
142
                if price is None:
-
 
143
                    continue
-
 
144
                else:
-
 
145
                    phones.append({
-
 
146
                            'name': str(name), 
-
 
147
                            'price': removePriceFormatting(price),
-
 
148
                            'source': 'tradus', 
-
 
149
                            'product_url': str(product_url), 
-
 
150
                            'in_stock': in_stock
-
 
151
                        })
-
 
152
            except Exception as e:
-
 
153
                print e
-
 
154
                pass
-
 
155
            
-
 
156
        except IndexError as iex:
-
 
157
            try:
-
 
158
                price = div.find('div', {'class': 'mainresult-show-right-startrate'})('span')[0].contents[0].strip()[18:]
-
 
159
                in_stock = 1
-
 
160
                
-
 
161
                phones.append({
-
 
162
                            'name': str(name), 
-
 
163
                            'price': removePriceFormatting(price),
-
 
164
                            'source': 'tradus', 
-
 
165
                            'product_url': str(product_url), 
-
 
166
                            'in_stock': in_stock
-
 
167
                        })
-
 
168
                
-
 
169
            except Exception as ex:
-
 
170
                print ex
-
 
171
                pass
-
 
172
        except Exception as e:
-
 
173
            print e
-
 
174
            pass
-
 
175
    print phones'''
-
 
176
    
-
 
177
    
-
 
178
    data = s.getDataFromProductPage('http://www.tradus.com/samsung-galaxy-y-pro-duos-b5512-mobile-phone/p/MOB0000004549294')
121
    data = s.getDataFromProductPage('http://www.tradus.com/zing-q800-dual-sim-mobile-phone/p/MOB0000004506663')
179
    print data
122
    print data
180
123