Subversion Repositories SmartDukaan

Rev

Rev 14745 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 14745 Rev 15265
Line 55... Line 55...
55
        return self.scrape(self.soup,url)
55
        return self.scrape(self.soup,url)
56
    
56
    
57
    def scrape(self,soup,url):
57
    def scrape(self,soup,url):
58
        print "Inside json creator for %s" %(url)
58
        print "Inside json creator for %s" %(url)
59
        info = []
59
        info = []
-
 
60
        buyBoxInfo = []
60
        oddSeller = soup.findAll("div" , {"class" : "line seller-item odd "})
61
        oddSeller = soup.findAll("div" , {"class" : "line seller-item odd "})
61
        for data in oddSeller:
62
        for data in oddSeller:
62
            temp={}
63
            temp={}
63
            price = data.find('span', attrs={'class' : re.compile('pxs-final-price.*')}).string.strip('Rs.').strip()
64
            price = data.find('span', attrs={'class' : re.compile('pxs-final-price.*')}).string.strip('Rs.').strip()
64
            temp['sellingPrice']=float(price)
65
            temp['sellingPrice']=float(price)
65
            for metrics in data.find("div",{"class":"fk-text-right"}):
66
            for metrics in data.find("div",{"class":"fk-text-right"}):
66
                try:
67
                try:
67
                    metric = metrics.findAll('input', {'type': 'submit'})
68
                    metric = metrics.findAll('input', {'type': 'submit'})
68
                except AttributeError:
69
                except AttributeError:
69
                    continue
70
                    continue
-
 
71
                try:
-
 
72
                    inputTags = metric[0]['data-lst-buytrend']
-
 
73
                except TypeError:
-
 
74
                    continue
70
                dataMetrics = metric[0]['data-listing-metrics']
75
                dataMetrics = metric[0]['data-listing-metrics']
71
                dataMetric = dataMetrics.split(';')
76
                dataMetric = dataMetrics.split(';')
72
                temp['sellingPriceMetric'] = float(dataMetric[1])
77
                temp['sellingPriceMetric'] = float(dataMetric[1])
73
                try:
78
                try:
74
                    temp['shippingFee'] = float(dataMetric[2])
79
                    temp['shippingFee'] = float(dataMetric[2])
75
                except:
80
                except:
76
                    temp['shippingFee'] = 0.0
81
                    temp['shippingFee'] = 0.0
-
 
82
                try:
-
 
83
                    buyTrend = inputTags[0:str(inputTags).index('NWSR')].replace('_','')
-
 
84
                except ValueError:
-
 
85
                    buyTrend = inputTags[0:str(inputTags).index('WSR')].replace('_','')
-
 
86
                temp['buyTrend']=buyTrend.strip()
77
                temp['sellingPrice'] = temp['sellingPrice'] + temp['shippingFee']  
87
                temp['sellingPrice'] = temp['sellingPrice'] + temp['shippingFee']
-
 
88
                if temp['buyTrend'] in  ('PrefNCheap','PrefCheap'):
-
 
89
                    buyBoxInfo.append(temp)
78
                info.append(temp)
90
                info.append(temp)
79
        evenSeller = soup.findAll("div" , {"class" : "line seller-item even "})
91
        evenSeller = soup.findAll("div" , {"class" : "line seller-item even "})
80
        for data in evenSeller:
92
        for data in evenSeller:
81
            temp={}
93
            temp={}
82
            price = data.find('span', attrs={'class' : re.compile('pxs-final-price.*')}).string.strip('Rs.')
94
            price = data.find('span', attrs={'class' : re.compile('pxs-final-price.*')}).string.strip('Rs.')
Line 84... Line 96...
84
            for metrics in data.find("div",{"class":"fk-text-right"}):
96
            for metrics in data.find("div",{"class":"fk-text-right"}):
85
                try:
97
                try:
86
                    metric = metrics.findAll('input', {'type': 'submit'})
98
                    metric = metrics.findAll('input', {'type': 'submit'})
87
                except AttributeError:
99
                except AttributeError:
88
                    continue
100
                    continue
-
 
101
                try:
-
 
102
                    inputTags = metric[0]['data-lst-buytrend']
-
 
103
                except TypeError:
-
 
104
                    continue
89
                dataMetrics = metric[0]['data-listing-metrics']
105
                dataMetrics = metric[0]['data-listing-metrics']
90
                dataMetric = dataMetrics.split(';')
106
                dataMetric = dataMetrics.split(';')
91
                temp['sellingPriceMetric'] = float(dataMetric[1])
107
                temp['sellingPriceMetric'] = float(dataMetric[1])
92
                try:
108
                try:
93
                    temp['shippingFee'] = float(dataMetric[2])
109
                    temp['shippingFee'] = float(dataMetric[2])
94
                except:
110
                except:
95
                    temp['shippingFee'] = 0.0
111
                    temp['shippingFee'] = 0.0
-
 
112
                try:
-
 
113
                    buyTrend = inputTags[0:str(inputTags).index('NWSR')].replace('_','')
-
 
114
                except ValueError:
-
 
115
                    buyTrend = inputTags[0:str(inputTags).index('WSR')].replace('_','')
-
 
116
                temp['buyTrend']=buyTrend.strip()
96
                temp['sellingPrice'] = temp['sellingPrice'] + temp['shippingFee']  
117
                temp['sellingPrice'] = temp['sellingPrice'] + temp['shippingFee']
-
 
118
                if temp['buyTrend'] in  ('PrefNCheap','PrefCheap'):
-
 
119
                    buyBoxInfo.append(temp)  
97
                info.append(temp)
120
                info.append(temp)
98
        print info
121
        print info
-
 
122
        print "==========="
-
 
123
        print buyBoxInfo
99
        print "Returning Json response from flipkart for %s" %(url)
124
        print "Returning Json response from flipkart for %s" %(url)
100
        return info
125
        return info, buyBoxInfo
101
 
126
 
102
if __name__ == '__main__':
127
if __name__ == '__main__':
103
    scraper = FlipkartScraper()
128
    scraper = FlipkartScraper()
104
    scraper.read('http://www.flipkart.com/ps/MOBDZB3Q8WJNKVHG')
129
    x, z = scraper.read('http://www.flipkart.com/ps/MOBDUZSYZCA7HDYW')
-
 
130
    for y in x:
-
 
131
        print y
-
 
132
    print "==========="
-
 
133
    for t in z:
-
 
134
        print t