| Line 55... |
Line 55... |
| 55 |
return self.scrape(self.soup,url)
|
55 |
return self.scrape(self.soup,url)
|
| 56 |
|
56 |
|
| 57 |
def scrape(self, soup, url):
    """Extract per-seller pricing data from a parsed seller-listing page.

    Seller rows are the ``div`` elements whose class is exactly
    ``"line seller-item odd "`` or ``"line seller-item even "``. All odd
    rows are processed before the even rows, so the result is grouped by
    row parity rather than by position on the page.

    Every seller is described by a dict with these keys:

    * ``sellingPrice`` -- price shown in the ``pxs-final-price*`` span
      plus the shipping fee.
    * ``shippingFee`` -- third ``;``-separated field of the submit
      input's ``data-listing-metrics`` attribute, or ``0.0`` when that
      field is missing or not a number.
    * ``sellingPriceMetric`` -- second field of ``data-listing-metrics``
      (only present when a submit input was found).
    * ``buyTrend`` -- the ``data-lst-buytrend`` attribute up to its
      ``NWSR``/``WSR`` marker with underscores removed (only present when
      a submit input was found).

    Args:
        soup: Parsed document exposing ``findAll(name, attrs)``.
        url: Page address; used only in the progress messages.

    Returns:
        A tuple ``(info, buyBoxInfo)``. ``info`` lists every seller;
        ``buyBoxInfo`` lists the sellers whose ``buyTrend`` is
        ``PrefNCheap`` or ``PrefCheap`` (the same dict objects).

    Raises:
        AttributeError: A seller row has no price span.
        ValueError: The price or the selling-price metric is not numeric.
    """
    print("Inside json creator for %s" % (url))

    # Compiled once instead of once per seller row.
    price_class = re.compile('pxs-final-price.*')
    buy_box_trends = ('PrefNCheap', 'PrefCheap')

    def parse_seller(row):
        """Build the seller dict for a single seller row."""
        # Default fee so the total below works even without metrics.
        seller = {'shippingFee': 0.0}

        price_text = row.find('span', attrs={'class': price_class}).string
        seller['sellingPrice'] = float(price_text.strip('Rs.').strip())

        container = row.find("div", {"class": "fk-text-right"})
        children = () if container is None else container
        for child in children:
            # Text nodes have no findAll, and element children may lack
            # the submit input or its data attributes: skip all of those.
            try:
                submit = child.findAll('input', {'type': 'submit'})[0]
                trend_raw = submit['data-lst-buytrend']
                metrics_raw = submit['data-listing-metrics']
            except (AttributeError, TypeError, IndexError, KeyError):
                continue

            fields = metrics_raw.split(';')
            seller['sellingPriceMetric'] = float(fields[1])
            try:
                seller['shippingFee'] = float(fields[2])
            except (IndexError, ValueError):
                seller['shippingFee'] = 0.0

            # 'NWSR' contains 'WSR', so the longer marker is tried first.
            cut = trend_raw.find('NWSR')
            if cut == -1:
                cut = trend_raw.find('WSR')
            if cut == -1:
                # No marker at all: keep the whole value.
                cut = len(trend_raw)
            seller['buyTrend'] = trend_raw[:cut].replace('_', '').strip()

        seller['sellingPrice'] += seller['shippingFee']
        return seller

    info = []
    buyBoxInfo = []
    for row_class in ("line seller-item odd ", "line seller-item even "):
        for row in soup.findAll("div", {"class": row_class}):
            seller = parse_seller(row)
            if seller.get('buyTrend') in buy_box_trends:
                buyBoxInfo.append(seller)
            info.append(seller)

    print(info)
    print("===========")
    print(buyBoxInfo)
    print("Returning Json response from flipkart for %s" % (url))
    return info, buyBoxInfo
|
| 101 |
|
126 |
|
| 102 |
if __name__ == '__main__':
    # Manual smoke run: scrape one product page and dump both result lists.
    scraper = FlipkartScraper()
    # read() hands back the (all sellers, buy-box sellers) pair from scrape().
    sellers, buy_box_sellers = scraper.read(
        'http://www.flipkart.com/ps/MOBDUZSYZCA7HDYW')
    for seller in sellers:
        print(seller)
    print("===========")
    for seller in buy_box_sellers:
        print(seller)
|