| 11933 |
kshitij.so |
1 |
from elixir import *
|
|
|
2 |
from shop2020.config.client.ConfigClient import ConfigClient
|
|
|
3 |
from shop2020.clients.CatalogClient import CatalogClient
|
|
|
4 |
from shop2020.model.v1.catalog.impl import DataService
|
|
|
5 |
from shop2020.model.v1.catalog.impl.DataService import PrivateDealsPriceComparison, PrivateDeals, Amazonlisted, \
|
|
|
6 |
SnapdealItem, FlipkartItem
|
|
|
7 |
from shop2020.model.v1.catalog.script import FlipkartScraper, AmazonScraper
|
|
|
8 |
from operator import itemgetter
|
|
|
9 |
import urllib2
|
|
|
10 |
import time
|
|
|
11 |
import simplejson as json
|
|
|
12 |
from datetime import datetime
|
|
|
13 |
|
|
|
14 |
|
|
|
15 |
# --- module-level setup -------------------------------------------------
# Resolve the staging DB hostname from central config and bind the elixir
# models to it before any query runs.
config_client = ConfigClient()
host = config_client.get_property('staging_hostname')
DataService.initialize(db_hostname=host)

# Scraped price info (one __ScrapingInfo per active private deal); filled
# by main() and consumed by the scrape*/populate*/commitData helpers below.
scrapedInfo = []
# Page scrapers reused across all items.
scraperFk = FlipkartScraper.FlipkartScraper()
scraperAmazon = AmazonScraper.AmazonScraper()
|
|
23 |
class __ScrapingInfo:
    """Mutable record holding one item's competitor-price snapshot.

    Price fields start as None and are filled in by the scrape* and
    populate* helpers; supc/fsn/asin are the item's marketplace
    identifiers on Snapdeal, Flipkart and Amazon respectively.
    """

    def __init__(self, itemId, dealPrice, saholicPrice, sdPrice, fkPrice,
                 amazonPrice, supc, fsn, asin):
        # Bulk-assign each constructor argument as a same-named attribute.
        names = ('itemId', 'dealPrice', 'saholicPrice', 'sdPrice',
                 'fkPrice', 'amazonPrice', 'supc', 'fsn', 'asin')
        values = (itemId, dealPrice, saholicPrice, sdPrice, fkPrice,
                  amazonPrice, supc, fsn, asin)
        for name, value in zip(names, values):
            setattr(self, name, value)
|
|
|
34 |
|
|
|
35 |
def getAllActivePDFromMaster():
    """Return all active private deals from the master catalog service.

    NOTE(review): main() iterates the result with iterkeys(), so this is
    presumably a dict keyed by item id -- confirm against the thrift IDL.
    """
    return CatalogClient().get_client().getAllActivePrivateDeals(None, 0)
|
|
|
39 |
|
|
|
40 |
def scrapeSnapdeal():
|
|
|
41 |
for data in scrapedInfo:
|
|
|
42 |
if data.supc is None or len(data.supc)==0:
|
|
|
43 |
continue
|
|
|
44 |
try:
|
|
|
45 |
url="http://www.snapdeal.com/acors/json/gvbps?supc=%s&catId=91&sort=sellingPrice"%(data.supc)
|
|
|
46 |
print url
|
|
|
47 |
time.sleep(1)
|
|
|
48 |
req = urllib2.Request(url)
|
|
|
49 |
response = urllib2.urlopen(req)
|
|
|
50 |
json_input = response.read()
|
|
|
51 |
vendorInfo = json.loads(json_input)
|
|
|
52 |
lowestOfferPrice, iterator = (0,)*2
|
|
|
53 |
for vendor in vendorInfo:
|
|
|
54 |
if iterator == 0:
|
|
|
55 |
lowestOfferPrice = vendor['sellingPrice']
|
|
|
56 |
break
|
|
|
57 |
data.sdPrice = lowestOfferPrice
|
|
|
58 |
except:
|
|
|
59 |
continue
|
|
|
60 |
|
|
|
61 |
def scrapeFlipkart():
    """Populate data.fkPrice for every scraped item that has a Flipkart FSN.

    Reads the vendor list from the Flipkart product-sellers page and records
    the lowest sellingPrice among the vendors.  Items without an FSN, or
    whose scrape fails, are skipped (deliberate best-effort scrape).  When
    no vendors are returned fkPrice is left untouched, as before.
    """
    for data in scrapedInfo:
        if not data.fsn:
            continue
        try:
            url = "http://www.flipkart.com/ps/%s" % (data.fsn)
            vendorsData = scraperFk.read(url)
            # O(n) min instead of sorting the whole list just to take the
            # first element (the original's iterator==0 loop did exactly
            # that); catch Exception rather than bare except so
            # KeyboardInterrupt/SystemExit still propagate.
            if vendorsData:
                cheapest = min(vendorsData, key=itemgetter('sellingPrice'))
                data.fkPrice = cheapest['sellingPrice']
        except Exception:
            continue
|
|
|
78 |
|
|
|
79 |
def scrapeAmazon():
    """Populate data.amazonPrice for every scraped item that has an ASIN.

    Loads the Amazon offer-listing page (sorted by price) through the
    shared AmazonScraper and records whatever createData() extracts --
    presumably the lowest selling price; confirm against AmazonScraper.
    Items without an ASIN, or whose scrape fails, are skipped (deliberate
    best-effort scrape).
    """
    for data in scrapedInfo:
        if not data.asin:
            continue
        try:
            url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps" % (data.asin)
            scraperAmazon.read(url)
            # Exception instead of the original bare except: a bare except
            # also swallowed KeyboardInterrupt/SystemExit.
            data.amazonPrice = scraperAmazon.createData()
        except Exception:
            continue
|
|
|
90 |
|
|
|
91 |
def populateScrapingInfo():
    """Fill each item's marketplace identifiers (asin/supc/fsn) from the DB.

    Looks up the mapping rows for every scraped item; identifiers whose
    mapping row is missing are left as None.
    """
    for info in scrapedInfo:
        # NOTE: kwarg names differ per model (itemId vs item_id) -- they
        # mirror each table's column name, so keep them exactly as-is.
        amazon_row = Amazonlisted.get_by(itemId=info.itemId)
        if amazon_row is not None:
            info.asin = amazon_row.asin
        sd_row = SnapdealItem.get_by(item_id=info.itemId)
        if sd_row is not None:
            info.supc = sd_row.supc
        fk_row = FlipkartItem.get_by(item_id=info.itemId)
        if fk_row is not None:
            info.fsn = fk_row.flipkartSerialNumber
|
|
|
102 |
|
|
|
103 |
def populateOurPrices():
    """Record our own (Saholic) selling price for each scraped item."""
    client = CatalogClient().get_client()
    for info in scrapedInfo:
        info.saholicPrice = client.getItem(info.itemId).sellingPrice
|
|
|
108 |
|
|
|
109 |
def commitData():
    """Replace the price-comparison table with the freshly scraped data.

    Wipes the previous run's rows first (committed separately), then
    inserts one row per scraped item and commits the batch once.
    """
    PrivateDealsPriceComparison.query.delete()
    session.commit()
    for info in scrapedInfo:
        row = PrivateDealsPriceComparison()
        row.item_id = info.itemId
        row.dealPrice = info.dealPrice
        row.saholicPrice = info.saholicPrice
        row.sdPrice = info.sdPrice
        row.fkPrice = info.fkPrice
        row.amazonPrice = info.amazonPrice
        row.asin = info.asin
        row.fsn = info.fsn
        row.supc = info.supc
        row.lastProcessedTimestamp = datetime.now()
    session.commit()
|
|
|
125 |
|
|
|
126 |
|
|
|
127 |
def main():
    """Scrape competitor prices for all active private deals and persist them.

    Pipeline: fetch deals -> build scrapedInfo records -> resolve
    marketplace ids -> scrape Snapdeal/Flipkart/Amazon -> add our own
    prices -> rewrite the comparison table.
    """
    privateDeals = getAllActivePDFromMaster()
    global scrapedInfo
    for itemId, deal in privateDeals.items():
        scrapedInfo.append(
            __ScrapingInfo(itemId, deal.dealPrice,
                           None, None, None, None, None, None, None))
    privateDeals = {}  # drop the map early; everything needed is in scrapedInfo
    populateScrapingInfo()
    scrapeSnapdeal()
    scrapeFlipkart()
    scrapeAmazon()
    populateOurPrices()
    commitData()
|
|
|
140 |
|
|
|
141 |
|
|
|
142 |
|
|
|
143 |
if __name__ == '__main__':
    main()
|