| Line 1... |
Line 1... |
| 1 |
import pymongo
|
1 |
import pymongo
|
| 2 |
from dtr.utils.utils import to_java_date, getNlcPoints
|
2 |
from dtr.utils.utils import to_java_date
|
| 3 |
from datetime import datetime, timedelta
|
3 |
from datetime import datetime, timedelta
|
| 4 |
from dtr.utils import AmazonPriceOnlyScraper, AmazonDealScraper
|
- |
|
| 5 |
from multiprocessing import Pool as ThreadPool
|
- |
|
| 6 |
from multiprocessing import cpu_count
|
- |
|
| 7 |
import optparse
|
4 |
import optparse
|
| 8 |
from dtr.storage.MemCache import MemCache
|
5 |
from dtr.storage.MemCache import MemCache
|
| 9 |
from dtr.utils.utils import getCashBack
|
6 |
from dtr.utils.utils import getCashBack
|
| 10 |
import traceback
|
7 |
from shop2020.model.v1.catalog.script import AmazonAsyncScraper
|
| - |
|
8 |
|
| 11 |
|
9 |
|
| 12 |
con = None
|
10 |
con = None
|
| 13 |
|
11 |
|
| 14 |
parser = optparse.OptionParser()
|
12 |
parser = optparse.OptionParser()
|
| 15 |
parser.add_option("-m", "--m", dest="mongoHost",
|
13 |
parser.add_option("-m", "--m", dest="mongoHost",
|
| Line 18... |
Line 16... |
| 18 |
metavar="mongo_host")
|
16 |
metavar="mongo_host")
|
| 19 |
|
17 |
|
| 20 |
(options, args) = parser.parse_args()

mc = MemCache(options.mongoHost)

# Amazon API credentials are read from the environment instead of being
# hard-coded here: secrets committed to source control are exposed to everyone
# with repository access. The values that used to be inlined on this line
# should be treated as leaked and rotated.
# A missing variable raises KeyError at start-up, naming the variable.
# NOTE(review): the three values are passed positionally in the same order as
# before (access key, secret key, account/seller id -- confirm against
# AmazonAsyncScraper.Products).
import os

amScraper = AmazonAsyncScraper.Products(
    os.environ["AMAZON_ACCESS_KEY"],
    os.environ["AMAZON_SECRET_KEY"],
    os.environ["AMAZON_ACCOUNT_ID"],
)
# Marketplace passed to every pricing request made through amScraper.
marketplaceId = 'A21TJRUUN4KGV'

# Store name -> numeric source id used in catalog records.
SOURCE_MAP = {'AMAZON':1,'FLIPKART':2,'SNAPDEAL':3,'SAHOLIC':4, 'SHOPCLUES.COM':5,'PAYTM.COM':6}
|
| 26 |
|
25 |
|
| 27 |
def get_mongo_connection(host=options.mongoHost, port=27017):
|
26 |
def get_mongo_connection(host=options.mongoHost, port=27017):
|
| 28 |
global con
|
27 |
global con
|
| Line 102... |
Line 101... |
| 102 |
else:
|
101 |
else:
|
| 103 |
data = toScrapMap.get(manualDeal['sku'])
|
102 |
data = toScrapMap.get(manualDeal['sku'])
|
| 104 |
data['dealFlag'] = 1
|
103 |
data['dealFlag'] = 1
|
| 105 |
data['dealType'] = manualDeal['dealType']
|
104 |
data['dealType'] = manualDeal['dealType']
|
| 106 |
data['dealUrl'] = manualDeal['dealUrl']
|
105 |
data['dealUrl'] = manualDeal['dealUrl']
|
| - |
|
106 |
|
| 107 |
pool = ThreadPool(cpu_count() * 2)
|
107 |
count = 0
|
| - |
|
108 |
temp = {}
|
| 108 |
pool.map(scrapeAmazon,toScrapMap.values())
|
109 |
for v in toScrapMap.itervalues():
|
| 109 |
pool.close()
|
110 |
print v
|
| - |
|
111 |
#Lets validate identifier
|
| - |
|
112 |
if len(str(v['identifier']).strip()) !=10:
|
| 110 |
pool.join()
|
113 |
continue
|
| 111 |
print "joining threads at %s"%(str(datetime.now()))
|
114 |
temp[str(v['identifier']).strip().upper()] = v
|
| - |
|
115 |
count = count+1
|
| - |
|
116 |
if count == 20:
|
| - |
|
117 |
scrapeAmazon(temp)
|
| - |
|
118 |
temp = {}
|
| - |
|
119 |
count =0
|
| 112 |
|
120 |
|
| 113 |
|
121 |
|
| 114 |
def scrapeAmazon(dataMap):
    """Refresh Amazon price and stock data for one batch of catalog items.

    dataMap maps an ASIN key to that item's catalog record (a dict). Prices
    for the whole batch are fetched with a single
    amScraper.get_competitive_pricing_for_asin call; each record is then
    written back to Catalog.MasterData and Catalog.Deals and its deal is
    recomputed via recomputeDeal.

    An item is skipped, without affecting the rest of the batch, when it has
    no identifier, has ignorePricing == 1, or carries a priceUpdatedOn newer
    than five minutes ago.

    Returns None. Output is written with single-argument print(...) calls,
    which behave the same as the print statements used elsewhere in the file.
    """
    if not dataMap:
        # Nothing to price; avoid a pointless remote call.
        return

    asinPricingMap = amScraper.get_competitive_pricing_for_asin(marketplaceId, list(dataMap))
    print("asinPricingMap  %s" % (asinPricingMap,))
    amazonSource = SOURCE_MAP.get('AMAZON')

    for asin, data in dataMap.items():
        print(data)
        identifier = data['identifier']
        print(str(identifier))

        # The guards below use `continue`, not `return`: this function now
        # handles a whole batch, so one skipped item must not drop the
        # remaining items.
        if identifier is None or len(identifier.strip()) == 0:
            continue

        if data.get('ignorePricing') == 1:
            print("Ignored items returning for %d" % (data['_id']))
            continue

        # Records without a priceUpdatedOn value are always refreshed.
        lastPriceUpdate = data.get('priceUpdatedOn')
        if lastPriceUpdate is not None:
            freshnessCutoff = to_java_date(datetime.now() - timedelta(minutes=5))
            if lastPriceUpdate > freshnessCutoff:
                print("sku id is already updated %s" % (data['_id'],))
                continue

        # An ASIN missing from the pricing response counts as "no price".
        lowestPrice = asinPricingMap.get(asin) or 0
        inStock = 1 if lowestPrice > 0 else 0
        print(lowestPrice)
        print(inStock)

        # Without a fresh price, cash-back is computed on the stored price.
        priceForCashBack = lowestPrice if inStock else data['available_price']
        netPriceAfterCashBack = getNetPriceForItem(
            data['_id'], amazonSource, data['category_id'], priceForCashBack)

        now = to_java_date(datetime.now())
        masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
        dealFields = {
            'in_stock': inStock,
            'dealType': data['dealType'],
            'codAvailable': data['codAvailable'],
            'netPriceAfterCashBack': netPriceAfterCashBack,
        }
        if inStock:
            # The stored price is only overwritten when a price was obtained.
            masterFields['available_price'] = lowestPrice
            dealFields['available_price'] = lowestPrice

        catalog = get_mongo_connection().Catalog
        catalog.MasterData.update({'_id': data['_id']}, {'$set': masterFields}, multi=True)
        catalog.Deals.update({'_id': data['_id']}, {'$set': dealFields}, multi=True)

        try:
            recomputeDeal(data)
        except Exception:
            # Best effort: a failed recompute must not stop the batch.
            print("Unable to compute deal for  %s" % (data.get('skuBundleId'),))
|
| 177 |
|
164 |
|
| 178 |
def populateNegativeDeals():
    """Store the distinct 'sku' values of Catalog.NegativeDeals in the cache.

    The values are written through mc.set under the key "negative_deals",
    with 600 as the third argument (presumably an expiry in seconds --
    confirm against MemCache).
    """
    negativeDealsCollection = get_mongo_connection().Catalog.NegativeDeals
    skus = negativeDealsCollection.find().distinct('sku')
    mc.set("negative_deals", skus, 600)
|
| 181 |
|
168 |
|