| Line 1... |
Line 1... |
| 1 |
import pymongo
|
1 |
import pymongo
|
| 2 |
from dtr.utils.utils import to_java_date
|
2 |
from dtr.utils.utils import to_java_date
|
| 3 |
from datetime import datetime, timedelta
|
3 |
from datetime import datetime, timedelta
|
| 4 |
from dtr.utils import AmazonPriceOnlyScraper
|
4 |
from dtr.utils import AmazonPriceOnlyScraper, AmazonDealScraper
|
| 5 |
from multiprocessing import Pool as ThreadPool
|
5 |
from multiprocessing import Pool as ThreadPool
|
| 6 |
from multiprocessing import cpu_count
|
6 |
from multiprocessing import cpu_count
|
| 7 |
import optparse
|
7 |
import optparse
|
| 8 |
|
8 |
|
| 9 |
con = None
|
9 |
con = None
|
| Line 38... |
Line 38... |
| 38 |
dealFlagged = list(get_mongo_connection().Catalog.Deals.find({'source_id':1,'showDeal':1,'totalPoints':{'$gt':0}}))
|
38 |
dealFlagged = list(get_mongo_connection().Catalog.Deals.find({'source_id':1,'showDeal':1,'totalPoints':{'$gt':0}}))
|
| 39 |
for deal in dealFlagged:
|
39 |
for deal in dealFlagged:
|
| 40 |
if not toScrapMap.has_key(deal['_id']):
|
40 |
if not toScrapMap.has_key(deal['_id']):
|
| 41 |
data = list(get_mongo_connection().Catalog.MasterData.find({'_id':deal['_id']}))
|
41 |
data = list(get_mongo_connection().Catalog.MasterData.find({'_id':deal['_id']}))
|
| 42 |
toScrapMap[deal['_id']] = data[0]
|
42 |
toScrapMap[deal['_id']] = data[0]
|
| - |
|
43 |
manualDeals = list(get_mongo_connection().Catalog.ManualDeals.find({'startDate':{'$lte':to_java_date(datetime.now())},'endDate':{'$gte':to_java_date(datetime.now())},'source_id':1}))
|
| - |
|
44 |
for manualDeal in manualDeals:
|
| - |
|
45 |
if not toScrapMap.has_key(manualDeal['sku']):
|
| - |
|
46 |
data = list(get_mongo_connection().Catalog.MasterData.find({'_id':manualDeal['sku']}))
|
| - |
|
47 |
if len(data) > 0:
|
| - |
|
48 |
data[0]['dealFlag'] = 1
|
| - |
|
49 |
data[0]['dealType'] = manualDeal['dealType']
|
| - |
|
50 |
toScrapMap[manualDeal['sku']] = data[0]
|
| - |
|
51 |
else:
|
| - |
|
52 |
data = toScrapMap.get(manualDeal['sku'])
|
| - |
|
53 |
data['dealFlag'] = 1
|
| - |
|
54 |
data['dealType'] = manualDeal['dealType']
|
| 43 |
pool = ThreadPool(cpu_count() * 2)
|
55 |
pool = ThreadPool(cpu_count() * 2)
|
| 44 |
pool.map(scrapeAmazon,toScrapMap.values())
|
56 |
pool.map(scrapeAmazon,toScrapMap.values())
|
| 45 |
pool.close()
|
57 |
pool.close()
|
| 46 |
pool.join()
|
58 |
pool.join()
|
| 47 |
print "joining threads at %s"%(str(datetime.now()))
|
59 |
print "joining threads at %s"%(str(datetime.now()))
|
| 48 |
|
60 |
|
| 49 |
|
61 |
|
| 50 |
def scrapeAmazon(data):
|
62 |
def scrapeAmazon(data):
|
| 51 |
inStock = 0
|
63 |
inStock = 0
|
| - |
|
64 |
dealScraping = False
|
| 52 |
print str(data['identifier'])
|
65 |
print str(data['identifier'])
|
| 53 |
if data['identifier'] is None or len(data['identifier'].strip())==0:
|
66 |
if data['identifier'] is None or len(data['identifier'].strip())==0:
|
| 54 |
return
|
67 |
return
|
| 55 |
|
68 |
|
| 56 |
try:
|
69 |
try:
|
| Line 59... |
Line 72... |
| 59 |
return
|
72 |
return
|
| 60 |
except:
|
73 |
except:
|
| 61 |
pass
|
74 |
pass
|
| 62 |
|
75 |
|
| 63 |
url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(data['identifier'].strip())
|
76 |
url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(data['identifier'].strip())
|
| - |
|
77 |
|
| - |
|
78 |
try:
|
| - |
|
79 |
if data['dealFlag'] ==1 and data['dealType'] ==1:
|
| - |
|
80 |
deal_url = "http://www.amazon.in/dp/%s"%(data['identifier'].strip())
|
| - |
|
81 |
dealScraping = True
|
| - |
|
82 |
except:
|
| - |
|
83 |
data['dealFlag'] = 0
|
| - |
|
84 |
data['dealType'] = 0
|
| - |
|
85 |
|
| 64 |
print url
|
86 |
print url
|
| 65 |
lowestPrice = 0.0
|
87 |
lowestPrice = 0.0
|
| - |
|
88 |
|
| - |
|
89 |
if not dealScraping:
|
| 66 |
scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
|
90 |
scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
|
| 67 |
lowestPrice = scraperAmazon.read(url)
|
91 |
lowestPrice = scraperAmazon.read(url)
|
| - |
|
92 |
else:
|
| - |
|
93 |
dealScraperAmazon = AmazonDealScraper.AmazonScraper()
|
| - |
|
94 |
lowestPrice = dealScraperAmazon.read(deal_url)
|
| - |
|
95 |
if lowestPrice == 0:
|
| - |
|
96 |
scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
|
| - |
|
97 |
lowestPrice = scraperAmazon.read(url)
|
| - |
|
98 |
dealScraping = False
|
| - |
|
99 |
|
| 68 |
print lowestPrice
|
100 |
print lowestPrice
|
| 69 |
if lowestPrice > 0:
|
101 |
if lowestPrice > 0:
|
| 70 |
inStock = 1
|
102 |
inStock = 1
|
| 71 |
print lowestPrice
|
103 |
print lowestPrice
|
| 72 |
print inStock
|
104 |
print inStock
|
| 73 |
if lowestPrice > 0:
|
105 |
if lowestPrice > 0:
|
| 74 |
get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice,'updatedOn':to_java_date(datetime.now()),'priceUpdatedOn':to_java_date(datetime.now()),'in_stock':inStock}}, multi=True)
|
106 |
get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice,'updatedOn':to_java_date(datetime.now()),'priceUpdatedOn':to_java_date(datetime.now()),'in_stock':inStock}}, multi=True)
|
| 75 |
get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice , 'in_stock':inStock}}, multi=True)
|
107 |
get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice , 'in_stock':inStock,'dealType':data['dealType']}}, multi=True)
|
| 76 |
else:
|
108 |
else:
|
| 77 |
get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'updatedOn':to_java_date(datetime.now()),'in_stock':inStock,'priceUpdatedOn':to_java_date(datetime.now())}}, multi=True)
|
109 |
get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'updatedOn':to_java_date(datetime.now()),'in_stock':inStock,'priceUpdatedOn':to_java_date(datetime.now())}}, multi=True)
|
| 78 |
get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'in_stock':inStock}}, multi=True)
|
110 |
get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'in_stock':inStock,'dealType':data['dealType']}}, multi=True)
|
| 79 |
|
111 |
|
| 80 |
try:
|
112 |
try:
|
| 81 |
recomputeDeal(data['skuBundleId'])
|
113 |
recomputeDeal(data['skuBundleId'])
|
| 82 |
except:
|
114 |
except:
|
| 83 |
print "Unable to compute deal for ",data['skuBundleId']
|
115 |
print "Unable to compute deal for ",data['skuBundleId']
|