Subversion Repositories SmartDukaan

Rev

Rev 19185 | Rev 20347 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 19185 Rev 20311
Line 1... Line 1...
1
import pymongo
1
import pymongo
2
from dtr.utils.utils import to_java_date, getNlcPoints
2
from dtr.utils.utils import to_java_date
3
from datetime import datetime, timedelta
3
from datetime import datetime, timedelta
4
from dtr.utils import AmazonPriceOnlyScraper, AmazonDealScraper
-
 
5
from multiprocessing import Pool as ThreadPool
-
 
6
from multiprocessing import cpu_count
-
 
7
import optparse
4
import optparse
8
from dtr.storage.MemCache import MemCache
5
from dtr.storage.MemCache import MemCache
9
from dtr.utils.utils import getCashBack
6
from dtr.utils.utils import getCashBack
10
import traceback
7
from shop2020.model.v1.catalog.script import AmazonAsyncScraper
-
 
8
 
11
 
9
 
12
con = None
10
con = None
13
 
11
 
14
parser = optparse.OptionParser()
12
parser = optparse.OptionParser()
15
parser.add_option("-m", "--m", dest="mongoHost",
13
parser.add_option("-m", "--m", dest="mongoHost",
Line 18... Line 16...
18
                      metavar="mongo_host")
16
                      metavar="mongo_host")
19
 
17
 
20
(options, args) = parser.parse_args()
18
(options, args) = parser.parse_args()
21
 
19
 
22
mc = MemCache(options.mongoHost)
20
mc = MemCache(options.mongoHost)
23
 
-
 
-
 
21
amScraper = AmazonAsyncScraper.Products("AKIAII3SGRXBJDPCHSGQ", "B92xTbNBTYygbGs98w01nFQUhbec1pNCkCsKVfpg", "AF6E3O0VE0X4D")
-
 
22
marketplaceId = 'A21TJRUUN4KGV'
24
 
23
 
25
SOURCE_MAP = {'AMAZON':1,'FLIPKART':2,'SNAPDEAL':3,'SAHOLIC':4, 'SHOPCLUES.COM':5,'PAYTM.COM':6}
24
SOURCE_MAP = {'AMAZON':1,'FLIPKART':2,'SNAPDEAL':3,'SAHOLIC':4, 'SHOPCLUES.COM':5,'PAYTM.COM':6}
26
 
25
 
27
def get_mongo_connection(host=options.mongoHost, port=27017):
26
def get_mongo_connection(host=options.mongoHost, port=27017):
28
    global con
27
    global con
Line 102... Line 101...
102
        else:
101
        else:
103
            data = toScrapMap.get(manualDeal['sku'])
102
            data = toScrapMap.get(manualDeal['sku'])
104
            data['dealFlag'] = 1
103
            data['dealFlag'] = 1
105
            data['dealType'] = manualDeal['dealType']
104
            data['dealType'] = manualDeal['dealType']
106
            data['dealUrl'] = manualDeal['dealUrl']
105
            data['dealUrl'] = manualDeal['dealUrl']
-
 
106
    
107
    pool = ThreadPool(cpu_count() * 2)
107
    count = 0
-
 
108
    temp = {}
108
    pool.map(scrapeAmazon,toScrapMap.values())
109
    for v in toScrapMap.itervalues():
109
    pool.close()
110
        print v
-
 
111
        #Lets validate identifier
-
 
112
        if len(str(v['identifier']).strip()) !=10:
110
    pool.join()
113
            continue
111
    print "joining threads at %s"%(str(datetime.now()))
114
        temp[str(v['identifier']).strip().upper()] = v
-
 
115
        count = count+1
-
 
116
        if count == 20:
-
 
117
            scrapeAmazon(temp)
-
 
118
            temp = {}
-
 
119
            count =0
112
        
120
        
113
 
121
 
114
def scrapeAmazon(data):
122
def scrapeAmazon(dataMap):
115
    inStock = 0
-
 
116
    dealScraping = False
123
    asinPricingMap = amScraper.get_competitive_pricing_for_asin(marketplaceId, dataMap.keys())
117
    print str(data['identifier'])
124
    print "asinPricingMap ",asinPricingMap
118
    if data['identifier'] is None or len(data['identifier'].strip())==0:
125
    for k, data in dataMap.iteritems():
119
        return
126
        print data
120
    
-
 
121
    if data.get('ignorePricing') ==1:
-
 
122
        print "Ignored items returning for %d"%(data['_id'])
-
 
123
        return 
127
        inStock = 0
124
    
-
 
125
    try:
-
 
126
        if data['priceUpdatedOn'] > to_java_date(datetime.now() - timedelta(minutes=5)):
128
        print str(data['identifier'])
127
            print "sku id is already updated",data['_id'] 
129
        if data['identifier'] is None or len(data['identifier'].strip())==0:
128
            return
130
            return
129
    except:
-
 
130
        pass
-
 
131
    
-
 
132
    url = "http://www.amazon.in/gp/aw/ol/%s?o=New&op=1"%(data['identifier'])
-
 
133
    
-
 
134
    try:
-
 
135
        if data['dealFlag'] ==1 and data['dealType'] ==1:
-
 
136
            deal_url = data['dealUrl'].strip()
-
 
137
            dealScraping = True
-
 
138
    except:
-
 
139
        data['dealFlag'] = 0
-
 
140
        data['dealType'] = 0
-
 
141
        data['dealUrl'] = ""
-
 
142
        
-
 
143
    print url
-
 
144
    lowestPrice = 0.0
-
 
145
    
-
 
146
    if not dealScraping:
-
 
147
        scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
-
 
148
        lowestPrice = scraperAmazon.read(url)
-
 
149
    else:
-
 
150
        dealScraperAmazon = AmazonDealScraper.AmazonScraper()
-
 
151
        lowestPrice = dealScraperAmazon.read(deal_url)
-
 
152
        if lowestPrice == 0:
-
 
153
            scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
-
 
154
            lowestPrice = scraperAmazon.read(url)
-
 
155
            dealScraping = False
-
 
156
        
131
        
157
    print lowestPrice
132
        if data.get('ignorePricing') ==1:
158
    if lowestPrice > 0:
133
            print "Ignored items returning for %d"%(data['_id'])
159
        inStock = 1
134
            return 
160
        netPriceAfterCashBack = getNetPriceForItem(data['_id'], SOURCE_MAP.get('AMAZON'), data['category_id'], lowestPrice)
-
 
161
    else:
-
 
162
        netPriceAfterCashBack = getNetPriceForItem(data['_id'], SOURCE_MAP.get('AMAZON'), data['category_id'], data['available_price'])
-
 
163
        
135
        
164
    print lowestPrice
136
        try:
-
 
137
            if data['priceUpdatedOn'] > to_java_date(datetime.now() - timedelta(minutes=5)):
165
    print inStock
138
                print "sku id is already updated",data['_id'] 
166
    if lowestPrice > 0:
139
                return
167
        get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice,'updatedOn':to_java_date(datetime.now()),'priceUpdatedOn':to_java_date(datetime.now()),'in_stock':inStock}}, multi=True)
-
 
168
        get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice , 'in_stock':inStock,'dealType':data['dealType'],'codAvailable':data['codAvailable'],'netPriceAfterCashBack':netPriceAfterCashBack}}, multi=True)
140
        except:
169
    else:
141
            pass
170
        get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'updatedOn':to_java_date(datetime.now()),'in_stock':inStock,'priceUpdatedOn':to_java_date(datetime.now())}}, multi=True)
-
 
171
        get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'in_stock':inStock,'dealType':data['dealType'],'codAvailable':data['codAvailable'], 'netPriceAfterCashBack':netPriceAfterCashBack}})
-
 
172
        
142
        
-
 
143
        lowestPrice = asinPricingMap.get(k)    
-
 
144
        print lowestPrice
-
 
145
        if lowestPrice > 0:
-
 
146
            inStock = 1
-
 
147
            netPriceAfterCashBack = getNetPriceForItem(data['_id'], SOURCE_MAP.get('AMAZON'), data['category_id'], lowestPrice)
-
 
148
        else:
-
 
149
            netPriceAfterCashBack = getNetPriceForItem(data['_id'], SOURCE_MAP.get('AMAZON'), data['category_id'], data['available_price'])
-
 
150
            
-
 
151
        print lowestPrice
-
 
152
        print inStock
-
 
153
        if lowestPrice > 0:
-
 
154
            get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice,'updatedOn':to_java_date(datetime.now()),'priceUpdatedOn':to_java_date(datetime.now()),'in_stock':inStock}}, multi=True)
173
    try:
155
            get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice , 'in_stock':inStock,'dealType':data['dealType'],'codAvailable':data['codAvailable'],'netPriceAfterCashBack':netPriceAfterCashBack}}, multi=True)
-
 
156
        else:
-
 
157
            get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'updatedOn':to_java_date(datetime.now()),'in_stock':inStock,'priceUpdatedOn':to_java_date(datetime.now())}}, multi=True)
-
 
158
            get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'in_stock':inStock,'dealType':data['dealType'],'codAvailable':data['codAvailable'], 'netPriceAfterCashBack':netPriceAfterCashBack}})
-
 
159
            
-
 
160
        try:
174
        recomputeDeal(data)
161
            recomputeDeal(data)
175
    except:
162
        except:
176
        print "Unable to compute deal for ",data['skuBundleId']    
163
            print "Unable to compute deal for ",data['skuBundleId']    
177
            
164
            
178
def populateNegativeDeals():
165
def populateNegativeDeals():
179
    negativeDeals = get_mongo_connection().Catalog.NegativeDeals.find().distinct('sku')
166
    negativeDeals = get_mongo_connection().Catalog.NegativeDeals.find().distinct('sku')
180
    mc.set("negative_deals", negativeDeals, 600)
167
    mc.set("negative_deals", negativeDeals, 600)
181
 
168