Subversion Repositories SmartDukaan

Rev

Rev 16509 | Rev 19185 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
15267 kshitij.so 2
from dtr.utils.utils import to_java_date, getNlcPoints
13914 kshitij.so 3
from datetime import datetime, timedelta
14308 kshitij.so 4
from dtr.utils import AmazonPriceOnlyScraper, AmazonDealScraper
14174 kshitij.so 5
from multiprocessing import Pool as ThreadPool
14170 kshitij.so 6
from multiprocessing import cpu_count
14256 kshitij.so 7
import optparse
14325 kshitij.so 8
from dtr.storage.MemCache import MemCache
14705 kshitij.so 9
from dtr.utils.utils import getCashBack
15267 kshitij.so 10
import traceback
13828 kshitij.so 11
 
12
# Process-wide MongoDB client; stays None until get_mongo_connection() builds it.
con = None

# Command line: -m / --m selects the host the mongo server runs on.
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost",
    default="localhost",
    type="string",
    help="The HOST where the mongo server is running",
    metavar="mongo_host",
)
options, args = parser.parse_args()

# Cache client used for the "negative_deals" list; it is constructed with the
# same host value that is used for MongoDB.
mc = MemCache(options.mongoHost)

# Marketplace name -> numeric source_id compared against Deals documents.
SOURCE_MAP = {
    'AMAZON': 1,
    'FLIPKART': 2,
    'SNAPDEAL': 3,
    'SAHOLIC': 4,
    'SHOPCLUES.COM': 5,
    'PAYTM.COM': 6,
}
16019 kshitij.so 26
 
14256 kshitij.so 27
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the shared pymongo client, creating it on first use.

    The client is stored in the module-level ``con`` so later calls reuse it.
    If constructing the client raises, the exception is printed and None is
    returned; ``con`` is left as None so the next call tries again.
    """
    global con
    if con is not None:
        return con

    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        print(e)
        return None
    return con
37
 
14147 kshitij.so 38
def _reset_deal_fields(data):
    """Mark a MasterData document as not belonging to a manual deal and return it."""
    data['dealFlag'] = 0
    data['dealType'] = 0
    data['dealUrl'] = ""
    return data


def populate():
    """Collect the Amazon items that need a price refresh and scrape them in a pool.

    Items come from three places, keyed by MasterData ``_id`` so each one is
    scraped once:

    1. Amazon (source_id 1) items sharing a skuBundleId with any MasterData
       document whose rank is greater than 0.
    2. Amazon entries in Catalog.Deals with showDeal 1 and totalPoints > -100.
    3. Amazon entries in Catalog.ManualDeals whose start/end dates bracket
       the current time; these carry dealFlag 1 plus the manual deal's
       dealType and dealUrl, overriding the defaults set by steps 1 and 2.

    Every collected document is then handed to scrapeAmazon through a
    multiprocessing pool of ``cpu_count() * 2`` workers.
    """
    catalog = get_mongo_connection().Catalog
    toScrapMap = {}

    # 1. Best sellers: query each bundle once, even when several ranked rows
    #    share the same skuBundleId.
    bestSellers = list(catalog.MasterData.find({'rank': {'$gt': 0}}))
    bundleIds = set(bestSeller['skuBundleId'] for bestSeller in bestSellers)
    for bundleId in bundleIds:
        amazonBestSellers = list(catalog.MasterData.find({'skuBundleId': bundleId, 'source_id': 1}))
        for data in amazonBestSellers:
            if data['_id'] not in toScrapMap:
                toScrapMap[data['_id']] = _reset_deal_fields(data)

    # 2. Deals currently shown for Amazon.
    dealFlagged = list(catalog.Deals.find({'source_id': 1, 'showDeal': 1, 'totalPoints': {'$gt': -100}}))
    for deal in dealFlagged:
        if deal['_id'] in toScrapMap:
            continue
        data = catalog.MasterData.find_one({'_id': deal['_id']})
        if data is None:
            # A deal without a MasterData row cannot be scraped; skip it
            # instead of failing the whole run with an IndexError.
            continue
        toScrapMap[deal['_id']] = _reset_deal_fields(data)

    # 3. Manual deals active right now. One timestamp is used for both bounds
    #    so the window is evaluated against a single instant.
    now = to_java_date(datetime.now())
    manualDeals = list(catalog.ManualDeals.find({'startDate': {'$lte': now}, 'endDate': {'$gte': now}, 'source_id': 1}))
    for manualDeal in manualDeals:
        sku = manualDeal['sku']
        data = toScrapMap.get(sku)
        if data is None:
            data = catalog.MasterData.find_one({'_id': sku})
            if data is None:
                continue
            toScrapMap[sku] = data
        data['dealFlag'] = 1
        data['dealType'] = manualDeal['dealType']
        data['dealUrl'] = manualDeal['dealUrl']

    # NOTE(review): "ThreadPool" is multiprocessing.Pool, so workers are
    # processes that inherit the already-created Mongo client - confirm the
    # installed pymongo tolerates use after fork.
    pool = ThreadPool(cpu_count() * 2)
    try:
        pool.map(scrapeAmazon, list(toScrapMap.values()))
    finally:
        # Always release the workers, even when a scrape raised.
        pool.close()
        pool.join()
    print("joining threads at %s" % (str(datetime.now())))
14147 kshitij.so 76
 
77
 
78
def scrapeAmazon(data):
    """Scrape the current Amazon price for one item and store the result.

    ``data`` is a MasterData document extended with dealFlag / dealType /
    dealUrl. The function returns None in every case; its effects are the
    updates written to Catalog.MasterData and Catalog.Deals and the
    follow-up call to recomputeDeal.

    Nothing is scraped when the identifier is missing or blank, when
    ignorePricing is 1, or when priceUpdatedOn is less than five minutes old.
    """
    print(str(data['identifier']))
    if data['identifier'] is None or len(data['identifier'].strip()) == 0:
        return

    if data.get('ignorePricing') == 1:
        print("Ignored items returning for %d" % (data['_id']))
        return

    # A missing or None priceUpdatedOn means the item was never priced, so
    # it falls through to scraping.
    lastPriced = data.get('priceUpdatedOn')
    if lastPriced is not None and lastPriced > to_java_date(datetime.now() - timedelta(minutes=5)):
        print("sku id is already updated %s" % (data['_id'],))
        return

    url = "http://www.amazon.in/gp/aw/ol/%s?o=New&op=1" % (data['identifier'])

    dealScraping = False
    deal_url = None
    try:
        if data['dealFlag'] == 1 and data['dealType'] == 1:
            deal_url = data['dealUrl'].strip()
            dealScraping = True
    except (KeyError, AttributeError):
        # Deal fields absent or dealUrl not a string: treat as a plain item.
        data['dealFlag'] = 0
        data['dealType'] = 0
        data['dealUrl'] = ""

    print(url)
    lowestPrice = 0.0

    if dealScraping:
        lowestPrice = AmazonDealScraper.AmazonScraper().read(deal_url)
    if not dealScraping or lowestPrice == 0:
        # Regular offer listing; also the fallback when the deal page
        # produced a price of 0.
        lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)

    hasPrice = lowestPrice > 0
    inStock = 1 if hasPrice else 0
    print(lowestPrice)
    print(inStock)

    # One timestamp for both fields so they always agree.
    now = to_java_date(datetime.now())
    catalog = get_mongo_connection().Catalog

    masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
    if hasPrice:
        masterFields['available_price'] = lowestPrice
    catalog.MasterData.update({'_id': data['_id']}, {'$set': masterFields}, multi=True)

    # Built after the MasterData write, as before, so a missing
    # codAvailable key still surfaces only after that write.
    dealFields = {'in_stock': inStock, 'dealType': data['dealType'], 'codAvailable': data['codAvailable']}
    if hasPrice:
        dealFields['available_price'] = lowestPrice
    catalog.Deals.update({'_id': data['_id']}, {'$set': dealFields}, multi=True)

    try:
        recomputeDeal(data)
    except Exception:
        # Best effort: one failing bundle must not stop the worker, but the
        # cause is now reported instead of being discarded.
        print("Unable to compute deal for  %s" % (data.get('skuBundleId'),))
        traceback.print_exc()
13914 kshitij.so 137
 
14325 kshitij.so 138
def populateNegativeDeals():
    """Store the distinct 'sku' values of Catalog.NegativeDeals in the cache.

    The list is written under the key "negative_deals"; 600 is passed as the
    third argument to ``mc.set`` (presumably an expiry in seconds - confirm
    against MemCache).
    """
    negativeDealsCollection = get_mongo_connection().Catalog.NegativeDeals
    skus = negativeDealsCollection.find().distinct('sku')
    mc.set("negative_deals", skus, 600)
13828 kshitij.so 141
 
16501 kshitij.so 142
#def recomputePoints(item, deal):
143
#    try:
144
#        if item.get('available_price') == deal['available_price']:
145
#            print "No need to compute points for %d , as price is still same" %(item['_id'])
146
#            raise
147
#        nlcPoints = getNlcPoints(item, deal['minNlc'], deal['maxNlc'], deal['available_price'])
148
#    except:
149
#        traceback.print_exc()
150
#        nlcPoints = deal['nlcPoints']
151
#    
152
#    bundleDealPoints = list(get_mongo_connection().Catalog.DealPoints.find({'skuBundleId':item['skuBundleId'],'startDate':{'$lte':to_java_date(datetime.now())},'endDate':{'$gte':to_java_date(datetime.now())}}))
153
#    if len(bundleDealPoints) > 0:
154
#        item['manualDealThresholdPrice'] = bundleDealPoints[0]['dealThresholdPrice']
155
#        dealPoints = bundleDealPoints[0]['dealPoints']
156
#    else:
157
#        dealPoints = 0
158
#        item['manualDealThresholdPrice'] = None
159
#    
160
#    get_mongo_connection().Catalog.Deals.update({'_id':deal['_id']},{"$set":{'totalPoints':deal['totalPoints'] - deal['nlcPoints'] + nlcPoints - deal['dealPoints'] +dealPoints , 'nlcPoints': nlcPoints, 'dealPoints': dealPoints, 'manualDealThresholdPrice': item['manualDealThresholdPrice']}})
15267 kshitij.so 161
 
162
 
163
 
164
def _is_better_offer(candidate, bestPrice, bestSellerPoints):
    """Return True if candidate is cheaper, or equally priced with more bestSellerPoints."""
    price = candidate['available_price']
    if price < bestPrice:
        return True
    return price == bestPrice and bestSellerPoints < candidate['bestSellerPoints']


def recomputeDeal(item):
    """Recompute which Deals entries of the item's bundle are shown.

    All Deals documents sharing ``item['skuBundleId']`` are examined:

    * Entries that are out of stock, listed in the "negative_deals" cache,
      or ShopClues entries with rank 0 get showDeal 0 / prepaidDeal 0.
    * Among the remaining entries with codAvailable == 1, the cheapest
      (ties broken by higher bestSellerPoints) gets showDeal 1.
    * Among the remaining entries without COD, the cheapest gets
      prepaidDeal 1, but only if it is strictly cheaper than the COD winner
      or there is no COD winner. Paytm entries are compared on gross_price.
    * Every other entry of the bundle gets showDeal 0 / prepaidDeal 0.

    If no entry is eligible at all, only the per-entry hiding above happens.
    """
    print("Recomputing for bundleId %d" % (item.get('skuBundleId')))
    skuBundleId = item['skuBundleId']

    deals = get_mongo_connection().Catalog.Deals
    similarItems = list(deals.find({'skuBundleId': skuBundleId}).sort([('available_price', pymongo.ASCENDING)]))
    if not similarItems:
        return

    # Read the negative-deal list once. Re-reading it for every item cost two
    # cache round trips per item and could hand back None (entry expired or
    # never stored) right before the membership test.
    negativeDeals = mc.get("negative_deals")
    if negativeDeals is None:
        populateNegativeDeals()
        negativeDeals = mc.get("negative_deals")
    negativeDeals = frozenset(negativeDeals or ())

    shopcluesId = SOURCE_MAP.get('SHOPCLUES.COM')
    paytmId = SOURCE_MAP.get('PAYTM.COM')

    bestPrice = float("inf")
    bestOne = None
    bestSellerPoints = 0
    prepaidBestPrice = float("inf")
    prepaidBestOne = None
    prepaidBestSellerPoints = 0

    for similarItem in similarItems:
        codAvailable = similarItem['codAvailable'] == 1

        hidden = (similarItem['in_stock'] == 0
                  or similarItem['_id'] in negativeDeals
                  or (similarItem['source_id'] == shopcluesId and similarItem['rank'] == 0))
        if hidden:
            deals.update({'_id': similarItem['_id']}, {'$set': {'showDeal': 0, 'prepaidDeal': 0}})
            continue

        if codAvailable:
            if _is_better_offer(similarItem, bestPrice, bestSellerPoints):
                bestOne = similarItem
                bestPrice = similarItem['available_price']
                bestSellerPoints = similarItem['bestSellerPoints']
        else:
            if similarItem['source_id'] == paytmId:
                # Paytm prepaid offers are ranked on their gross price.
                similarItem['available_price'] = similarItem['gross_price']
            if _is_better_offer(similarItem, prepaidBestPrice, prepaidBestSellerPoints):
                prepaidBestOne = similarItem
                prepaidBestPrice = similarItem['available_price']
                prepaidBestSellerPoints = similarItem['bestSellerPoints']

    if bestOne is None and prepaidBestOne is None:
        return

    # Everything except the winners is switched off in one bulk update.
    toUpdate = [similarItem['_id'] for similarItem in similarItems]

    if bestOne is not None:
        toUpdate.remove(bestOne['_id'])
        deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1, 'prepaidDeal': 0}})

    prepaidWins = prepaidBestOne is not None and (
        bestOne is None or prepaidBestOne['available_price'] < bestOne['available_price'])
    if prepaidWins:
        toUpdate.remove(prepaidBestOne['_id'])
        deals.update({'_id': prepaidBestOne['_id']}, {'$set': {'showDeal': 0, 'prepaidDeal': 1}})

    if toUpdate:
        deals.update({'_id': {"$in": toUpdate}}, {'$set': {'showDeal': 0, 'prepaidDeal': 0}}, upsert=False, multi=True)
13914 kshitij.so 234
 
13828 kshitij.so 235
def main():
    """Script entry point: run one full populate-and-scrape pass."""
    populate()


if __name__ == '__main__':
    main()