Subversion Repositories SmartDukaan

Rev

Rev 20347 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
20347 kshitij.so 2
from dtr.utils.utils import to_java_date, getNlcPoints, DEAL_PRIORITY
13915 kshitij.so 3
from datetime import datetime, timedelta
13828 kshitij.so 4
from operator import itemgetter
14123 kshitij.so 5
from dtr.utils import FlipkartScraper,NewFlipkartScraper
14178 kshitij.so 6
from multiprocessing import Pool as ThreadPool
14172 kshitij.so 7
from multiprocessing import cpu_count
14255 kshitij.so 8
import optparse
14325 kshitij.so 9
from dtr.storage.MemCache import MemCache
14705 kshitij.so 10
from dtr.utils.utils import getCashBack
15269 kshitij.so 11
import traceback
13828 kshitij.so 12
 
13
# Process-wide MongoClient handle; filled in lazily by get_mongo_connection().
con = None

# Command-line interface: the only option is the Mongo host.
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost",
    default="localhost",
    type="string",
    help="The HOST where the mongo server is running",
    metavar="mongo_host"
)

# NOTE: arguments are parsed at import time, so importing this module
# consumes sys.argv.
options, args = parser.parse_args()

# Project MemCache handle, constructed with the same host option.
mc = MemCache(options.mongoHost)

# Numeric source ids keyed by marketplace name.
SOURCE_MAP = {
    'AMAZON': 1,
    'FLIPKART': 2,
    'SNAPDEAL': 3,
    'SAHOLIC': 4,
    'SHOPCLUES.COM': 5,
    'PAYTM.COM': 6,
}
15610 kshitij.so 26
 
14255 kshitij.so 27
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the module-wide pymongo client, creating it on first use.

    host and port are only consulted when no client exists yet; later
    calls return the already-created client regardless of arguments.
    Returns None if constructing the client raises.
    """
    global con
    if con is not None:
        return con

    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        print(e)
        return None
    return con
37
 
19187 kshitij.so 38
def getNetPriceForItem(itemId, source_id, category_id ,price):
    """Return price after subtracting the applicable cash back.

    The cash-back record is fetched with getCashBack(). It is applied only
    when its 'cash_back_status' is 1 and its 'cash_back_type' is 1
    (percentage of price) or 2 (flat amount). When the record carries a
    'maxCashBack', a percentage that would exceed it is replaced by the
    flat cap, and a flat amount above it is clamped to the cap.

    Any failure while looking up or interpreting the record is reported
    with a traceback and the unmodified price is returned.
    """
    cash_back_type = 0
    cash_back = 0
    try:
        cashBack = getCashBack(itemId, source_id, category_id, mc, options.mongoHost)
        if cashBack and cashBack.get('cash_back_status') == 1 and cashBack['cash_back_type'] in (1, 2):
            # Work on local copies: the record is not modified, because
            # getCashBack is handed the cache handle and may return a
            # shared/cached dict (NOTE(review): confirm). Mutating it would
            # leak a price-specific cap into later calls.
            offerType = cashBack['cash_back_type']
            offerValue = cashBack['cash_back']
            cap = cashBack.get('maxCashBack')
            if cap is not None:
                if offerType == 1 and (float(offerValue) * price) / 100 > cap:
                    # Percentage would exceed the cap: pay the cap as a flat amount.
                    offerType = 2
                    offerValue = cap
                elif offerType == 2 and offerValue > cap:
                    offerValue = cap
            # Convert first so a bad value leaves both defaults untouched.
            cash_back = float(offerValue)
            cash_back_type = offerType
    except Exception:
        # Best effort: pricing must not fail because cash back is unavailable,
        # but the reason should be visible in the logs.
        traceback.print_exc()
        cash_back_type = 0
        cash_back = 0

    if cash_back_type == 1:
        return (price - float(cash_back) * price / 100)
    elif cash_back_type == 2:
        return (price - cash_back)
    else:
        return price
73
 
74
 
14149 kshitij.so 75
def populate():
    """Collect the Flipkart (source_id 2) items to refresh and scrape them.

    Items are gathered into a dict keyed by MasterData _id from three places:
      1. Flipkart listings sharing a skuBundleId with any MasterData entry
         whose rank is > 0 (dealFlag/dealType set to 0).
      2. Flipkart entries in Deals with showDeal 1 and totalPoints > -100
         (dealFlag/dealType set to 0).
      3. Currently active Flipkart ManualDeals (dealFlag 1 and the manual
         deal's dealType, overriding values set by 1 or 2).
    Every collected item is then passed to scrapeFlipkart through a
    multiprocessing pool of cpu_count() * 2 workers.
    """
    masterData = get_mongo_connection().Catalog.MasterData
    toScrapMap = {}

    bestSellers = list(masterData.find({'rank': {'$gt': 0}}))
    for bestSeller in bestSellers:
        flipkartListings = list(masterData.find({'skuBundleId': bestSeller['skuBundleId'], 'source_id': 2}))
        for data in flipkartListings:
            if data['_id'] not in toScrapMap:
                data['dealFlag'] = 0
                data['dealType'] = 0
                toScrapMap[data['_id']] = data

    dealFlagged = list(get_mongo_connection().Catalog.Deals.find({'source_id': 2, 'showDeal': 1, 'totalPoints': {'$gt': -100}}))
    for deal in dealFlagged:
        if deal['_id'] in toScrapMap:
            continue
        data = masterData.find_one({'_id': deal['_id']})
        if data is None:
            # A deal without a MasterData row cannot be scraped; skip it
            # instead of failing the whole run.
            print("No MasterData entry for deal %s" % (deal['_id']))
            continue
        data['dealFlag'] = 0
        data['dealType'] = 0
        toScrapMap[deal['_id']] = data

    # One timestamp for both bounds so the "active" window is consistent.
    now = to_java_date(datetime.now())
    manualDeals = list(get_mongo_connection().Catalog.ManualDeals.find({'startDate': {'$lte': now}, 'endDate': {'$gte': now}, 'source_id': 2}))
    for manualDeal in manualDeals:
        sku = manualDeal['sku']
        data = toScrapMap.get(sku)
        if data is None:
            data = masterData.find_one({'_id': sku})
            if data is None:
                continue
            toScrapMap[sku] = data
        data['dealFlag'] = 1
        data['dealType'] = manualDeal['dealType']

    # NOTE(review): ThreadPool is multiprocessing.Pool, so workers are
    # processes that inherit the MongoClient created above; pymongo documents
    # MongoClient as not fork-safe - confirm this is acceptable here.
    pool = ThreadPool(cpu_count() * 2)
    try:
        pool.map(scrapeFlipkart, toScrapMap.values())
    finally:
        # Always release the workers, even when a scrape raised.
        pool.close()
        pool.join()
    print("joining threads at %s" % (str(datetime.now())))
14149 kshitij.so 109
 
110
def scrapeFlipkart(data):
    """Scrape one Flipkart item and write price/stock back to Mongo.

    data is a MasterData document (dict). Keys read here: source_id,
    identifier, ignorePricing, priceUpdatedOn, _id, category_id,
    codAvailable, available_price and skuBundleId.

    The item is skipped (returns None without scraping) when it is not a
    Flipkart item (source_id != 2), has a blank identifier, is marked
    ignorePricing == 1, or was price-updated within the last 5 minutes.
    Otherwise the product page is scraped, Catalog.MasterData and
    Catalog.Deals are updated, and the deal for the item's bundle is
    recomputed. Always returns None.
    """
    if data['source_id'] != 2:
        return
    if data['identifier'] is None or len(data['identifier'].strip()) == 0:
        print("returning in valid identifier")
        return

    if data.get('ignorePricing') == 1:
        print("Ignored items returning for %d" % (data['_id']))
        return

    try:
        if data['priceUpdatedOn'] > to_java_date(datetime.now() - timedelta(minutes=5)):
            print("sku id is already updated %d" % (data['_id']))
            return
    except Exception:
        # Freshness check is best effort (e.g. priceUpdatedOn missing):
        # fall through and scrape. Exception, not a bare except, so that
        # KeyboardInterrupt/SystemExit still stop the worker.
        pass

    lowestSp = 0
    inStock = 0
    buyBoxPrice = 0
    isBuyBox = 0
    scraperProductPage = NewFlipkartScraper.FlipkartProductPageScraper()
    try:
        result = scraperProductPage.read(data['identifier'])
        if result.get('lowestSp') != 0:
            lowestSp = result.get('lowestSp')
            inStock = result.get('inStock')
            buyBoxPrice = result.get('buyBoxPrice')
    except Exception:
        print("Unable to scrape product page %s" % (data['identifier']))
        return

    print(lowestSp)
    print(inStock)
    # NOTE(review): when nothing was scraped both values are still 0, so the
    # flag becomes 1 for items without a price - confirm this is intended.
    if buyBoxPrice is not None and buyBoxPrice == lowestSp:
        isBuyBox = 1

    catalog = get_mongo_connection().Catalog
    # Single timestamp so updatedOn and priceUpdatedOn agree.
    now = to_java_date(datetime.now())
    if lowestSp > 0:
        netPriceAfterCashBack = getNetPriceForItem(data['_id'], SOURCE_MAP.get('FLIPKART'), data['category_id'], lowestSp)
        catalog.MasterData.update({'_id': data['_id']}, {'$set': {'available_price': lowestSp, 'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock, 'buyBoxFlag': isBuyBox}}, multi=True)
        catalog.Deals.update({'_id': data['_id']}, {'$set': {'available_price': lowestSp, 'in_stock': inStock, 'codAvailable': data['codAvailable'], 'netPriceAfterCashBack': netPriceAfterCashBack}}, multi=True)
    else:
        # No usable price scraped: keep the stored price, refresh the rest.
        netPriceAfterCashBack = getNetPriceForItem(data['_id'], SOURCE_MAP.get('FLIPKART'), data['category_id'], data['available_price'])
        catalog.MasterData.update({'_id': data['_id']}, {'$set': {'updatedOn': now, 'in_stock': inStock, 'priceUpdatedOn': now, 'buyBoxFlag': isBuyBox}}, multi=True)
        catalog.Deals.update({'_id': data['_id']}, {'$set': {'in_stock': inStock, 'codAvailable': data['codAvailable'], 'netPriceAfterCashBack': netPriceAfterCashBack}})

    try:
        recomputeDeal(data)
    except Exception:
        print("Unable to compute deal for %s" % (data['skuBundleId']))
        # Show why, otherwise recompute bugs are invisible.
        traceback.print_exc()
13828 kshitij.so 163
 
16503 kshitij.so 164
#def recomputePoints(item, deal):
165
#    try:
166
#        if item.get('available_price') == deal['available_price']:
167
#            print "No need to compute points for %d , as price is still same" %(item['_id'])
168
#            raise
169
#        nlcPoints = getNlcPoints(item, deal['minNlc'], deal['maxNlc'], deal['available_price'])
170
#    except:
171
#        print traceback.print_exc()
172
#        nlcPoints = deal['nlcPoints']
173
#    
174
#    bundleDealPoints = list(get_mongo_connection().Catalog.DealPoints.find({'skuBundleId':item['skuBundleId'],'startDate':{'$lte':to_java_date(datetime.now())},'endDate':{'$gte':to_java_date(datetime.now())}}))
175
#    if len(bundleDealPoints) > 0:
176
#        item['manualDealThresholdPrice'] = bundleDealPoints[0]['dealThresholdPrice']
177
#        dealPoints = bundleDealPoints[0]['dealPoints']
178
#    else:
179
#        dealPoints = 0
180
#        item['manualDealThresholdPrice'] = None
181
#    
182
#    get_mongo_connection().Catalog.Deals.update({'_id':deal['_id']},{"$set":{'totalPoints':deal['totalPoints'] - deal['nlcPoints'] + nlcPoints - deal['dealPoints'] +dealPoints , 'nlcPoints': nlcPoints, 'dealPoints': dealPoints, 'manualDealThresholdPrice': item['manualDealThresholdPrice']}})
15269 kshitij.so 183
 
14325 kshitij.so 184
def populateNegativeDeals():
    """Cache the distinct 'sku' values of Catalog.NegativeDeals.

    The list is stored under the key "negative_deals" with 600 passed as
    the third argument to mc.set.
    """
    negativeDealsCollection = get_mongo_connection().Catalog.NegativeDeals
    skus = negativeDealsCollection.find().distinct('sku')
    mc.set("negative_deals", skus, 600)
187
 
15269 kshitij.so 188
def _shouldReplaceBest(candidate, incumbent, incumbentPrice):
    """Tell whether candidate should take over from incumbent as best deal.

    A strictly lower netPriceAfterCashBack wins. On an equal price the
    candidate wins unless its source sits later in DEAL_PRIORITY than the
    incumbent's; if the priority lookup fails the traceback is printed and
    the candidate wins.
    """
    candidatePrice = candidate.get('netPriceAfterCashBack')
    if candidatePrice < incumbentPrice:
        return True
    if candidatePrice == incumbentPrice:
        try:
            if (DEAL_PRIORITY.index(int(candidate['source_id'])) > DEAL_PRIORITY.index(int(incumbent['source_id']))):
                return False
        except Exception:
            traceback.print_exc()
        return True
    return False


def recomputeDeal(item):
    """Recompute which Deals entries of item's bundle are shown.

    All Catalog.Deals documents with item['skuBundleId'] are read in
    ascending netPriceAfterCashBack order. Entries that are out of stock,
    listed in the cached "negative_deals", or are SHOPCLUES.COM entries
    with rank 0 are hidden (showDeal 0, prepaidDeal 0). Among the rest,
    the best entry with codAvailable == 1 gets showDeal 1, and the best
    entry without it gets prepaidDeal 1, but only when there is no COD
    winner or it is strictly cheaper than the COD winner. Every other
    entry of the bundle is set to showDeal 0, prepaidDeal 0.
    """
    print("Recomputing for bundleId %d" % (item.get('skuBundleId')))
    skuBundleId = item['skuBundleId']

    deals = get_mongo_connection().Catalog.Deals
    similarItems = list(deals.find({'skuBundleId': skuBundleId}).sort([('netPriceAfterCashBack', pymongo.ASCENDING)]))

    negativeDeals = None
    if similarItems:
        # Load the negative list once per call. Re-reading the cache per
        # item could see the entry expire between the None check and the
        # membership test.
        negativeDeals = mc.get("negative_deals")
        if negativeDeals is None:
            populateNegativeDeals()
            negativeDeals = mc.get("negative_deals")

    hidden = {'$set': {'showDeal': 0, 'prepaidDeal': 0}}
    bestPrice = float("inf")
    bestOne = None
    prepaidBestPrice = float("inf")
    prepaidBestOne = None
    toUpdate = []

    for similarItem in similarItems:
        isCod = similarItem['codAvailable'] == 1
        if similarItem['in_stock'] == 0 or similarItem['_id'] in negativeDeals:
            deals.update({'_id': similarItem['_id']}, hidden)
            continue
        if similarItem['source_id'] == SOURCE_MAP.get('SHOPCLUES.COM') and similarItem['rank'] == 0:
            deals.update({'_id': similarItem['_id']}, hidden)
            continue

        if isCod:
            if _shouldReplaceBest(similarItem, bestOne, bestPrice):
                bestOne = similarItem
                bestPrice = similarItem.get('netPriceAfterCashBack')
        else:
            if _shouldReplaceBest(similarItem, prepaidBestOne, prepaidBestPrice):
                prepaidBestOne = similarItem
                prepaidBestPrice = similarItem.get('netPriceAfterCashBack')

    if bestOne is not None or prepaidBestOne is not None:
        # Start from every entry of the bundle and take the winners out;
        # whatever remains is hidden in one multi-update below.
        toUpdate = [similarItem['_id'] for similarItem in similarItems]
        if bestOne is not None:
            toUpdate.remove(bestOne['_id'])
            deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1, 'prepaidDeal': 0}})
        if prepaidBestOne is not None:
            prepaidWins = bestOne is None or prepaidBestOne.get('netPriceAfterCashBack') < bestOne.get('netPriceAfterCashBack')
            if prepaidWins:
                toUpdate.remove(prepaidBestOne['_id'])
                deals.update({'_id': prepaidBestOne['_id']}, {'$set': {'showDeal': 0, 'prepaidDeal': 1}})

    if len(toUpdate) > 0:
        deals.update({'_id': {"$in": toUpdate}}, hidden, upsert=False, multi=True)
13915 kshitij.so 264
 
19187 kshitij.so 265
 
13828 kshitij.so 266
def main():
    """Script entry point: run one scrape pass by calling populate()."""
    populate()
13828 kshitij.so 268
 
269
# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()