Subversion Repositories SmartDukaan

Rev

Rev 16503 | Rev 17264 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
15269 kshitij.so 2
from dtr.utils.utils import to_java_date, getNlcPoints
13915 kshitij.so 3
from datetime import datetime, timedelta
13828 kshitij.so 4
from operator import itemgetter
14123 kshitij.so 5
from dtr.utils import FlipkartScraper,NewFlipkartScraper
14178 kshitij.so 6
from multiprocessing import Pool as ThreadPool
14172 kshitij.so 7
from multiprocessing import cpu_count
14255 kshitij.so 8
import optparse
14325 kshitij.so 9
from dtr.storage.MemCache import MemCache
14705 kshitij.so 10
from dtr.utils.utils import getCashBack
15269 kshitij.so 11
import traceback
13828 kshitij.so 12
 
13
# Shared MongoDB client; get_mongo_connection() fills this in on first use.
con = None

# Command line: -m / --m selects the MongoDB host (defaults to localhost).
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost",
    default="localhost",
    type="string",
    help="The HOST where the mongo server is running",
    metavar="mongo_host"
)

(options, args) = parser.parse_args()

# MemCache instance, constructed with the same host option as MongoDB.
mc = MemCache(options.mongoHost)

# Numeric source ids keyed by marketplace name.
SOURCE_MAP = {
    'AMAZON': 1,
    'FLIPKART': 2,
    'SNAPDEAL': 3,
    'SAHOLIC': 4,
    'SHOPCLUES.COM': 5,
    'PAYTM.COM': 6,
}
15610 kshitij.so 26
 
14255 kshitij.so 27
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the module-wide MongoClient, creating it on first use.

    The client is stored in the global ``con`` so later calls reuse it;
    ``host`` and ``port`` are therefore only honoured by the call that
    actually creates the connection.

    Returns:
        The shared ``pymongo.MongoClient``, or ``None`` when the client could
        not be created (the error is printed, not raised). Callers that chain
        attribute access on the result will fail with AttributeError then.
    """
    global con
    if con is None:
        print("Establishing connection %s host and port %d" % (host, port))
        try:
            con = pymongo.MongoClient(host, port)
        except Exception as e:
            # Best effort: report the failure and let the caller see None.
            print(e)
            return None
    return con
37
 
14149 kshitij.so 38
def _resetMongoConnection():
    """Pool initializer: forget the MongoClient inherited from the parent.

    Each worker then builds its own client on its first
    get_mongo_connection() call instead of sharing the parent's sockets.
    """
    global con
    con = None


def populate():
    """Build the list of Flipkart SKUs to refresh and scrape them in parallel.

    The work list is keyed by MasterData ``_id`` and filled from three sources:

    1. Flipkart rows of every bundle that has a MasterData row with rank > 0
       (``dealFlag`` / ``dealType`` set to 0).
    2. Flipkart rows in Deals with ``showDeal`` 1 and ``totalPoints`` > -100
       (``dealFlag`` / ``dealType`` set to 0).
    3. Flipkart ManualDeals whose start/end window contains the current time;
       these set ``dealFlag`` to 1 and copy ``dealType``, overriding the flags
       of an entry already collected by steps 1 or 2.

    Every collected row is passed to scrapeFlipkart through a process pool.
    """
    flipkartId = SOURCE_MAP['FLIPKART']
    catalog = get_mongo_connection().Catalog
    toScrapMap = {}

    # 1. Best sellers: query each bundle once, even if several ranked rows share it.
    seenBundles = set()
    for bestSeller in list(catalog.MasterData.find({'rank': {'$gt': 0}})):
        bundleId = bestSeller['skuBundleId']
        if bundleId in seenBundles:
            continue
        seenBundles.add(bundleId)
        for data in catalog.MasterData.find({'skuBundleId': bundleId, 'source_id': flipkartId}):
            if data['_id'] not in toScrapMap:
                data['dealFlag'] = 0
                data['dealType'] = 0
                toScrapMap[data['_id']] = data

    # 2. Deals currently shown.
    dealFlagged = list(catalog.Deals.find({'source_id': flipkartId, 'showDeal': 1, 'totalPoints': {'$gt': -100}}))
    for deal in dealFlagged:
        if deal['_id'] in toScrapMap:
            continue
        data = catalog.MasterData.find_one({'_id': deal['_id']})
        if data is None:
            # A deal without a MasterData row cannot be scraped; skip it
            # instead of failing the whole run.
            print("No master data found for deal %s" % (deal['_id'],))
            continue
        data['dealFlag'] = 0
        data['dealType'] = 0
        toScrapMap[deal['_id']] = data

    # 3. Manual deals active right now.
    now = to_java_date(datetime.now())
    manualDeals = list(catalog.ManualDeals.find({'startDate': {'$lte': now}, 'endDate': {'$gte': now}, 'source_id': flipkartId}))
    for manualDeal in manualDeals:
        sku = manualDeal['sku']
        data = toScrapMap.get(sku)
        if data is None:
            data = catalog.MasterData.find_one({'_id': sku})
            if data is None:
                continue
            toScrapMap[sku] = data
        data['dealFlag'] = 1
        data['dealType'] = manualDeal['dealType']

    # NOTE: despite the alias, ThreadPool is multiprocessing.Pool, i.e. worker
    # processes. The initializer keeps them from reusing the parent's client.
    pool = ThreadPool(cpu_count() * 2, initializer=_resetMongoConnection)
    try:
        pool.map(scrapeFlipkart, list(toScrapMap.values()))
    finally:
        # Always release the workers, even when a task raised.
        pool.close()
        pool.join()
    print("joining threads at %s" % (str(datetime.now())))
14149 kshitij.so 72
 
73
def _fetchSellerListing(scraper, url, maxTries=3):
    """Read the seller listing at ``url``, retrying on any scraper error.

    Returns:
        ``(vendorsData, buyBoxInfo)`` as produced by ``scraper.read(url)``,
        or ``None`` when all ``maxTries`` attempts raised.
    """
    for retryCount in range(maxTries):
        try:
            # Unpack inside the try so a malformed result is retried as well.
            vendorsData, buyBoxInfo = scraper.read(url)
            return vendorsData, buyBoxInfo
        except Exception as e:
            print("***Retry count  %s" % (retryCount,))
            print(e)
    return None


def scrapeFlipkart(data):
    """Refresh price and stock of one Flipkart MasterData row, then its deal.

    ``data`` is a MasterData document (dict). The row is skipped when it is
    not a Flipkart row, has a blank identifier, has ``ignorePricing`` == 1, or
    had its price updated within the last five minutes.

    The product page (``marketPlaceUrl``) is tried first; when it yields no
    price, the seller listing page ``/ps/<identifier>`` is read with up to
    three attempts. The result is written to Catalog.MasterData and
    Catalog.Deals, after which recomputeDeal() is run for the row's bundle.

    Returns:
        None. All results are written to MongoDB or printed.
    """
    if data['source_id'] != SOURCE_MAP['FLIPKART']:
        return
    if data['identifier'] is None or len(data['identifier'].strip()) == 0:
        print("returning in valid identifier")
        return

    if data.get('ignorePricing') == 1:
        print("Ignored items returning for %d" % (data['_id']))
        return

    # Skip rows refreshed in the last five minutes. A missing or
    # non-comparable timestamp simply means "not recently updated".
    try:
        recentlyUpdated = data['priceUpdatedOn'] > to_java_date(datetime.now() - timedelta(minutes=5))
    except (KeyError, TypeError):
        recentlyUpdated = False
    if recentlyUpdated:
        print("sku id is already updated %d" % (data['_id']))
        return

    lowestSp = 0
    inStock = 0
    buyBoxPrice = 0
    isBuyBox = 0
    scraperFk = FlipkartScraper.FlipkartScraper()
    scraperProductPage = NewFlipkartScraper.FlipkartProductPageScraper()

    # First choice: the product page, but only when the stored URL is a real
    # product URL. The two comparisons must be joined with "and"; with "or"
    # the test was always true, so blank and /ps/ URLs were scraped as well.
    try:
        productUrl = data['marketPlaceUrl']
        if productUrl and productUrl != "http://www.flipkart.com/ps/%s" % (data['identifier']):
            result = scraperProductPage.read(productUrl)
            if result.get('lowestSp') != 0:
                lowestSp = result.get('lowestSp')
                inStock = result.get('inStock')
                buyBoxPrice = result.get('buyBoxPrice')
    except Exception as e:
        # Best effort: fall through to the seller listing page below.
        print("Unable to scrape product page %s" % (data['identifier']))
        print(e)

    if lowestSp == 0:
        url = "http://www.flipkart.com/ps/%s" % (data['identifier'].strip())
        listing = _fetchSellerListing(scraperFk, url)
        if listing is None:
            print("Unable to fetch data after multiple tries.Continue for %s" % (data['identifier']))
            return
        vendorsData, buyBoxInfo = listing

        sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))
        print("data %s" % (sortedVendorsData,))
        # Cheapest seller wins; an empty listing leaves the price at 0.
        lowestSp = sortedVendorsData[0]['sellingPrice'] if sortedVendorsData else 0
        if lowestSp > 0:
            inStock = 1
        if len(buyBoxInfo) > 0:
            buyBoxPrice = buyBoxInfo[0].get('sellingPrice')
        else:
            print("No info about buy box for %d" % (data.get('_id')))
    print(lowestSp)
    print(inStock)
    # NOTE(review): when nothing was found both values are still 0, so this
    # also sets the flag for rows without a price - confirm that is intended.
    if buyBoxPrice is not None and buyBoxPrice == lowestSp:
        isBuyBox = 1

    now = to_java_date(datetime.now())
    masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock, 'buyBoxFlag': isBuyBox}
    dealFields = {'in_stock': inStock, 'codAvailable': data['codAvailable']}
    if lowestSp > 0:
        # Only overwrite the stored price when a real one was scraped.
        masterFields['available_price'] = lowestSp
        dealFields['available_price'] = lowestSp
    catalog = get_mongo_connection().Catalog
    catalog.MasterData.update({'_id': data['_id']}, {'$set': masterFields}, multi=True)
    catalog.Deals.update({'_id': data['_id']}, {'$set': dealFields}, multi=True)

    try:
        recomputeDeal(data)
    except Exception as e:
        print("Unable to compute deal for %s" % (data['skuBundleId']))
        print(e)
13828 kshitij.so 156
 
16503 kshitij.so 157
#def recomputePoints(item, deal):
158
#    try:
159
#        if item.get('available_price') == deal['available_price']:
160
#            print "No need to compute points for %d , as price is still same" %(item['_id'])
161
#            raise
162
#        nlcPoints = getNlcPoints(item, deal['minNlc'], deal['maxNlc'], deal['available_price'])
163
#    except:
164
#        print traceback.print_exc()
165
#        nlcPoints = deal['nlcPoints']
166
#    
167
#    bundleDealPoints = list(get_mongo_connection().Catalog.DealPoints.find({'skuBundleId':item['skuBundleId'],'startDate':{'$lte':to_java_date(datetime.now())},'endDate':{'$gte':to_java_date(datetime.now())}}))
168
#    if len(bundleDealPoints) > 0:
169
#        item['manualDealThresholdPrice'] = bundleDealPoints[0]['dealThresholdPrice']
170
#        dealPoints = bundleDealPoints[0]['dealPoints']
171
#    else:
172
#        dealPoints = 0
173
#        item['manualDealThresholdPrice'] = None
174
#    
175
#    get_mongo_connection().Catalog.Deals.update({'_id':deal['_id']},{"$set":{'totalPoints':deal['totalPoints'] - deal['nlcPoints'] + nlcPoints - deal['dealPoints'] +dealPoints , 'nlcPoints': nlcPoints, 'dealPoints': dealPoints, 'manualDealThresholdPrice': item['manualDealThresholdPrice']}})
15269 kshitij.so 176
 
14325 kshitij.so 177
def populateNegativeDeals():
    """Store the distinct ``sku`` values of Catalog.NegativeDeals in the cache.

    The values are saved under the key "negative_deals"; the third argument
    passed to ``mc.set`` is 600 (presumably an expiry in seconds - confirm
    against MemCache).
    """
    negativeDealsCollection = get_mongo_connection().Catalog.NegativeDeals
    skus = negativeDealsCollection.find().distinct('sku')
    mc.set("negative_deals", skus, 600)
180
 
15269 kshitij.so 181
def _getNegativeDeals():
    """Return the negative-deal SKUs, from the cache when possible.

    On a cache miss the cache is repopulated and read again; if it is still
    empty the SKUs are read straight from MongoDB so callers never get None.
    """
    skus = mc.get("negative_deals")
    if skus is None:
        populateNegativeDeals()
        skus = mc.get("negative_deals")
    if skus is None:
        skus = get_mongo_connection().Catalog.NegativeDeals.find().distinct('sku')
    return skus


def _isHiddenDeal(deal, negativeDeals):
    """Tell whether ``deal`` may never be shown: out of stock, listed as a
    negative deal, or an unranked ShopClues row."""
    if deal['in_stock'] == 0 or deal['_id'] in negativeDeals:
        return True
    return deal['source_id'] == SOURCE_MAP.get('SHOPCLUES.COM') and deal['rank'] == 0


def _beatsCurrentBest(candidate, best):
    """Tell whether ``candidate`` replaces ``best``: a lower price wins, and
    on equal price the higher ``bestSellerPoints`` wins."""
    if best is None:
        return True
    if candidate['available_price'] != best['available_price']:
        return candidate['available_price'] < best['available_price']
    return best['bestSellerPoints'] < candidate['bestSellerPoints']


def recomputeDeal(item):
    """Recompute which deal of the item's bundle is shown.

    All Deals rows sharing ``item['skuBundleId']`` are examined. Rows that are
    out of stock, listed in the negative deals, or unranked ShopClues rows are
    never eligible. Among the rest:

    * the cheapest row with ``codAvailable`` == 1 becomes the shown deal
      (``showDeal`` 1, ``prepaidDeal`` 0);
    * the cheapest remaining row becomes the prepaid deal (``showDeal`` 0,
      ``prepaidDeal`` 1), but only when there is no shown deal or it is
      strictly cheaper than the shown deal. Paytm rows are compared by
      ``gross_price``.

    Every other row of the bundle gets ``showDeal`` 0 and ``prepaidDeal`` 0.
    """
    skuBundleId = item['skuBundleId']
    print("Recomputing for bundleId %d" % (skuBundleId))

    deals = get_mongo_connection().Catalog.Deals
    similarItems = list(deals.find({'skuBundleId': skuBundleId}).sort([('available_price', pymongo.ASCENDING)]))
    if not similarItems:
        return

    # Read the negative-deal list once per bundle instead of twice per row.
    negativeDeals = _getNegativeDeals()

    bestOne = None
    prepaidBestOne = None
    for similarItem in similarItems:
        if _isHiddenDeal(similarItem, negativeDeals):
            continue
        if similarItem['codAvailable'] == 1:
            if _beatsCurrentBest(similarItem, bestOne):
                bestOne = similarItem
        else:
            if similarItem['source_id'] == SOURCE_MAP.get('PAYTM.COM'):
                # Paytm rows compete on their gross price.
                similarItem['available_price'] = similarItem['gross_price']
            if _beatsCurrentBest(similarItem, prepaidBestOne):
                prepaidBestOne = similarItem

    # A prepaid winner is only promoted when it undercuts the shown deal.
    if bestOne is not None and prepaidBestOne is not None:
        if not prepaidBestOne['available_price'] < bestOne['available_price']:
            prepaidBestOne = None

    winnerIds = [winner['_id'] for winner in (bestOne, prepaidBestOne) if winner is not None]
    toUpdate = [deal['_id'] for deal in similarItems if deal['_id'] not in winnerIds]

    if bestOne is not None:
        deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1, 'prepaidDeal': 0}})
    if prepaidBestOne is not None:
        deals.update({'_id': prepaidBestOne['_id']}, {'$set': {'showDeal': 0, 'prepaidDeal': 1}})
    if len(toUpdate) > 0:
        # One bulk write hides every non-winning row, ineligible ones included.
        deals.update({'_id': {"$in": toUpdate}}, {'$set': {'showDeal': 0, 'prepaidDeal': 0}}, upsert=False, multi=True)
13915 kshitij.so 251
 
13828 kshitij.so 252
def main():
    """Script entry point: run a single populate() pass."""
    populate()


# Run the job only when executed as a script, not when imported.
if __name__ == '__main__':
    main()