Subversion Repositories SmartDukaan

Rev

Rev 16173 | Rev 16347 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
15267 kshitij.so 2
from dtr.utils.utils import to_java_date, getNlcPoints
13914 kshitij.so 3
from datetime import datetime, timedelta
14308 kshitij.so 4
from dtr.utils import AmazonPriceOnlyScraper, AmazonDealScraper
14174 kshitij.so 5
from multiprocessing import Pool as ThreadPool
14170 kshitij.so 6
from multiprocessing import cpu_count
14256 kshitij.so 7
import optparse
14325 kshitij.so 8
from dtr.storage.MemCache import MemCache
14705 kshitij.so 9
from dtr.utils.utils import getCashBack
15267 kshitij.so 10
import traceback
13828 kshitij.so 11
 
12
# Lazily created MongoDB client; get_mongo_connection() fills it on first use.
con = None

# Command line: the only option is the host the mongo server runs on.
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost",
    default="localhost",
    type="string",
    help="The HOST where the mongo server is running",
    metavar="mongo_host"
)
(options, args) = parser.parse_args()

# Cache client, constructed with the same host that was given for mongo.
mc = MemCache(options.mongoHost)

# MasterData _ids that scrapeAmazon() refuses to scrape.
ignoreItems = [24104]

# Store name -> numeric source_id as stored in the Catalog collections.
SOURCE_MAP = {'AMAZON':1,'FLIPKART':2,'SNAPDEAL':3,'SAHOLIC':4, 'SHOPCLUES.COM':5}
27
 
14256 kshitij.so 28
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the module-wide MongoDB client, creating it on first use.

    The client is stored in the global ``con``; ``host`` and ``port`` only
    matter for the call that actually creates it.

    Returns ``None`` when constructing the client raises: the error is
    printed rather than propagated, and ``con`` stays unset so the next call
    tries again.
    """
    global con
    if con is not None:
        return con

    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        print(e)
        return None
    return con
38
 
14147 kshitij.so 39
def _setDealFields(data, dealFlag=0, dealType=0, dealPoints=0, thresholdPrice=None):
    """Stamp the manual-deal fields on a MasterData record and return it."""
    data['dealFlag'] = dealFlag
    data['dealType'] = dealType
    data['dealPoints'] = dealPoints
    data['manualDealThresholdPrice'] = thresholdPrice
    return data


def populate():
    """Collect every Amazon SKU that needs a price refresh and scrape them.

    The work list is keyed by MasterData ``_id`` and is built from three
    sources, in this order:

    1. the ``source_id`` 1 MasterData rows of every bundle that has a row
       with ``rank`` > 0;
    2. ``source_id`` 1 rows of Catalog.Deals with ``showDeal`` 1 and
       ``totalPoints`` > -100;
    3. Catalog.ManualDeals rows for ``source_id`` 1 whose start/end dates
       bracket the current time; these overwrite the deal fields of a SKU
       that is already in the list.

    Every collected record is then passed to ``scrapeAmazon`` through a pool
    of ``cpu_count() * 2`` workers (``ThreadPool`` is an alias of
    ``multiprocessing.Pool`` in this file).
    """
    catalog = get_mongo_connection().Catalog
    toScrapMap = {}

    bestSellers = list(catalog.MasterData.find({'rank':{'$gt':0}}))
    for bestSeller in bestSellers:
        amazonBestSellers = catalog.MasterData.find({'skuBundleId':bestSeller['skuBundleId'],'source_id':1})
        for data in amazonBestSellers:
            if data['_id'] not in toScrapMap:
                toScrapMap[data['_id']] = _setDealFields(data)

    dealFlagged = list(catalog.Deals.find({'source_id':1,'showDeal':1,'totalPoints':{'$gt':-100}}))
    for deal in dealFlagged:
        if deal['_id'] in toScrapMap:
            continue
        rows = list(catalog.MasterData.find({'_id':deal['_id']}))
        if not rows:
            # A deal without a MasterData row used to raise IndexError here
            # and abort the whole run; skip it like the manual-deal path does.
            print("No MasterData entry for deal %s, skipping" % (deal['_id'],))
            continue
        toScrapMap[deal['_id']] = _setDealFields(rows[0])

    now = to_java_date(datetime.now())
    manualDeals = list(catalog.ManualDeals.find({'startDate':{'$lte':now},'endDate':{'$gte':now},'source_id':1}))
    for manualDeal in manualDeals:
        sku = manualDeal['sku']
        data = toScrapMap.get(sku)
        if data is None:
            rows = list(catalog.MasterData.find({'_id':sku}))
            if not rows:
                continue
            data = rows[0]
            toScrapMap[sku] = data
        _setDealFields(data, 1, manualDeal['dealType'], manualDeal['dealPoints'],
                       manualDeal['dealThresholdPrice'])

    pool = ThreadPool(cpu_count() * 2)
    try:
        pool.map(scrapeAmazon, list(toScrapMap.values()))
    finally:
        # Always release the workers, even when one scrape raised.
        pool.close()
        pool.join()
    print("joining threads at %s"%(str(datetime.now())))
14147 kshitij.so 81
 
82
 
83
def scrapeAmazon(data):
    """Scrape the current Amazon price of one MasterData record and store it.

    ``data`` is a MasterData document carrying the deal fields added by
    ``populate`` (``dealFlag``, ``dealType``, ``dealPoints``,
    ``manualDealThresholdPrice``).

    Nothing is done (the function returns ``None`` early) when the record has
    no usable ``identifier``, when its ``_id`` is listed in ``ignoreItems``,
    or when ``priceUpdatedOn`` is newer than five minutes ago.

    Otherwise the price is read from the deal page (only when ``dealFlag``
    and ``dealType`` are both 1, with a fallback to the offer listing when the
    deal scraper reports 0) or from the offer listing, written to
    Catalog.MasterData and Catalog.Deals, and the bundle's deal is recomputed.
    A price of 0 or less is recorded as out of stock and leaves
    ``available_price`` untouched.
    """
    inStock = 0
    dealScraping = False
    print(str(data['identifier']))
    if data['identifier'] is None or len(data['identifier'].strip())==0:
        return
    # The emptiness check and the deal URL both use the stripped identifier;
    # use it for the offer-listing URL as well so padding cannot break it.
    identifier = data['identifier'].strip()

    if data['_id'] in ignoreItems:
        print("Ignored items returning for %d"%(data['_id']))
        return

    try:
        if data['priceUpdatedOn'] > to_java_date(datetime.now() - timedelta(minutes=5)):
            print("sku id is already updated %s" % (data['_id'],))
            return
    except Exception:
        # No usable timestamp on the record: treat it as never priced and
        # fall through to scraping.
        pass

    url = "http://www.amazon.in/gp/aw/ol/%s?o=New&op=1"%(identifier)
    deal_url = None

    try:
        if data['dealFlag'] ==1 and data['dealType'] ==1:
            deal_url = "http://www.amazon.in/dp/%s"%(identifier)
            dealScraping = True
    except Exception:
        # Record did not come with deal fields: behave as a non-deal item.
        data['dealFlag'] = 0
        data['dealType'] = 0

    print(url)
    lowestPrice = 0.0

    if dealScraping:
        lowestPrice = AmazonDealScraper.AmazonScraper().read(deal_url)
        if lowestPrice == 0:
            # Deal page gave nothing; retry below with the offer listing.
            dealScraping = False
    if not dealScraping:
        lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)

    print(lowestPrice)
    if lowestPrice > 0:
        inStock = 1
    print(lowestPrice)
    print(inStock)

    catalog = get_mongo_connection().Catalog
    masterFields = {'updatedOn':to_java_date(datetime.now()),'priceUpdatedOn':to_java_date(datetime.now()),'in_stock':inStock}
    if lowestPrice > 0:
        masterFields['available_price'] = lowestPrice
    catalog.MasterData.update({'_id':data['_id']}, {'$set' : masterFields}, multi=True)

    dealFields = {'in_stock':inStock,'dealType':data['dealType'],'codAvailable':data['codAvailable']}
    if lowestPrice > 0:
        dealFields['available_price'] = lowestPrice
    catalog.Deals.update({'_id':data['_id']}, {'$set' : dealFields}, multi=True)

    try:
        recomputeDeal(data)
    except Exception:
        # Keep the worker alive, but leave the cause in the log; .get() so a
        # missing skuBundleId cannot raise again from inside the handler.
        traceback.print_exc()
        print("Unable to compute deal for  %s" % (data.get('skuBundleId'),))
13914 kshitij.so 141
 
14325 kshitij.so 142
def populateNegativeDeals():
    """Store the distinct ``sku`` values of Catalog.NegativeDeals in the cache.

    The list is written through ``mc`` under the key ``negative_deals``; 600
    is passed as the third argument of ``mc.set`` (presumably an expiry in
    seconds -- confirm against MemCache).
    """
    negativeCollection = get_mongo_connection().Catalog.NegativeDeals
    skus = negativeCollection.find().distinct('sku')
    mc.set("negative_deals", skus, 600)
13828 kshitij.so 145
 
15267 kshitij.so 146
def recomputePoints(item, deal):
    """Refresh ``nlcPoints``, ``dealPoints`` and ``totalPoints`` of one deal.

    ``item`` is the MasterData record as loaded before scraping (plus the
    manual-deal fields); ``deal`` is the matching Catalog.Deals document.

    * ``nlcPoints`` is recomputed with ``getNlcPoints`` only when the deal's
      ``available_price`` differs from the item's; if the price is unchanged
      or the computation fails, the stored ``deal['nlcPoints']`` is kept.
    * ``dealPoints`` is ``item['dealPoints']`` when a manual threshold price
      is set and is at least the deal's price, otherwise 0.
    * ``totalPoints`` is adjusted by the difference between the old and the
      new values of those two components.
    """
    # Default to the stored value; it is only replaced by a successful
    # recomputation below.
    nlcPoints = deal['nlcPoints']
    if item.get('available_price') == deal['available_price']:
        print("No need to compute points for %d , as price is still same" %(item['_id']))
    else:
        try:
            nlcPoints = getNlcPoints(item, deal['minNlc'], deal['maxNlc'], deal['available_price'])
        except Exception:
            traceback.print_exc()

    # Records without a manual deal carry None as threshold; spell the check
    # out instead of relying on how None orders against numbers.
    thresholdPrice = item['manualDealThresholdPrice']
    if thresholdPrice is not None and thresholdPrice >= deal['available_price']:
        dealPoints = item['dealPoints']
    else:
        dealPoints = 0

    totalPoints = deal['totalPoints'] - deal['nlcPoints'] + nlcPoints - deal['dealPoints'] + dealPoints
    get_mongo_connection().Catalog.Deals.update({'_id':deal['_id']},{"$set":{'totalPoints':totalPoints, 'nlcPoints': nlcPoints, 'dealPoints': dealPoints, 'manualDealThresholdPrice': item['manualDealThresholdPrice']}})
160
 
161
 
162
 
163
def _negativeDealSkus():
    """Return the negative-deal SKUs as a set, refreshing the cache if empty."""
    skus = mc.get("negative_deals")
    if skus is None:
        populateNegativeDeals()
        skus = mc.get("negative_deals")
    if skus is None:
        # The cache write did not stick; read the collection directly rather
        # than fail or silently treat every SKU as allowed.
        skus = get_mongo_connection().Catalog.NegativeDeals.find().distinct('sku')
    return set(skus)


def _beatsBest(candidate, bestPrice, bestSellerPoints):
    """True when candidate is cheaper, or equally priced with more bestSellerPoints."""
    price = candidate['available_price']
    if price < bestPrice:
        return True
    return price == bestPrice and bestSellerPoints < candidate['bestSellerPoints']


def recomputeDeal(item):
    """Recompute which deal of the item's bundle is shown.

    All Catalog.Deals rows with the item's ``skuBundleId`` are loaded.  The
    row matching ``item['_id']`` first gets its points refreshed through
    ``recomputePoints``.  Rows that are out of stock, listed as negative
    deals, or that belong to SHOPCLUES.COM with ``rank`` 0 are hidden
    (``showDeal`` 0, ``prepaidDeal`` 0) and take no further part.

    Among the remaining rows the best one (lowest ``available_price``, ties
    broken by higher ``bestSellerPoints``) is tracked separately for rows
    with ``codAvailable`` == 1 and for all other rows (the "prepaid"
    candidate).  The COD winner gets ``showDeal`` 1.  The prepaid winner gets
    ``prepaidDeal`` 1 only when there is no COD winner or it is strictly
    cheaper than the COD winner.  Every other row of the bundle is hidden.
    """
    print("Recomputing for bundleId %d" %(item.get('skuBundleId')))
    skuBundleId = item['skuBundleId']
    deals = get_mongo_connection().Catalog.Deals

    similarItems = list(deals.find({'skuBundleId':skuBundleId}).sort([('available_price',pymongo.ASCENDING)]))
    bestPrice = float("inf")
    bestOne = None
    bestSellerPoints = 0
    prepaidBestPrice = float("inf")
    prepaidBestOne = None
    prepaidBestSellerPoints = 0
    toUpdate = []
    # Looked up once per call (on the first row) instead of twice per row.
    negativeDeals = None

    for similarItem in similarItems:
        if similarItem['_id'] == item['_id']:
            try:
                recomputePoints(item, similarItem)
            except Exception:
                traceback.print_exc()

        codAvailable = similarItem['codAvailable'] == 1
        if negativeDeals is None:
            negativeDeals = _negativeDealSkus()

        if similarItem['in_stock'] == 0 or similarItem['_id'] in negativeDeals:
            deals.update({ '_id' : similarItem['_id'] }, {'$set':{'showDeal':0, 'prepaidDeal':0 }})
            continue
        if similarItem['source_id'] == SOURCE_MAP.get('SHOPCLUES.COM') and similarItem['rank']==0:
            deals.update({ '_id' : similarItem['_id'] }, {'$set':{'showDeal':0,'prepaidDeal':0 }})
            continue

        if codAvailable:
            if _beatsBest(similarItem, bestPrice, bestSellerPoints):
                bestOne = similarItem
                bestPrice = similarItem['available_price']
                bestSellerPoints = similarItem['bestSellerPoints']
        else:
            if _beatsBest(similarItem, prepaidBestPrice, prepaidBestSellerPoints):
                prepaidBestOne = similarItem
                prepaidBestPrice = similarItem['available_price']
                prepaidBestSellerPoints = similarItem['bestSellerPoints']

    if bestOne is not None or prepaidBestOne is not None:
        toUpdate = [similarItem['_id'] for similarItem in similarItems]
        if bestOne is not None:
            toUpdate.remove(bestOne['_id'])
            deals.update({ '_id' : bestOne['_id'] }, {'$set':{'showDeal':1,'prepaidDeal':0 }})
        if prepaidBestOne is not None:
            # A prepaid offer is only promoted when it undercuts the COD
            # winner (or when there is no COD winner at all); otherwise it
            # stays in toUpdate and is hidden with the rest.
            if bestOne is None or prepaidBestOne['available_price'] < bestOne['available_price']:
                toUpdate.remove(prepaidBestOne['_id'])
                deals.update({ '_id' : prepaidBestOne['_id'] }, {'$set':{'showDeal':0,'prepaidDeal':1 }})
    if len(toUpdate) > 0:
        deals.update({ '_id' : { "$in": toUpdate } }, {'$set':{'showDeal':0,'prepaidDeal':0 }},upsert=False, multi=True)
13914 kshitij.so 236
 
13828 kshitij.so 237
def main():
    """Script entry point: run one full populate-and-scrape pass."""
    populate()


if __name__ == '__main__':
    main()