Subversion Repositories SmartDukaan

Rev

Rev 14174 | Rev 14256 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
2
from dtr.utils.utils import to_java_date
13914 kshitij.so 3
from datetime import datetime, timedelta
13832 kshitij.so 4
from dtr.utils import AmazonPriceOnlyScraper
14174 kshitij.so 5
from multiprocessing import Pool as ThreadPool
14170 kshitij.so 6
from multiprocessing import cpu_count
13828 kshitij.so 7
 
8
# Cached MongoClient for the current process; created lazily by
# get_mongo_connection().
con = None
# PID of the process that created ``con``. A mismatch means we are running in
# a forked worker that inherited the parent's client.
_con_pid = None


def get_mongo_connection(host='localhost', port=27017):
    """Return this process's cached MongoClient, creating it on first use.

    ``host`` and ``port`` are only used when a new client has to be created;
    while a client is cached for the current process they are ignored.

    The cache is tied to the process id. This module fans work out through
    ``multiprocessing.Pool`` and a MongoClient must not be shared across a
    fork, so a worker that inherited the parent's client opens its own
    instead of reusing it.

    Returns None, after printing the error, if the client cannot be created.
    """
    import os

    global con, _con_pid
    pid = os.getpid()
    if con is None or _con_pid != pid:
        print("Establishing connection %s host and port %d" % (host, port))
        try:
            con = pymongo.MongoClient(host, port)
        except Exception as e:
            print(e)
            return None
        _con_pid = pid
    return con
20
 
14147 kshitij.so 21
def populate():
    """Collect the SKUs to re-price and scrape them on a process pool.

    Two sets of documents are gathered and de-duplicated on ``_id``:

    * ``Catalog.MasterData`` entries with ``source_id`` 1 that share a
      ``skuBundleId`` with any best seller (``rank`` > 0);
    * ``Catalog.Deals`` entries with ``source_id`` 1 that are currently shown
      (``showDeal`` 1) and have ``totalPoints`` > 0.

    When the same ``_id`` appears in both sets the MasterData document wins.
    Every collected document is handed to ``scrapeAmazon``. Despite the
    ``ThreadPool`` alias the pool is a ``multiprocessing.Pool`` of worker
    processes, sized at twice the CPU count.
    """
    db = get_mongo_connection().Catalog
    toScrapMap = {}

    # Collect the distinct bundle ids first, so the matching listings are
    # fetched with one ``$in`` query instead of one query per best seller.
    bundleIds = set()
    for bestSeller in db.MasterData.find({'rank': {'$gt': 0}}):
        bundleIds.add(bestSeller['skuBundleId'])
    if bundleIds:
        listingQuery = {'skuBundleId': {'$in': list(bundleIds)}, 'source_id': 1}
        for data in db.MasterData.find(listingQuery):
            toScrapMap.setdefault(data['_id'], data)

    dealQuery = {'source_id': 1, 'showDeal': 1, 'totalPoints': {'$gt': 0}}
    for deal in db.Deals.find(dealQuery):
        toScrapMap.setdefault(deal['_id'], deal)

    pool = ThreadPool(cpu_count() * 2)
    try:
        pool.map(scrapeAmazon, list(toScrapMap.values()))
        pool.close()
    except BaseException:
        # Do not leave worker processes behind when the run is aborted.
        pool.terminate()
        raise
    finally:
        pool.join()
    print("joining threads at %s" % (str(datetime.now())))
14147 kshitij.so 38
 
39
 
40
def scrapeAmazon(data):
    """Refresh price and stock of one SKU from its Amazon offer listing.

    ``data`` is a document as collected by ``populate()``. It must contain
    ``identifier`` (used to build the offer-listing URL; presumably the
    Amazon ASIN) and ``_id``. ``priceUpdatedOn`` and ``skuBundleId`` are
    used when present.

    Steps:

    1. Return immediately if ``identifier`` is None or blank.
    2. Return if ``priceUpdatedOn`` is newer than five minutes ago.
    3. Scrape the lowest price. A price > 0 means in stock; anything else
       marks the SKU out of stock and leaves ``available_price`` untouched.
    4. Write the result to ``Catalog.MasterData`` and ``Catalog.Deals``.
    5. Recompute the deal for the SKU's bundle. Failures there are reported
       and swallowed so one bad bundle does not abort the worker.

    Always returns None.
    """
    identifier = data['identifier']
    print(str(identifier))
    if identifier is None or len(identifier.strip()) == 0:
        return

    # Skip SKUs that were refreshed very recently. A missing or
    # non-comparable timestamp simply means the SKU is treated as stale.
    cutoff = to_java_date(datetime.now() - timedelta(minutes=5))
    try:
        recentlyUpdated = data['priceUpdatedOn'] > cutoff
    except (KeyError, TypeError):
        recentlyUpdated = False
    if recentlyUpdated:
        print("sku id is already updated %s" % (data['_id'],))
        return

    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps" % (identifier.strip(),)
    print(url)
    # NOTE(review): an exception raised by the scraper propagates out of the
    # worker and fails the whole pool.map() call - confirm that is intended.
    lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)
    print(lowestPrice)

    hasPrice = lowestPrice is not None and lowestPrice > 0
    inStock = 1 if hasPrice else 0
    print(inStock)

    # Use a single timestamp so updatedOn and priceUpdatedOn always agree.
    now = to_java_date(datetime.now())
    masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
    dealFields = {'in_stock': inStock}
    if hasPrice:
        masterFields['available_price'] = lowestPrice
        dealFields['available_price'] = lowestPrice

    db = get_mongo_connection().Catalog
    db.MasterData.update({'_id': data['_id']}, {'$set': masterFields}, multi=True)
    db.Deals.update({'_id': data['_id']}, {'$set': dealFields}, multi=True)

    try:
        recomputeDeal(data['skuBundleId'])
    except Exception as e:
        # .get() keeps the handler itself from raising when the document has
        # no skuBundleId (which is one way to end up here).
        print("Unable to compute deal for  %s" % (data.get('skuBundleId'),))
        print(e)
13914 kshitij.so 74
 
75
 
13828 kshitij.so 76
 
13914 kshitij.so 77
def recomputeDeal(skuBundleId):
    """Lets recompute deal for this bundle.

    Loads every ``Catalog.Deals`` document with the given ``skuBundleId``
    and flags exactly one of them, the best eligible offer, with
    ``showDeal`` 1. All the others get ``showDeal`` 0.

    An item is ineligible when it is out of stock (``in_stock`` == 0), has
    no ``maxprice``, or its ``maxprice`` is below its ``available_price``.
    Among eligible items the lowest ``available_price`` wins; ties go to the
    higher ``bestSellerPoints``, and on a full tie the item that sorts first
    is kept. If nothing is eligible, every item ends up with ``showDeal`` 0.

    Issues at most two writes: one to promote the winner and one multi-update
    to demote the rest.
    """
    print("Recomputing for bundleId %s" % (skuBundleId,))

    deals = get_mongo_connection().Catalog.Deals
    cursor = deals.find({'skuBundleId': skuBundleId})
    similarItems = list(cursor.sort([('available_price', pymongo.ASCENDING)]))

    bestPrice = float("inf")
    bestOne = None
    bestSellerPoints = 0
    for similarItem in similarItems:
        price = similarItem['available_price']
        maxPrice = similarItem['maxprice']
        if similarItem['in_stock'] == 0 or maxPrice is None or maxPrice < price:
            continue
        if price < bestPrice or (
                price == bestPrice
                and bestSellerPoints < similarItem['bestSellerPoints']):
            bestOne = similarItem
            bestPrice = price
            bestSellerPoints = similarItem['bestSellerPoints']

    if bestOne is None:
        toUpdate = [item['_id'] for item in similarItems]
    else:
        # Promote first, then demote, so the bundle is never left without a
        # visible deal between the two writes.
        deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1}})
        toUpdate = [item['_id'] for item in similarItems
                    if item['_id'] != bestOne['_id']]

    if toUpdate:
        deals.update({'_id': {'$in': toUpdate}}, {'$set': {'showDeal': 0}},
                     upsert=False, multi=True)
107
 
13828 kshitij.so 108
def main():
    """Script entry point: run a single populate-and-scrape pass."""
    populate()
13828 kshitij.so 110
 
111
# Only start scraping when run as a script, not when imported.
if __name__ == '__main__':
    main()