Subversion Repositories SmartDukaan

Rev

Rev 14147 | Rev 14170 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
2
from dtr.utils.utils import to_java_date
13914 kshitij.so 3
from datetime import datetime, timedelta
13832 kshitij.so 4
from dtr.utils import AmazonPriceOnlyScraper
14147 kshitij.so 5
from multiprocessing.dummy import Pool as ThreadPool 
13828 kshitij.so 6
 
7
# Shared MongoDB client; stays None until get_mongo_connection() creates it.
con = None
# Single scraper instance reused by every scrapeAmazon() call.
scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
9
 
10
 
11
def get_mongo_connection(host='localhost', port=27017):
    """Return the shared MongoDB client, creating it on first use.

    The client is cached in the module-level ``con``. Once it exists, later
    calls return it unchanged and ignore ``host`` and ``port``.

    host -- MongoDB host used when the client has to be created.
    port -- MongoDB port used when the client has to be created.

    Returns the cached client, or None when creating it raised (the error
    is printed, not re-raised).
    """
    global con
    if con is not None:
        return con
    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        print(e)
        return None
    return con
21
 
14147 kshitij.so 22
def populate():
    """Refresh the price of every source-1 listing tied to a ranked product.

    Reads all ``Catalog.MasterData`` documents whose ``rank`` is above zero,
    collects the ``source_id`` 1 documents (presumably the Amazon listings)
    that share a ``skuBundleId`` with any of them, de-duplicated by ``_id``,
    prints what was collected and hands each document to ``scrapeAmazon`` on
    a pool of 50 worker threads.
    """
    master_data = get_mongo_connection().Catalog.MasterData
    to_scrape = {}
    seen_bundles = set()
    for best_seller in list(master_data.find({'rank': {'$gt': 0}})):
        bundle_id = best_seller['skuBundleId']
        # Several ranked products can share one bundle; asking for it again
        # would only return documents that are already collected.
        if bundle_id in seen_bundles:
            continue
        seen_bundles.add(bundle_id)
        for doc in master_data.find({'skuBundleId': bundle_id, 'source_id': 1}):
            to_scrape.setdefault(doc['_id'], doc)

    for doc_id, doc in to_scrape.items():
        print("%s \t%s" % (doc_id, doc))

    pool = ThreadPool(50)
    try:
        pool.map(scrapeAmazon, list(to_scrape.values()))
    finally:
        # Release the worker threads even when one of the scrapes raised.
        pool.close()
        pool.join()
    print("joining threads")
39
 
40
 
41
def scrapeAmazon(data):
    """Scrape the current lowest price for one listing and store the result.

    data -- a ``Catalog.MasterData`` document (dict). Reads ``identifier``
            (inserted into the offer-listing URL; presumably the ASIN),
            ``_id``, ``skuBundleId`` and, when present, ``priceUpdatedOn``.

    Returns without doing anything when the identifier is missing or blank,
    or when ``priceUpdatedOn`` is newer than five minutes ago. Otherwise the
    scraped price decides ``in_stock`` (1 when the price is above zero, else
    0), the matching ``MasterData`` and ``Deals`` documents are updated, and
    the deal for the bundle is recomputed. Always returns None.
    """
    identifier = data.get('identifier')
    print(str(identifier))
    if identifier is None or not identifier.strip():
        return

    cutoff = to_java_date(datetime.now() - timedelta(minutes=5))
    try:
        if data['priceUpdatedOn'] > cutoff:
            print("sku id is already updated %s" % (data['_id'],))
            return
    except (KeyError, TypeError):
        # No usable priceUpdatedOn on this document: treat it as stale.
        pass

    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps" % (identifier.strip(),)
    print(url)
    lowestPrice = scraperAmazon.read(url)
    print(lowestPrice)
    # A missing price counts as out of stock, as does zero or a negative one.
    inStock = 1 if (lowestPrice is not None and lowestPrice > 0) else 0
    print(inStock)

    # One timestamp so updatedOn and priceUpdatedOn always agree.
    now = to_java_date(datetime.now())
    master_fields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
    deal_fields = {'in_stock': inStock}
    if inStock:
        # Only overwrite the stored price when the scrape produced one.
        master_fields['available_price'] = lowestPrice
        deal_fields['available_price'] = lowestPrice

    catalog = get_mongo_connection().Catalog
    catalog.MasterData.update({'_id': data['_id']}, {'$set': master_fields}, multi=True)
    catalog.Deals.update({'_id': data['_id']}, {'$set': deal_fields}, multi=True)

    bundle_id = data.get('skuBundleId')
    try:
        recomputeDeal(bundle_id)
    except Exception as e:
        # Best effort: a failed recompute must not abort the other scrapes,
        # but report why it failed instead of hiding the error.
        print("Unable to compute deal for %s: %s" % (bundle_id, e))
13914 kshitij.so 74
 
75
 
13828 kshitij.so 76
 
13914 kshitij.so 77
def recomputeDeal(skuBundleId):
    """Pick the one deal to show for a bundle and hide all the others.

    skuBundleId -- bundle whose ``Catalog.Deals`` documents are re-evaluated.

    A deal is eligible when it is in stock and its ``maxprice`` is set and
    not below its ``available_price``. Among eligible deals the lowest
    ``available_price`` wins; equal prices are decided by the higher
    ``bestSellerPoints``. The winner gets ``showDeal`` 1 and every other
    deal of the bundle gets ``showDeal`` 0. When no deal is eligible, all
    deals of the bundle are hidden.
    """
    print("Recomputing for bundleId %s" % (skuBundleId,))

    deals = get_mongo_connection().Catalog.Deals
    similarItems = list(
        deals.find({'skuBundleId': skuBundleId}).sort([('available_price', pymongo.ASCENDING)])
    )

    bestPrice = float("inf")
    bestOne = None
    bestSellerPoints = 0
    for similarItem in similarItems:
        if (similarItem['in_stock'] == 0
                or similarItem['maxprice'] is None
                or similarItem['maxprice'] < similarItem['available_price']):
            # Ineligible deals are hidden by the single bulk update below.
            continue
        cheaper = similarItem['available_price'] < bestPrice
        tie_with_more_points = (similarItem['available_price'] == bestPrice
                                and bestSellerPoints < similarItem['bestSellerPoints'])
        if cheaper or tie_with_more_points:
            bestOne = similarItem
            bestPrice = similarItem['available_price']
            bestSellerPoints = similarItem['bestSellerPoints']

    # Everything except the winner is hidden. With no winner that is every
    # deal of the bundle, since all of them were ineligible.
    toUpdate = [item['_id'] for item in similarItems if item is not bestOne]

    if bestOne is not None:
        deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1}})
    if toUpdate:
        deals.update({'_id': {"$in": toUpdate}}, {'$set': {'showDeal': 0}}, upsert=False, multi=True)
107
 
13828 kshitij.so 108
def main():
    """Script entry point: run one full refresh pass via populate()."""
    populate()
13828 kshitij.so 110
 
111
# Run the refresh only when executed as a script, not when imported.
if __name__ == '__main__':
    main()