Subversion Repositories SmartDukaan

Rev

Rev 14157 | Rev 14174 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
2
from dtr.utils.utils import to_java_date
13914 kshitij.so 3
from datetime import datetime, timedelta
13832 kshitij.so 4
from dtr.utils import AmazonPriceOnlyScraper
14170 kshitij.so 5
from multiprocessing.dummy import Pool as ThreadPool
6
from multiprocessing import cpu_count
13828 kshitij.so 7
 
8
# Process-wide MongoClient. Created by the first successful call to
# get_mongo_connection() and reused by every later call.
con = None


def get_mongo_connection(host='localhost', port=27017):
    """Return the shared MongoClient, creating it on first use.

    host and port are only used while no client is cached; once a client
    exists, it is returned regardless of the arguments passed.

    Returns None (after printing the error) when the client cannot be
    created. Nothing is cached in that case, so the next call tries again.
    """
    global con
    if con is None:
        print("Establishing connection %s host and port %d" % (host, port))
        try:
            con = pymongo.MongoClient(host, port)
        except Exception as e:
            # Best-effort: report the failure and hand None to the caller.
            print(e)
            return None
    return con
20
 
14147 kshitij.so 21
def populate():
    """Scrape prices for the source_id 1 rows of every ranked bundle.

    Steps:
      1. Read every MasterData document with rank > 0 and collect the
         distinct skuBundleId values.
      2. Fetch the MasterData documents with source_id 1 belonging to those
         bundles, keyed by _id so each document is scraped once.
      3. Print the work list, then run scrapeAmazon over it on a thread
         pool of cpu_count() * 2 workers.

    An exception raised by a worker propagates out of this function; the
    pool is still closed and joined first.
    """
    masterData = get_mongo_connection().Catalog.MasterData

    # Several ranked documents can share a bundle; de-duplicate the ids so
    # the lookup is one $in query instead of one query per ranked document.
    bundleIds = list(set(
        bestSeller['skuBundleId']
        for bestSeller in masterData.find({'rank': {'$gt': 0}})
    ))

    toScrapMap = {}
    amazonBestSellers = masterData.find(
        {'skuBundleId': {'$in': bundleIds}, 'source_id': 1})
    for data in amazonBestSellers:
        if data['_id'] not in toScrapMap:
            toScrapMap[data['_id']] = data

    for k, y in toScrapMap.items():
        print("%s \t%s" % (k, y))

    pool = ThreadPool(cpu_count() * 2)
    try:
        pool.map(scrapeAmazon, list(toScrapMap.values()))
    finally:
        # Always release the worker threads, even if a scrape raised.
        pool.close()
        pool.join()
    print("joining threads")
    print(datetime.now())
14147 kshitij.so 39
 
40
 
41
def scrapeAmazon(data):
    """Scrape the lowest offer price for one MasterData document and store it.

    data is a MasterData document. The keys read here are 'identifier',
    'priceUpdatedOn' (may be absent), '_id' and 'skuBundleId'.

    Returns None in every case. Nothing is scraped when the identifier is
    None or blank, or when priceUpdatedOn is newer than five minutes ago.

    Otherwise the price read from the offer-listing page is handled as:
      * price > 0: in_stock becomes 1 and available_price is updated in
        both MasterData and Deals;
      * anything else: in_stock becomes 0 and available_price is left as
        it was.
    MasterData also gets fresh updatedOn / priceUpdatedOn timestamps, and
    finally the deal for the document's bundle is recomputed (best-effort).
    """
    identifier = data['identifier']
    print(str(identifier))
    if identifier is None or len(identifier.strip()) == 0:
        return

    try:
        freshAfter = to_java_date(datetime.now() - timedelta(minutes=5))
        if data['priceUpdatedOn'] > freshAfter:
            print("sku id is already updated %s" % (data['_id'],))
            return
    except Exception:
        # Best-effort freshness check: if the timestamp is missing or
        # cannot be compared, fall through and scrape anyway.
        pass

    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps" % (identifier.strip())
    print(url)
    lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)
    print(lowestPrice)
    inStock = 1 if lowestPrice > 0 else 0
    print(inStock)

    # One timestamp for both fields so they cannot drift apart.
    now = to_java_date(datetime.now())
    masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
    dealFields = {'in_stock': inStock}
    if inStock:
        # Only overwrite the stored price when a real price was found.
        masterFields['available_price'] = lowestPrice
        dealFields['available_price'] = lowestPrice

    catalog = get_mongo_connection().Catalog
    selector = {'_id': data['_id']}
    catalog.MasterData.update(selector, {'$set': masterFields}, multi=True)
    catalog.Deals.update(selector, {'$set': dealFields}, multi=True)

    try:
        recomputeDeal(data['skuBundleId'])
    except Exception as e:
        # A failed recompute must not abort the remaining scrapes; report
        # which bundle failed and why.
        print("Unable to compute deal for  %s" % (data.get('skuBundleId'),))
        print(e)
13914 kshitij.so 75
 
76
 
13828 kshitij.so 77
 
13914 kshitij.so 78
def recomputeDeal(skuBundleId):
    """Recompute which Deals document of a bundle is shown as the deal.

    A document is eligible when in_stock is not 0, maxprice is not None and
    maxprice is not below available_price. Among eligible documents the
    lowest available_price wins; equal prices are broken by the higher
    bestSellerPoints, and on a full tie the earlier document is kept.

    The winner gets showDeal 1 and every other document of the bundle gets
    showDeal 0. When no document is eligible they all get showDeal 0.
    """
    print("Recomputing for bundleId %s" % (skuBundleId,))

    deals = get_mongo_connection().Catalog.Deals
    similarItems = list(
        deals.find({'skuBundleId': skuBundleId})
             .sort([('available_price', pymongo.ASCENDING)]))

    bestOne = None
    for similarItem in similarItems:
        if (similarItem['in_stock'] == 0
                or similarItem['maxprice'] is None
                or similarItem['maxprice'] < similarItem['available_price']):
            # Not eligible; it is hidden by the bulk update below.
            continue
        if bestOne is None or similarItem['available_price'] < bestOne['available_price']:
            bestOne = similarItem
        elif (similarItem['available_price'] == bestOne['available_price']
                and bestOne['bestSellerPoints'] < similarItem['bestSellerPoints']):
            bestOne = similarItem

    if bestOne is not None:
        deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1}})

    # Hide everything except the winner with a single multi-document write
    # instead of one write per ineligible document plus a bulk write.
    toUpdate = [item['_id'] for item in similarItems if item is not bestOne]
    if toUpdate:
        deals.update({'_id': {'$in': toUpdate}}, {'$set': {'showDeal': 0}},
                     upsert=False, multi=True)
108
 
13828 kshitij.so 109
def main():
    """Script entry point: run one populate() pass."""
    populate()
13828 kshitij.so 111
 
112
# Run a scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()