Subversion Repositories SmartDukaan

Rev

Rev 14256 | Rev 14308 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
2
from dtr.utils.utils import to_java_date
13914 kshitij.so 3
from datetime import datetime, timedelta
13832 kshitij.so 4
from dtr.utils import AmazonPriceOnlyScraper
14174 kshitij.so 5
from multiprocessing import Pool as ThreadPool
14170 kshitij.so 6
from multiprocessing import cpu_count
14256 kshitij.so 7
import optparse
13828 kshitij.so 8
 
9
# Process-wide MongoClient; stays None until get_mongo_connection() creates it.
con = None

# Command line: -m / --m names the MongoDB host, "localhost" when omitted.
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost",
    default="localhost",
    type="string",
    help="The HOST where the mongo server is running",
    metavar="mongo_host",
)

# Parsed once at import time; options.mongoHost is read by the definitions below.
options, args = parser.parse_args()
18
 
19
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the shared MongoClient, creating it on the first call.

    The client is cached in the module-level ``con``. Once it is set, ``host``
    and ``port`` are ignored and the cached client is returned as-is.

    Returns None when constructing the client raises; the exception is printed
    and nothing is cached, so a later call attempts the connection again.
    """
    global con
    if con is not None:
        return con

    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        print(e)
        return None
    return con
29
 
14147 kshitij.so 30
def _reset_mongo_connection():
    """Pool initializer: forget the MongoClient inherited from the parent.

    populate() queries MongoDB before it creates the worker pool, so a forked
    worker starts with the parent's client cached in ``con``. PyMongo documents
    MongoClient as not fork-safe, so every worker clears the cache here and
    get_mongo_connection() builds a fresh client on first use in that process.
    """
    global con
    con = None


def populate():
    """Collect the MasterData records to re-price and scrape them in parallel.

    Two sets of records are gathered, keyed by ``_id`` so each is scraped once:

    * every ``source_id`` 1 MasterData record that shares a ``skuBundleId``
      with a record whose ``rank`` is greater than 0;
    * the MasterData record behind every ``source_id`` 1 deal that has
      ``showDeal`` 1 and ``totalPoints`` greater than 0.

    The records are then handed to scrapeAmazon() through a process pool of
    ``cpu_count() * 2`` workers. Returns None.
    """
    toScrapMap = {}
    catalog = get_mongo_connection().Catalog

    bestSellers = list(catalog.MasterData.find({'rank': {'$gt': 0}}))
    for bestSeller in bestSellers:
        amazonBestSellers = catalog.MasterData.find(
            {'skuBundleId': bestSeller['skuBundleId'], 'source_id': 1})
        for data in amazonBestSellers:
            # First record seen for an _id wins; later duplicates are ignored.
            toScrapMap.setdefault(data['_id'], data)

    dealFlagged = list(catalog.Deals.find(
        {'source_id': 1, 'showDeal': 1, 'totalPoints': {'$gt': 0}}))
    for deal in dealFlagged:
        if deal['_id'] in toScrapMap:
            continue
        data = catalog.MasterData.find_one({'_id': deal['_id']})
        if data is None:
            # A deal without a MasterData row cannot be scraped; skip it
            # instead of failing the whole run on an empty result.
            print("No MasterData record for deal %s, skipping" % (deal['_id'],))
            continue
        toScrapMap[deal['_id']] = data

    pool = ThreadPool(cpu_count() * 2, initializer=_reset_mongo_connection)
    try:
        pool.map(scrapeAmazon, toScrapMap.values())
    finally:
        # Always release the workers, even when a scrape raised.
        pool.close()
        pool.join()
    print("joining threads at %s" % (str(datetime.now())))
14147 kshitij.so 48
 
49
 
50
def scrapeAmazon(data):
    """Scrape the lowest offer price for one MasterData record and store it.

    ``data`` is a MasterData document. The keys read here are ``identifier``
    (inserted into the amazon.in offer-listing URL), ``_id``,
    ``priceUpdatedOn`` and ``skuBundleId``.

    The record is skipped when its identifier is None or blank, or when its
    ``priceUpdatedOn`` is newer than five minutes ago. Otherwise the price is
    read through AmazonPriceOnlyScraper and written to both
    ``Catalog.MasterData`` and ``Catalog.Deals``: a positive price marks the
    item in stock and updates ``available_price``; anything else marks it out
    of stock and leaves the stored price alone. Finally the deal for the
    record's bundle is recomputed; a failure there is reported, not raised.

    Always returns None.
    """
    identifier = data['identifier']
    print(str(identifier))
    if identifier is None or len(identifier.strip()) == 0:
        return

    try:
        if data['priceUpdatedOn'] > to_java_date(datetime.now() - timedelta(minutes=5)):
            print("sku id is already updated %s" % (data['_id'],))
            return
    except Exception:
        # Best effort: a record without a usable priceUpdatedOn is scraped
        # again. Unlike a bare except, this lets Ctrl-C / SystemExit through.
        pass

    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps" % (identifier.strip())
    print(url)
    lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)
    # Only a positive number counts as "an offer exists".
    inStock = 1 if (lowestPrice is not None and lowestPrice > 0) else 0
    print(lowestPrice)
    print(inStock)

    # One timestamp for both fields so they cannot differ within a write.
    now = to_java_date(datetime.now())
    masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
    dealFields = {'in_stock': inStock}
    if inStock:
        masterFields['available_price'] = lowestPrice
        dealFields['available_price'] = lowestPrice

    catalog = get_mongo_connection().Catalog
    catalog.MasterData.update({'_id': data['_id']}, {'$set': masterFields}, multi=True)
    catalog.Deals.update({'_id': data['_id']}, {'$set': dealFields}, multi=True)

    # Look the bundle id up outside the handler: reading it inside the error
    # message would raise again when the missing key is the actual problem.
    try:
        bundleId = data['skuBundleId']
    except KeyError:
        print("Unable to compute deal for record %s: no skuBundleId" % (data['_id'],))
        return
    try:
        recomputeDeal(bundleId)
    except Exception as e:
        print("Unable to compute deal for %s: %s" % (bundleId, e))
13914 kshitij.so 84
 
85
 
13828 kshitij.so 86
 
13914 kshitij.so 87
def recomputeDeal(skuBundleId):
    """Choose which deal of a bundle is shown and hide all the others.

    Loads every ``Catalog.Deals`` entry with the given ``skuBundleId``, ordered
    by ascending ``available_price``. An entry is disqualified, and gets
    ``showDeal`` 0 immediately, when it is out of stock, has no ``maxprice``,
    or its ``maxprice`` is below its ``available_price``.

    Among the remaining entries the lowest ``available_price`` wins, with a
    higher ``bestSellerPoints`` breaking price ties. The winner gets
    ``showDeal`` 1 and every other entry of the bundle gets ``showDeal`` 0.
    When no entry qualifies, nothing beyond the per-entry hiding is written.
    """
    print("%s %s" % ("Recomputing for bundleId", skuBundleId))

    deals = get_mongo_connection().Catalog.Deals
    cursor = deals.find({'skuBundleId': skuBundleId})
    candidates = list(cursor.sort([('available_price', pymongo.ASCENDING)]))

    winner = None
    winnerPrice = float("inf")
    winnerPoints = 0
    for candidate in candidates:
        disqualified = (
            candidate['in_stock'] == 0
            or candidate['maxprice'] is None
            or candidate['maxprice'] < candidate['available_price']
        )
        if disqualified:
            deals.update({'_id': candidate['_id']}, {'$set': {'showDeal': 0}})
            continue

        price = candidate['available_price']
        # Strictly cheaper wins; at an equal price, more bestSellerPoints wins.
        if price < winnerPrice or (
                price == winnerPrice and winnerPoints < candidate['bestSellerPoints']):
            winner = candidate
            winnerPrice = price
            winnerPoints = candidate['bestSellerPoints']

    if winner is None:
        return

    losers = [candidate['_id'] for candidate in candidates]
    losers.remove(winner['_id'])
    deals.update({'_id': winner['_id']}, {'$set': {'showDeal': 1}})
    if losers:
        deals.update({'_id': {"$in": losers}}, {'$set': {'showDeal': 0}}, upsert=False, multi=True)
117
 
13828 kshitij.so 118
def main():
    """Script entry point: run a single populate() pass."""
    populate()


# Run only when executed as a script, not when the module is imported.
if __name__ == '__main__':
    main()