Subversion Repositories SmartDukaan

Rev

Rev 14250 | Rev 14260 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
2
from dtr.utils.utils import to_java_date
13914 kshitij.so 3
from datetime import datetime, timedelta
13832 kshitij.so 4
from dtr.utils import AmazonPriceOnlyScraper
14174 kshitij.so 5
from multiprocessing import Pool as ThreadPool
14170 kshitij.so 6
from multiprocessing import cpu_count
14256 kshitij.so 7
import optparse
13828 kshitij.so 8
 
9
# Process-wide MongoClient cache; filled in lazily by get_mongo_connection().
con = None

# Command-line interface: -m / --m selects the MongoDB host (default localhost).
parser = optparse.OptionParser()
parser.add_option(
    "-m",
    "--m",
    metavar="mongo_host",
    help="The HOST where the mongo server is running",
    type="string",
    default="localhost",
    dest="mongoHost",
)

# Parsed at import time: get_mongo_connection() reads options.mongoHost as the
# default value of its ``host`` parameter when the function is defined.
options, args = parser.parse_args()
18
 
19
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the MongoClient of the current process, creating it on first use.

    The client is cached in the module-level ``con``. ``host`` and ``port``
    are only consulted when a new client has to be created; later calls return
    the cached client regardless of the arguments passed.

    Returns None (after printing the error) when the client cannot be created.
    """
    import os  # local import: only needed for the fork check below

    global con
    pid = os.getpid()
    owner_pid = getattr(get_mongo_connection, '_owner_pid', None)
    # PyMongo clients are not fork-safe. A client created in the parent and
    # inherited by a multiprocessing worker must not be reused there, so drop
    # the cached one and build a fresh client for this process.
    if con is not None and owner_pid is not None and owner_pid != pid:
        con = None
    if con is None:
        print("Establishing connection %s host and port %d" % (host, port))
        try:
            con = pymongo.MongoClient(host, port)
        except Exception as e:
            print(e)
            return None
        get_mongo_connection._owner_pid = pid
    return con
29
 
14147 kshitij.so 30
def populate():
    """Collect the listings to refresh and scrape them in a worker pool.

    Two sets of documents are merged, keyed by ``_id`` (first one seen wins):

    * Catalog.MasterData documents with ``source_id`` 1 that share a
      ``skuBundleId`` with any MasterData document whose ``rank`` is > 0;
    * Catalog.Deals documents with ``source_id`` 1, ``showDeal`` 1 and
      ``totalPoints`` > 0.

    Each collected document is passed to scrapeAmazon() through a
    multiprocessing pool of ``cpu_count() * 2`` worker processes.
    """
    catalog = get_mongo_connection().Catalog
    toScrapMap = {}

    # Gather the distinct bundle ids first so every bundle is looked up once,
    # in a single $in query, instead of one query per best-seller document.
    bundleIds = set()
    for bestSeller in catalog.MasterData.find({'rank': {'$gt': 0}}):
        bundleIds.add(bestSeller['skuBundleId'])
    if bundleIds:
        bundleQuery = {'skuBundleId': {'$in': list(bundleIds)}, 'source_id': 1}
        for data in catalog.MasterData.find(bundleQuery):
            toScrapMap.setdefault(data['_id'], data)

    dealQuery = {'source_id': 1, 'showDeal': 1, 'totalPoints': {'$gt': 0}}
    for deal in catalog.Deals.find(dealQuery):
        toScrapMap.setdefault(deal['_id'], deal)

    # NOTE: despite the alias, ThreadPool is multiprocessing.Pool (processes).
    pool = ThreadPool(cpu_count() * 2)
    try:
        pool.map(scrapeAmazon, list(toScrapMap.values()))
    except BaseException:
        # Do not leave worker processes behind when the run is aborted.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()
    print("joining threads at %s" % (str(datetime.now())))
14147 kshitij.so 47
 
48
 
49
def scrapeAmazon(data):
    """Refresh price and stock status of one listing, then recompute its deal.

    ``data`` is a catalog document (dict). The function:

    1. returns immediately when ``identifier`` is missing, None or blank;
    2. returns when ``priceUpdatedOn`` is newer than five minutes ago;
    3. scrapes the lowest price from the amazon.in offer-listing page of the
       identifier; a price > 0 means in stock, anything else out of stock;
    4. writes the result to Catalog.MasterData and Catalog.Deals for the
       document's ``_id`` (``available_price`` is only written when in stock);
    5. calls recomputeDeal() for the document's ``skuBundleId``; a failure
       there is reported on stdout and does not propagate.

    Returns None in all cases.
    """
    identifier = data.get('identifier')
    print(identifier)
    if identifier is None or len(identifier.strip()) == 0:
        return
    identifier = identifier.strip()

    # A document without a usable priceUpdatedOn is simply treated as stale.
    try:
        recentlyUpdated = data['priceUpdatedOn'] > to_java_date(datetime.now() - timedelta(minutes=5))
    except (KeyError, TypeError):
        recentlyUpdated = False
    if recentlyUpdated:
        print("sku id is already updated %s" % (data['_id'],))
        return

    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps" % (identifier,)
    print(url)
    lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)
    inStock = 1 if (lowestPrice is not None and lowestPrice > 0) else 0
    print(lowestPrice)
    print(inStock)

    # One timestamp for both fields so they cannot drift apart.
    now = to_java_date(datetime.now())
    masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
    dealFields = {'in_stock': inStock}
    if inStock:
        # Keep the previously stored price when nothing could be scraped.
        masterFields['available_price'] = lowestPrice
        dealFields['available_price'] = lowestPrice

    catalog = get_mongo_connection().Catalog
    catalog.MasterData.update({'_id': data['_id']}, {'$set': masterFields}, multi=True)
    catalog.Deals.update({'_id': data['_id']}, {'$set': dealFields}, multi=True)

    skuBundleId = data['skuBundleId']
    try:
        recomputeDeal(skuBundleId)
    except Exception as e:
        # Best effort: a failed recompute must not fail the scrape itself.
        print("Unable to compute deal for  %s (%s)" % (skuBundleId, e))
13914 kshitij.so 83
 
84
 
13828 kshitij.so 85
 
13914 kshitij.so 86
def recomputeDeal(skuBundleId):
    """Flag the best deal of a bundle with showDeal 1 and all others with 0.

    All Catalog.Deals documents with the given ``skuBundleId`` are considered.
    A document is disqualified when ``in_stock`` is 0, ``maxprice`` is None or
    ``maxprice`` is below ``available_price``. Among the remaining documents
    the lowest ``available_price`` wins; ties go to the higher
    ``bestSellerPoints``. If every document is disqualified, all of them end
    up with showDeal 0.
    """
    print("Recomputing for bundleId %s" % (skuBundleId,))

    deals = get_mongo_connection().Catalog.Deals
    similarItems = list(deals.find({'skuBundleId': skuBundleId}).sort([('available_price', pymongo.ASCENDING)]))

    bestOne = None
    bestPrice = float("inf")
    bestSellerPoints = 0
    for similarItem in similarItems:
        if (similarItem['in_stock'] == 0
                or similarItem['maxprice'] is None
                or similarItem['maxprice'] < similarItem['available_price']):
            continue
        price = similarItem['available_price']
        if price < bestPrice or (price == bestPrice and bestSellerPoints < similarItem['bestSellerPoints']):
            bestOne = similarItem
            bestPrice = price
            bestSellerPoints = similarItem['bestSellerPoints']

    if bestOne is not None:
        deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1}})

    # Everything that is not the winner is hidden with one multi-update,
    # disqualified documents included.
    toUpdate = [item['_id'] for item in similarItems if item is not bestOne]
    if toUpdate:
        deals.update({'_id': {'$in': toUpdate}}, {'$set': {'showDeal': 0}}, upsert=False, multi=True)
116
 
13828 kshitij.so 117
def main():
    """Script entry point: run populate() once."""
    populate()


# The guard keeps the run from starting when the module is merely imported.
if __name__ == "__main__":
    main()