Subversion Repositories SmartDukaan

Rev

Rev 14260 | Rev 14325 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
2
from dtr.utils.utils import to_java_date
13914 kshitij.so 3
from datetime import datetime, timedelta
14308 kshitij.so 4
from dtr.utils import AmazonPriceOnlyScraper, AmazonDealScraper
14174 kshitij.so 5
from multiprocessing import Pool as ThreadPool
14170 kshitij.so 6
from multiprocessing import cpu_count
14256 kshitij.so 7
import optparse
13828 kshitij.so 8
 
9
# Process-wide MongoDB client. It starts out unset and is filled in lazily by
# get_mongo_connection() the first time a connection is requested.
con = None

# Command-line interface: a single optional flag (-m / --m) naming the host
# on which the MongoDB server runs.
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost",
    type="string",
    default="localhost",
    metavar="mongo_host",
    help="The HOST where the mongo server is running",
)

# Parsed at module load time: get_mongo_connection() binds options.mongoHost
# as the default value of its ``host`` parameter, so ``options`` has to exist
# before that function is defined.
options, args = parser.parse_args()
18
 
19
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the shared MongoDB client, creating it on first use.

    host -- MongoDB host name. Defaults to the value of the -m/--m
            command-line option as captured when this function was defined.
    port -- MongoDB TCP port (default 27017).

    The client is cached in the module-level ``con``. Once it has been
    created, ``host`` and ``port`` passed to later calls are ignored and the
    cached client is returned as is.

    Returns the cached client, or None when constructing the client raised
    (the exception is printed rather than propagated).
    """
    global con
    if con is None:
        print("Establishing connection %s host and port %d" % (host, port))
        try:
            con = pymongo.MongoClient(host, port)
        except Exception as e:
            # Best effort: report the failure and hand None to the caller.
            # NOTE(review): callers in this file dereference the result
            # directly, so a None here surfaces as an AttributeError there.
            print(e)
            return None
    return con
29
 
14147 kshitij.so 30
def populate():
    """Collect every SKU that needs a price refresh and scrape them in parallel.

    Three sources feed the work list, keyed by MasterData ``_id`` so that each
    SKU is scraped at most once:

    1. MasterData rows with ``source_id`` 1 that share a ``skuBundleId`` with
       any row whose ``rank`` is greater than 0.
    2. Deals rows with ``source_id`` 1, ``showDeal`` 1 and ``totalPoints``
       greater than 0.
    3. ManualDeals rows with ``source_id`` 1 whose start/end window contains
       the current time; these additionally tag the SKU with ``dealFlag`` 1
       and the manual deal's ``dealType``.

    The collected documents are then handed to scrapeAmazon() through a
    multiprocessing pool of ``cpu_count() * 2`` workers.
    """
    toScrapMap = {}
    catalog = get_mongo_connection().Catalog

    # Source 1: listings bundled with a ranked item. The outer result set is
    # materialised up front because a nested query runs for every row.
    bestSellers = list(catalog.MasterData.find({'rank': {'$gt': 0}}))
    for bestSeller in bestSellers:
        bundleQuery = {'skuBundleId': bestSeller['skuBundleId'], 'source_id': 1}
        for data in catalog.MasterData.find(bundleQuery):
            # First document seen for an _id wins.
            toScrapMap.setdefault(data['_id'], data)

    # Source 2: deals that are currently flagged to be shown.
    dealQuery = {'source_id': 1, 'showDeal': 1, 'totalPoints': {'$gt': 0}}
    for deal in list(catalog.Deals.find(dealQuery)):
        if deal['_id'] in toScrapMap:
            continue
        data = catalog.MasterData.find_one({'_id': deal['_id']})
        if data is None:
            # A deal without a MasterData row cannot be scraped; skip it the
            # same way manual deals without a MasterData row are skipped.
            continue
        toScrapMap[deal['_id']] = data

    # Source 3: manual deals active right now. The timestamp is taken once so
    # both bounds of the window are compared against the same instant.
    now = to_java_date(datetime.now())
    manualQuery = {'startDate': {'$lte': now}, 'endDate': {'$gte': now}, 'source_id': 1}
    for manualDeal in list(catalog.ManualDeals.find(manualQuery)):
        sku = manualDeal['sku']
        data = toScrapMap.get(sku)
        if data is None:
            data = catalog.MasterData.find_one({'_id': sku})
            if data is None:
                continue
            toScrapMap[sku] = data
        # Tag the (new or already collected) document as a manual deal.
        data['dealFlag'] = 1
        data['dealType'] = manualDeal['dealType']

    # Despite the alias, ThreadPool is multiprocessing.Pool: worker processes.
    pool = ThreadPool(cpu_count() * 2)
    try:
        pool.map(scrapeAmazon, list(toScrapMap.values()))
    finally:
        # Always release the workers, even when a scrape raised.
        pool.close()
        pool.join()
    print("joining threads at %s" % (str(datetime.now())))
14147 kshitij.so 60
 
61
 
62
def scrapeAmazon(data):
    """Refresh price and stock of one SKU from amazon.in and recompute its deal.

    data -- a MasterData document (dict). Keys read: ``_id``, ``identifier``,
            ``skuBundleId`` and, when present, ``priceUpdatedOn``,
            ``dealFlag`` and ``dealType``. ``dealFlag``/``dealType`` are both
            reset to 0 in place when either of them is missing.

    Nothing is scraped when the identifier is missing or blank, or when
    ``priceUpdatedOn`` is newer than five minutes ago.

    Otherwise the lowest price is read from the offer-listing page. For a SKU
    with ``dealFlag`` 1 and ``dealType`` 1 the product (deal) page is tried
    first and the offer listing is used only if the deal page yields 0. A
    price greater than 0 marks the SKU as in stock. MasterData and Deals are
    updated accordingly and recomputeDeal() is run for the SKU's bundle.

    Returns None.
    """
    identifier = data.get('identifier')
    print(str(identifier))
    if identifier is None or len(identifier.strip()) == 0:
        return
    identifier = identifier.strip()

    # Skip SKUs whose price was refreshed within the last five minutes.
    lastUpdate = data.get('priceUpdatedOn')
    if lastUpdate is not None:
        cutoff = to_java_date(datetime.now() - timedelta(minutes=5))
        try:
            recentlyUpdated = lastUpdate > cutoff
        except TypeError:
            # A timestamp that cannot be compared is treated as stale.
            recentlyUpdated = False
        if recentlyUpdated:
            print("sku id is already updated %s" % (data['_id'],))
            return

    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps" % (identifier)
    deal_url = "http://www.amazon.in/dp/%s" % (identifier)

    # The Deals update below always writes dealType, so make sure both flags
    # exist; a document lacking either one is treated as "not a deal".
    if 'dealFlag' not in data or 'dealType' not in data:
        data['dealFlag'] = 0
        data['dealType'] = 0
    dealScraping = data['dealFlag'] == 1 and data['dealType'] == 1

    print(url)
    lowestPrice = 0.0
    if dealScraping:
        lowestPrice = AmazonDealScraper.AmazonScraper().read(deal_url)
    if not dealScraping or lowestPrice == 0:
        # Regular SKU, or the deal page produced no price: use offer listing.
        lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)

    inStock = 1 if lowestPrice > 0 else 0
    print(lowestPrice)
    print(inStock)

    # One timestamp for both date fields so they agree exactly.
    now = to_java_date(datetime.now())
    masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
    dealFields = {'in_stock': inStock, 'dealType': data['dealType']}
    if inStock:
        # Only overwrite the stored price when a real price was scraped.
        masterFields['available_price'] = lowestPrice
        dealFields['available_price'] = lowestPrice

    catalog = get_mongo_connection().Catalog
    catalog.MasterData.update({'_id': data['_id']}, {'$set': masterFields}, multi=True)
    catalog.Deals.update({'_id': data['_id']}, {'$set': dealFields}, multi=True)

    bundleId = data['skuBundleId']
    try:
        recomputeDeal(bundleId)
    except Exception:
        # Best effort: a failed recompute must not abort the scraping run.
        print("Unable to compute deal for  %s" % (bundleId,))
13914 kshitij.so 116
 
117
 
13828 kshitij.so 118
 
13914 kshitij.so 119
def recomputeDeal(skuBundleId):
120
    """Lets recompute deal for this bundle"""
121
    print "Recomputing for bundleId",skuBundleId
122
 
123
    similarItems = list(get_mongo_connection().Catalog.Deals.find({'skuBundleId':skuBundleId}).sort([('available_price',pymongo.ASCENDING)]))
124
    bestPrice = float("inf")
125
    bestOne = None
126
    bestSellerPoints = 0
127
    toUpdate = []
128
    for similarItem in similarItems:
13972 kshitij.so 129
        if similarItem['in_stock'] == 0 or similarItem['maxprice'] is None or similarItem['maxprice'] < similarItem['available_price']:
13914 kshitij.so 130
            get_mongo_connection().Catalog.Deals.update({ '_id' : similarItem['_id'] }, {'$set':{'showDeal':0 }})
131
            continue
132
        if similarItem['available_price'] < bestPrice:
133
            bestOne = similarItem
134
            bestPrice = similarItem['available_price']
135
            bestSellerPoints = similarItem['bestSellerPoints']
136
        elif similarItem['available_price'] == bestPrice and bestSellerPoints < similarItem['bestSellerPoints']:
137
            bestOne = similarItem
138
            bestPrice = similarItem['available_price']
139
            bestSellerPoints = similarItem['bestSellerPoints']
140
        else:
141
            pass
142
    if bestOne is not None:
143
        for similarItem in similarItems:
144
            toUpdate.append(similarItem['_id'])
145
        toUpdate.remove(bestOne['_id'])
146
        get_mongo_connection().Catalog.Deals.update({ '_id' : bestOne['_id'] }, {'$set':{'showDeal':1 }})
147
    if len(toUpdate) > 0:
148
        get_mongo_connection().Catalog.Deals.update({ '_id' : { "$in": toUpdate } }, {'$set':{'showDeal':0 }},upsert=False, multi=True)
149
 
13828 kshitij.so 150
def main():
    """Script entry point: run a single populate() pass."""
    populate()
13828 kshitij.so 152
 
153
# Run only when executed as a script, not on import; populate() starts a
# multiprocessing pool, which must not be triggered by a plain import.
if __name__ == "__main__":
    main()