Subversion Repositories SmartDukaan

Rev

Rev 14328 | Rev 15267 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
2
from dtr.utils.utils import to_java_date
13914 kshitij.so 3
from datetime import datetime, timedelta
14308 kshitij.so 4
from dtr.utils import AmazonPriceOnlyScraper, AmazonDealScraper
14174 kshitij.so 5
from multiprocessing import Pool as ThreadPool
14170 kshitij.so 6
from multiprocessing import cpu_count
14256 kshitij.so 7
import optparse
14325 kshitij.so 8
from dtr.storage.MemCache import MemCache
14705 kshitij.so 9
from dtr.utils.utils import getCashBack
13828 kshitij.so 10
 
11
# Shared pymongo client; created on first use by get_mongo_connection().
con = None

# Command-line options: -m / --m selects the Mongo host.
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost",
    default="localhost",
    type="string",
    help="The HOST where the mongo server is running",
    metavar="mongo_host"
)
options, args = parser.parse_args()

# MemCache wrapper, constructed with the same host option.
mc = MemCache(options.mongoHost)
22
 
14256 kshitij.so 23
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the module-wide pymongo client, creating it on first use.

    The client is cached in the global ``con``. Once it exists, ``host`` and
    ``port`` are ignored and the cached client is returned as-is.

    Args:
        host: Mongo host; defaults to the ``-m`` command-line option.
        port: Mongo port.

    Returns:
        The shared ``pymongo.MongoClient``, or ``None`` when constructing it
        raised (the error is printed, not re-raised).
    """
    global con
    if con is None:
        print("Establishing connection %s host and port %d" % (host, port))
        try:
            con = pymongo.MongoClient(host, port)
        except Exception as e:
            # Best effort: report the failure and hand None back to the caller.
            print(e)
            return None
    return con
33
 
14147 kshitij.so 34
def populate():
    """Collect the Amazon SKUs that need a price refresh and scrape them.

    Three sources feed the work list, keyed by MasterData ``_id`` so that
    each SKU is scraped only once:

    1. MasterData rows with ``source_id`` 1 that share a ``skuBundleId`` with
       any ranked row (``rank`` > 0).
    2. Deals with ``source_id`` 1, ``showDeal`` 1 and ``totalPoints`` > 0.
    3. ManualDeals with ``source_id`` 1 whose startDate/endDate window
       contains the current time; these rows are additionally tagged with
       ``dealFlag`` = 1 and the manual deal's ``dealType``.

    The collected MasterData rows are then mapped over ``scrapeAmazon``
    using a pool sized at twice the CPU count.
    """
    catalog = get_mongo_connection().Catalog
    masterData = catalog.MasterData
    toScrapMap = {}

    # 1. Amazon listings belonging to ranked (best-seller) bundles.
    bestSellers = list(masterData.find({'rank': {'$gt': 0}}))
    for bestSeller in bestSellers:
        amazonBestSellers = list(masterData.find(
            {'skuBundleId': bestSeller['skuBundleId'], 'source_id': 1}))
        for data in amazonBestSellers:
            if data['_id'] not in toScrapMap:
                toScrapMap[data['_id']] = data

    # 2. Deals currently flagged for display.
    dealFlagged = list(catalog.Deals.find(
        {'source_id': 1, 'showDeal': 1, 'totalPoints': {'$gt': 0}}))
    for deal in dealFlagged:
        if deal['_id'] in toScrapMap:
            continue
        data = masterData.find_one({'_id': deal['_id']})
        if data is None:
            # A deal without a MasterData row used to raise IndexError here
            # and abort the whole run; skip it instead.
            print("No MasterData row for deal %s, skipping" % (deal['_id'],))
            continue
        toScrapMap[deal['_id']] = data

    # 3. Manual deals active right now. Use one timestamp for both bounds so
    #    the window is evaluated at a single instant.
    now = to_java_date(datetime.now())
    manualDeals = list(catalog.ManualDeals.find(
        {'startDate': {'$lte': now}, 'endDate': {'$gte': now}, 'source_id': 1}))
    for manualDeal in manualDeals:
        sku = manualDeal['sku']
        data = toScrapMap.get(sku)
        if data is None:
            data = masterData.find_one({'_id': sku})
            if data is None:
                continue
            toScrapMap[sku] = data
        data['dealFlag'] = 1
        data['dealType'] = manualDeal['dealType']

    # Despite the alias, ThreadPool is multiprocessing.Pool (worker processes).
    # NOTE(review): the Mongo client is created in this process before the
    # pool starts; pymongo documents MongoClient as not fork-safe - confirm
    # this is acceptable on the deployment platform.
    pool = ThreadPool(cpu_count() * 2)
    try:
        pool.map(scrapeAmazon, toScrapMap.values())
    finally:
        # Always release the workers, even when a scrape raised.
        pool.close()
        pool.join()
    print("joining threads at %s" % (str(datetime.now())))
14147 kshitij.so 64
 
65
 
66
def scrapeAmazon(data):
    """Scrape the current Amazon price for one MasterData row and persist it.

    Args:
        data: A MasterData document (dict). ``identifier`` is used to build
            the Amazon URLs. ``dealFlag`` / ``dealType`` are optional; when
            either is absent both are set to 0 on ``data``.

    When ``dealFlag`` and ``dealType`` are both 1 the product page is scraped
    with the deal scraper first, falling back to the offer-listing page when
    that yields a price of 0. Otherwise only the offer-listing page is read.

    The outcome is written to ``Catalog.MasterData`` and ``Catalog.Deals``:
    ``available_price`` only when the scraped price is positive; ``in_stock``
    (1 for a positive price, else 0), ``dealType`` and the timestamps always.
    Finally the deal for the row's ``skuBundleId`` is recomputed; a failure
    there is printed and not re-raised.

    Returns:
        None. Rows with a missing/blank identifier, or whose
        ``priceUpdatedOn`` is newer than five minutes ago, are skipped
        without touching the database.
    """
    identifier = data['identifier']
    print(str(identifier))
    if identifier is None or len(identifier.strip()) == 0:
        return
    identifier = identifier.strip()

    # Skip rows that were refreshed within the last five minutes. Rows that
    # have never been priced carry no 'priceUpdatedOn' and fall through.
    lastUpdated = data.get('priceUpdatedOn')
    if lastUpdated is not None and lastUpdated > to_java_date(datetime.now() - timedelta(minutes=5)):
        print("sku id is already updated %s" % (data['_id'],))
        return

    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps" % (identifier,)

    # Rows that did not come through a manual deal carry no deal markers.
    # Normalise them so the Deals update below can always read 'dealType'.
    if 'dealFlag' not in data or 'dealType' not in data:
        data['dealFlag'] = 0
        data['dealType'] = 0
    dealScraping = data['dealFlag'] == 1 and data['dealType'] == 1

    print(url)

    if dealScraping:
        deal_url = "http://www.amazon.in/dp/%s" % (identifier,)
        lowestPrice = AmazonDealScraper.AmazonScraper().read(deal_url)
        if lowestPrice == 0:
            # The deal page gave no price; fall back to the offer listing.
            lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)
    else:
        lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)

    print(lowestPrice)
    inStock = 1 if lowestPrice > 0 else 0
    print(inStock)

    # One timestamp for both date fields of this write.
    now = to_java_date(datetime.now())
    masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
    dealFields = {'in_stock': inStock, 'dealType': data['dealType']}
    if lowestPrice > 0:
        # Only overwrite the stored price when the scrape produced one.
        masterFields['available_price'] = lowestPrice
        dealFields['available_price'] = lowestPrice

    catalog = get_mongo_connection().Catalog
    catalog.MasterData.update({'_id': data['_id']}, {'$set': masterFields}, multi=True)
    catalog.Deals.update({'_id': data['_id']}, {'$set': dealFields}, multi=True)

    try:
        recomputeDeal(data['skuBundleId'])
    except Exception as e:
        # Best effort: a failed recompute must not abort the whole batch,
        # but the reason is reported instead of being swallowed.
        print("Unable to compute deal for  %s" % (data.get('skuBundleId'),))
        print(e)
13914 kshitij.so 120
 
14325 kshitij.so 121
def populateNegativeDeals():
    """Store the distinct ``sku`` values of Catalog.NegativeDeals in ``mc``.

    The list is written under the key ``negative_deals`` with 600 as the
    third argument to ``mc.set`` (presumably an expiry in seconds - confirm
    against MemCache).
    """
    collection = get_mongo_connection().Catalog.NegativeDeals
    skus = collection.find().distinct('sku')
    mc.set("negative_deals", skus, 600)
13828 kshitij.so 124
 
13914 kshitij.so 125
def _price_after_cash_back(price, cashBack):
    """Return ``price`` net of ``cashBack``; unchanged when none applies.

    ``cashBack`` is the mapping returned by ``getCashBack`` (or a falsy
    value). It applies only when ``cash_back_status`` is 1: type 1 deducts
    ``cash_back`` percent of the price, type 2 deducts ``cash_back`` as a
    flat amount, and any other type leaves the price untouched.
    """
    if not cashBack or cashBack.get('cash_back_status') != 1:
        return price
    if cashBack['cash_back_type'] == 1:
        return price - price * float(cashBack['cash_back']) / 100
    if cashBack['cash_back_type'] == 2:
        return price - float(cashBack['cash_back'])
    return price


def recomputeDeal(skuBundleId):
    """Choose the single Deals row to show for ``skuBundleId``.

    Every Deals row of the bundle is priced net of cash back (in memory only;
    the adjusted price is not written back here). A row is hidden
    (``showDeal`` = 0) when it is out of stock, has no ``maxprice``, costs
    more than ``maxprice``, or its ``_id`` is listed in the negative deals.
    Among the remaining rows the cheapest wins, ties going to the higher
    ``bestSellerPoints``. The winner gets ``showDeal`` = 1 and every other
    row of the bundle gets ``showDeal`` = 0. When no row qualifies, only the
    per-row hides above are written.

    Args:
        skuBundleId: Bundle identifier used to select the Deals rows.
    """
    print("Recomputing for bundleId %s" % (skuBundleId,))

    deals = get_mongo_connection().Catalog.Deals
    similarItems = list(deals.find({'skuBundleId': skuBundleId}).sort([('available_price', pymongo.ASCENDING)]))
    if not similarItems:
        return

    # Read the negative-deal list once per call. It used to be fetched twice
    # per row, so an entry expiring between the check and the use ended in
    # "x in None".
    negativeDeals = mc.get("negative_deals")
    if negativeDeals is None:
        populateNegativeDeals()
        negativeDeals = mc.get("negative_deals")
        if negativeDeals is None:
            # The cache write did not stick; read straight from Mongo rather
            # than risk showing a deal that is meant to be suppressed.
            negativeDeals = get_mongo_connection().Catalog.NegativeDeals.find().distinct('sku')

    bestOne = None
    bestPrice = float("inf")
    bestSellerPoints = 0
    for similarItem in similarItems:
        try:
            cashBack = getCashBack(similarItem['_id'], similarItem['source_id'], similarItem['category_id'], mc, options.mongoHost)
            similarItem['available_price'] = _price_after_cash_back(similarItem['available_price'], cashBack)
        except Exception as cashBackEx:
            # Best effort: on failure the row keeps its listed price.
            print(cashBackEx)
            print("Error calculating cashback.")

        hidden = (similarItem['in_stock'] == 0
                  or similarItem['maxprice'] is None
                  or similarItem['maxprice'] < similarItem['available_price']
                  or similarItem['_id'] in negativeDeals)
        if hidden:
            deals.update({'_id': similarItem['_id']}, {'$set': {'showDeal': 0}})
            continue

        price = similarItem['available_price']
        # Cheaper wins; on equal price the higher bestSellerPoints wins.
        if price < bestPrice or (price == bestPrice and bestSellerPoints < similarItem['bestSellerPoints']):
            bestOne = similarItem
            bestPrice = price
            bestSellerPoints = similarItem['bestSellerPoints']

    if bestOne is None:
        return

    deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1}})
    toUpdate = [item['_id'] for item in similarItems]
    toUpdate.remove(bestOne['_id'])
    if toUpdate:
        deals.update({'_id': {"$in": toUpdate}}, {'$set': {'showDeal': 0}}, upsert=False, multi=True)
171
 
14705 kshitij.so 172
 
173
 
13828 kshitij.so 174
def main():
    """Script entry point: run a single populate() pass."""
    populate()


# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    main()