Subversion Repositories SmartDukaan

Rev

Rev 15856 | Rev 15959 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13828 kshitij.so 1
import pymongo
15267 kshitij.so 2
from dtr.utils.utils import to_java_date, getNlcPoints
13914 kshitij.so 3
from datetime import datetime, timedelta
14308 kshitij.so 4
from dtr.utils import AmazonPriceOnlyScraper, AmazonDealScraper
14174 kshitij.so 5
from multiprocessing import Pool as ThreadPool
14170 kshitij.so 6
from multiprocessing import cpu_count
14256 kshitij.so 7
import optparse
14325 kshitij.so 8
from dtr.storage.MemCache import MemCache
14705 kshitij.so 9
from dtr.utils.utils import getCashBack
15267 kshitij.so 10
import traceback
13828 kshitij.so 11
 
12
# Process-wide MongoClient handle; stays None until get_mongo_connection()
# creates the client on first use.
con = None

# Options are parsed at import time: the parsed host is needed right below
# and as the default argument of get_mongo_connection().
parser = optparse.OptionParser()
parser.add_option(
    "-m", "--m",
    dest="mongoHost",
    default="localhost",
    type="string",
    help="The HOST where the mongo server is running",
    metavar="mongo_host")
options, args = parser.parse_args()

# MemCache instance built from the mongo host option; used further down to
# hold the "negative_deals" list.
mc = MemCache(options.mongoHost)

# Documents whose _id is listed here are skipped by scrapeAmazon().
ignoreItems = []
15616 kshitij.so 25
 
14256 kshitij.so 26
def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the shared MongoClient, creating it on the first call.

    The client is kept in the module-level ``con``. Once it exists, the
    ``host`` and ``port`` arguments are ignored and the cached client is
    returned. If creating the client raises, the exception is printed and
    None is returned (``con`` stays None, so the next call retries).
    """
    global con
    if con is not None:
        return con

    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        print(e)
        return None
    return con
36
 
14147 kshitij.so 37
def _reset_worker_connection():
    """Pool initializer: drop the MongoClient handle inherited from the parent.

    populate() opens the shared client before the worker processes are
    created, and PyMongo documents MongoClient as not fork-safe, so each
    worker clears ``con`` and lets get_mongo_connection() build its own.
    """
    global con
    con = None


def _set_deal_fields(doc, dealFlag=0, dealType=0, dealPoints=0, thresholdPrice=None):
    """Stamp the deal fields that scrapeAmazon()/recomputePoints() read onto doc."""
    doc['dealFlag'] = dealFlag
    doc['dealType'] = dealType
    doc['dealPoints'] = dealPoints
    doc['manualDealThresholdPrice'] = thresholdPrice
    return doc


def populate():
    """Collect the MasterData documents to scrape and scrape them in a pool.

    Documents are gathered, keyed by ``_id``, from three places:

    1. source_id 1 documents of every bundle that has a document with
       rank > 0 (the original code calls these best sellers);
    2. documents behind Deals rows with source_id 1, showDeal 1 and
       totalPoints > 0;
    3. documents referenced by the ``sku`` of ManualDeals rows (source_id 1)
       whose startDate/endDate window contains the current time.

    Documents from (1) and (2) get zeroed deal fields; documents from (3)
    get dealFlag 1 plus the manual deal's type, points and threshold price,
    overriding whatever (1) or (2) stored. Every collected document is then
    passed to scrapeAmazon() through a pool of ``cpu_count() * 2`` workers.
    """
    catalog = get_mongo_connection().Catalog
    masterData = catalog.MasterData
    toScrapMap = {}

    bestSellers = list(masterData.find({'rank': {'$gt': 0}}))
    for bestSeller in bestSellers:
        amazonBestSellers = list(masterData.find(
            {'skuBundleId': bestSeller['skuBundleId'], 'source_id': 1}))
        for data in amazonBestSellers:
            if data['_id'] not in toScrapMap:
                toScrapMap[data['_id']] = _set_deal_fields(data)

    dealFlagged = list(catalog.Deals.find(
        {'source_id': 1, 'showDeal': 1, 'totalPoints': {'$gt': 0}}))
    for deal in dealFlagged:
        if deal['_id'] in toScrapMap:
            continue
        data = masterData.find_one({'_id': deal['_id']})
        if data is None:
            # A Deals row without a MasterData document used to raise
            # IndexError and abort the whole run; skip it like the manual
            # deal branch below does.
            continue
        toScrapMap[deal['_id']] = _set_deal_fields(data)

    # One timestamp for both bounds so the window test is self-consistent.
    now = to_java_date(datetime.now())
    manualDeals = list(catalog.ManualDeals.find(
        {'startDate': {'$lte': now}, 'endDate': {'$gte': now}, 'source_id': 1}))
    for manualDeal in manualDeals:
        sku = manualDeal['sku']
        data = toScrapMap.get(sku)
        if data is None:
            data = masterData.find_one({'_id': sku})
        if data is None:
            continue
        toScrapMap[sku] = _set_deal_fields(
            data, 1, manualDeal['dealType'], manualDeal['dealPoints'],
            manualDeal['dealThresholdPrice'])

    pool = ThreadPool(cpu_count() * 2, initializer=_reset_worker_connection)
    try:
        pool.map(scrapeAmazon, list(toScrapMap.values()))
    finally:
        # Release the workers even when one of the scrape calls raised.
        pool.close()
        pool.join()
    print("joining threads at %s" % (str(datetime.now())))
14147 kshitij.so 79
 
80
 
81
def scrapeAmazon(data):
    """Scrape the current Amazon price for one document and store the result.

    ``data`` is a MasterData document as prepared by populate(); the keys
    read here are ``identifier``, ``_id``, ``priceUpdatedOn``, ``dealFlag``,
    ``dealType`` and ``skuBundleId``.

    Nothing is scraped when the identifier is None/blank, when ``_id`` is in
    ``ignoreItems``, or when ``priceUpdatedOn`` is newer than five minutes
    ago. Otherwise the price is read (deal page first when dealFlag and
    dealType are both 1, falling back to the offer listing when that yields
    0), MasterData and Deals are updated, and recomputeDeal() is run for the
    document. Returns None.
    """
    print(str(data['identifier']))
    if data['identifier'] is None or len(data['identifier'].strip()) == 0:
        return

    if data['_id'] in ignoreItems:
        print("Ignored items returning for %d" % (data['_id']))
        return

    try:
        if data['priceUpdatedOn'] > to_java_date(datetime.now() - timedelta(minutes=5)):
            print("sku id is already updated %s" % (data['_id'],))
            return
    except Exception:
        # Best effort: a missing or unusable priceUpdatedOn means the
        # document is simply scraped again.
        pass

    # Without both deal fields the document is treated as a non-deal, which
    # also guarantees 'dealType' exists for the Deals update below.
    if 'dealFlag' not in data or 'dealType' not in data:
        data['dealFlag'] = 0
        data['dealType'] = 0
    dealScraping = data['dealFlag'] == 1 and data['dealType'] == 1

    identifier = data['identifier'].strip()
    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps" % (identifier)
    print(url)

    lowestPrice = 0.0
    if dealScraping:
        deal_url = "http://www.amazon.in/dp/%s" % (identifier)
        lowestPrice = AmazonDealScraper.AmazonScraper().read(deal_url)
    if not dealScraping or lowestPrice == 0:
        # Plain listing, or the deal page gave no price: use the offer listing.
        lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)

    inStock = 1 if lowestPrice > 0 else 0
    print(lowestPrice)
    print(inStock)

    # A single timestamp keeps updatedOn and priceUpdatedOn identical.
    now = to_java_date(datetime.now())
    masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
    dealFields = {'in_stock': inStock, 'dealType': data['dealType']}
    if lowestPrice > 0:
        # The stored price is only overwritten when a price was scraped.
        masterFields['available_price'] = lowestPrice
        dealFields['available_price'] = lowestPrice

    catalog = get_mongo_connection().Catalog
    catalog.MasterData.update({'_id': data['_id']}, {'$set': masterFields}, multi=True)
    catalog.Deals.update({'_id': data['_id']}, {'$set': dealFields}, multi=True)

    try:
        recomputeDeal(data)
    except Exception:
        # Keep the worker alive, but leave a traceback so the failure can be
        # diagnosed; .get() avoids a second KeyError inside the handler.
        traceback.print_exc()
        print("Unable to compute deal for  %s" % (data.get('skuBundleId'),))
13914 kshitij.so 139
 
14325 kshitij.so 140
def populateNegativeDeals():
    """Store the distinct ``sku`` values of Catalog.NegativeDeals in ``mc``.

    The list is written under the key "negative_deals"; 600 is passed as the
    third argument of ``mc.set`` (presumably an expiry in seconds -- confirm
    against MemCache).
    """
    negativeDealsCollection = get_mongo_connection().Catalog.NegativeDeals
    skus = negativeDealsCollection.find().distinct('sku')
    mc.set("negative_deals", skus, 600)
13828 kshitij.so 143
 
15267 kshitij.so 144
def recomputePoints(item, deal):
    """Refresh nlcPoints, dealPoints and totalPoints of one Deals document.

    ``item`` is the document handed to scrapeAmazon() (it carries the
    ``dealPoints`` and ``manualDealThresholdPrice`` set by populate());
    ``deal`` is the matching Deals document.

    nlcPoints are recomputed with getNlcPoints() only when the item's
    ``available_price`` differs from the deal's; when the price is unchanged,
    or getNlcPoints() fails, the deal's stored nlcPoints are kept. dealPoints
    are granted only while the deal's price is at or below the manual
    threshold. totalPoints is adjusted by the difference between the old and
    new nlc/deal points.
    """
    currentPrice = deal['available_price']

    if item.get('available_price') == currentPrice:
        # Unchanged price: keep the stored points. (This used to be reached
        # through a bare ``raise`` that printed a spurious traceback.)
        print("No need to compute points for %d , as price is still same" % (item['_id']))
        nlcPoints = deal['nlcPoints']
    else:
        try:
            nlcPoints = getNlcPoints(item, deal['minNlc'], deal['maxNlc'], currentPrice)
        except Exception:
            traceback.print_exc()
            nlcPoints = deal['nlcPoints']

    # The threshold is None for documents that are not manual deals; make
    # that explicit instead of relying on how None orders against numbers.
    thresholdPrice = item['manualDealThresholdPrice']
    if thresholdPrice is not None and thresholdPrice >= currentPrice:
        dealPoints = item['dealPoints']
    else:
        dealPoints = 0

    totalPoints = deal['totalPoints'] - deal['nlcPoints'] + nlcPoints - deal['dealPoints'] + dealPoints
    get_mongo_connection().Catalog.Deals.update(
        {'_id': deal['_id']},
        {"$set": {'totalPoints': totalPoints,
                  'nlcPoints': nlcPoints,
                  'dealPoints': dealPoints,
                  'manualDealThresholdPrice': thresholdPrice}})
158
 
159
 
160
 
161
def _negative_deal_skus():
    """Return the cached negative-deal skus as a frozenset, refilling the cache once if it is empty."""
    skus = mc.get("negative_deals")
    if skus is None:
        populateNegativeDeals()
        skus = mc.get("negative_deals")
    # If the cache still has nothing, treat it as "no negative deals"
    # rather than failing on ``in None``.
    return frozenset(skus or ())


def recomputeDeal(item):
    """Recompute which Deals document of the item's bundle is shown.

    All Deals documents sharing ``item['skuBundleId']`` are loaded in
    ascending ``available_price`` order. For the document whose ``_id``
    matches the item, recomputePoints() is run first (failures are printed
    and ignored). A document is hidden (showDeal 0) when it is out of stock,
    has no ``maxprice``, costs more than ``maxprice`` or is a negative deal.
    Among the rest, the cheapest one wins, ties going to the higher
    ``bestSellerPoints``; it gets showDeal 1 and every other document of the
    bundle gets showDeal 0.
    """
    print("Recomputing for bundleId %s" % (item.get('skuBundleId'),))
    skuBundleId = item['skuBundleId']

    deals = get_mongo_connection().Catalog.Deals
    similarItems = list(deals.find({'skuBundleId': skuBundleId}).sort(
        [('available_price', pymongo.ASCENDING)]))
    if not similarItems:
        return

    # Read the negative-deal list once per bundle instead of per document.
    negativeDeals = _negative_deal_skus()

    bestPrice = float("inf")
    bestOne = None
    bestSellerPoints = 0
    for similarItem in similarItems:
        # NOTE: a cash-back adjustment of available_price used to sit here
        # as commented-out code; it is disabled (see revision history).
        if similarItem['_id'] == item['_id']:
            try:
                recomputePoints(item, similarItem)
            except Exception:
                traceback.print_exc()

        price = similarItem['available_price']
        if (similarItem['in_stock'] == 0
                or similarItem['maxprice'] is None
                or similarItem['maxprice'] < price
                or similarItem['_id'] in negativeDeals):
            deals.update({'_id': similarItem['_id']}, {'$set': {'showDeal': 0}})
            continue

        cheaper = price < bestPrice
        betterTie = price == bestPrice and bestSellerPoints < similarItem['bestSellerPoints']
        if cheaper or betterTie:
            bestOne = similarItem
            bestPrice = price
            bestSellerPoints = similarItem['bestSellerPoints']

    if bestOne is None:
        return

    deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1}})
    toUpdate = [other['_id'] for other in similarItems if other['_id'] != bestOne['_id']]
    if toUpdate:
        deals.update({'_id': {"$in": toUpdate}}, {'$set': {'showDeal': 0}}, upsert=False, multi=True)
213
 
14705 kshitij.so 214
 
215
 
13828 kshitij.so 216
def main():
    """Script entry point: run one populate() pass."""
    populate()


# Only scrape when executed as a script, not when imported.
if __name__ == '__main__':
    main()