Subversion Repositories SmartDukaan

Rev

Rev 16026 | Rev 16334 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

import pymongo
from dtr.utils.utils import to_java_date, getNlcPoints
from datetime import datetime, timedelta
from dtr.utils import AmazonPriceOnlyScraper, AmazonDealScraper
from multiprocessing import Pool as ThreadPool
from multiprocessing import cpu_count
import optparse
from dtr.storage.MemCache import MemCache
from dtr.utils.utils import getCashBack
import traceback

# Process-wide pymongo client; stays None until get_mongo_connection() first
# succeeds in creating it.
con = None

# Command-line handling. Parsing happens at import time, so importing this
# module consumes sys.argv.
parser = optparse.OptionParser()
# -m / --m : host name of the mongo server (defaults to "localhost").
parser.add_option("-m", "--m", dest="mongoHost",
                      default="localhost",
                      type="string", help="The HOST where the mongo server is running",
                      metavar="mongo_host")

(options, args) = parser.parse_args()

# Cache client, constructed with the same host option as the mongo connection.
# In this file it only stores and reads the "negative_deals" key.
mc = MemCache(options.mongoHost)

# `_id` values that scrapeAmazon() skips outright; empty by default.
ignoreItems = []

# Store name -> numeric source_id. The queries in this file filter on
# source_id 1, i.e. the 'AMAZON' entry.
SOURCE_MAP = {'AMAZON':1,'FLIPKART':2,'SNAPDEAL':3,'SAHOLIC':4, 'SHOPCLUES.COM':5}

def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the shared pymongo client, creating it on first use.

    The client is stored in the module-level ``con`` so later calls reuse it;
    ``host`` and ``port`` only matter on the call that actually creates it.

    Returns None (after printing the exception) if the client cannot be
    constructed.
    """
    global con
    # Fast path: a client already exists, hand it back untouched.
    if con is not None:
        return con

    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as err:
        print(err)
        return None
    return con

def _withDealDefaults(record):
    """Stamp *record* with the "no manual deal" field values and return it."""
    record['dealFlag'] = 0
    record['dealType'] = 0
    record['dealPoints'] = 0
    record['manualDealThresholdPrice'] = None
    return record


def _withManualDeal(record, manualDeal):
    """Copy the manual deal's type, points and threshold price onto *record* and return it."""
    record['dealFlag'] = 1
    record['dealType'] = manualDeal['dealType']
    record['dealPoints'] = manualDeal['dealPoints']
    record['manualDealThresholdPrice'] = manualDeal['dealThresholdPrice']
    return record


def populate():
    """Collect every Amazon MasterData record that needs a price refresh and scrape them.

    Records are gathered into a map keyed by MasterData ``_id`` from three
    places, in this order:

    1. source_id 1 records that share a ``skuBundleId`` with any record whose
       ``rank`` is greater than 0;
    2. source_id 1 rows in Catalog.Deals with ``showDeal`` 1 and positive
       ``totalPoints``;
    3. source_id 1 rows in Catalog.ManualDeals whose start/end window contains
       the current time. These also overwrite the deal fields of a record that
       was already collected in step 1 or 2.

    Each collected record is then passed to scrapeAmazon() through a worker
    pool. Nothing is returned.
    """
    catalog = get_mongo_connection().Catalog
    masterData = catalog.MasterData
    toScrapMap = {}

    # 1. Amazon listings of every bundle that contains a ranked item.
    bestSellers = list(masterData.find({'rank':{'$gt':0}}))
    seenBundles = set()
    for bestSeller in bestSellers:
        bundleId = bestSeller['skuBundleId']
        # Several ranked items can belong to one bundle; the query result
        # would be identical, so run it once per bundle.
        if bundleId in seenBundles:
            continue
        seenBundles.add(bundleId)
        for data in masterData.find({'skuBundleId':bundleId,'source_id':1}):
            if data['_id'] not in toScrapMap:
                toScrapMap[data['_id']] = _withDealDefaults(data)

    # 2. Amazon deals that are currently shown.
    dealFlagged = list(catalog.Deals.find({'source_id':1,'showDeal':1,'totalPoints':{'$gt':0}}))
    for deal in dealFlagged:
        if deal['_id'] in toScrapMap:
            continue
        data = masterData.find_one({'_id':deal['_id']})
        if data is None:
            # A deal row without a MasterData row used to raise IndexError
            # here and abort the whole run; skip it instead.
            print("No MasterData found for deal %s, skipping" % (deal['_id'],))
            continue
        toScrapMap[deal['_id']] = _withDealDefaults(data)

    # 3. Manual deals active right now. One timestamp is used for both bounds.
    now = to_java_date(datetime.now())
    manualDeals = list(catalog.ManualDeals.find({'startDate':{'$lte':now},'endDate':{'$gte':now},'source_id':1}))
    for manualDeal in manualDeals:
        sku = manualDeal['sku']
        data = toScrapMap.get(sku)
        if data is None:
            data = masterData.find_one({'_id':sku})
            if data is None:
                continue
            toScrapMap[sku] = data
        _withManualDeal(data, manualDeal)

    # ThreadPool is multiprocessing.Pool under another name, so the workers
    # are processes.
    # NOTE(review): the MongoClient was already created in this process before
    # the pool forks; pymongo documents MongoClient as not fork-safe - confirm
    # whether workers should open their own client.
    pool = ThreadPool(cpu_count() * 2)
    try:
        pool.map(scrapeAmazon, list(toScrapMap.values()))
    finally:
        # Always release the workers, even when a scrape raised.
        pool.close()
        pool.join()
    print("joining threads at %s"%(str(datetime.now())))
        

def scrapeAmazon(data):
    """Scrape the current Amazon price for one MasterData record and store it.

    ``data`` is a MasterData document (a dict). It is expected to carry
    ``identifier``, ``_id`` and ``codAvailable``; ``priceUpdatedOn``,
    ``dealFlag`` and ``dealType`` are optional.

    The function returns None in every case. It returns early, without
    touching the database, when the identifier is missing or blank, when the
    ``_id`` is listed in ``ignoreItems``, or when ``priceUpdatedOn`` is less
    than five minutes old.

    Otherwise it reads a price, writes price/stock/timestamps to
    Catalog.MasterData and Catalog.Deals, and calls recomputeDeal(). A failure
    inside recomputeDeal() is printed and swallowed so one bad bundle does not
    stop the remaining scrapes.
    """
    inStock = 0
    print(str(data['identifier']))
    if data['identifier'] is None or len(data['identifier'].strip())==0:
        return

    if data['_id'] in ignoreItems:
        print("Ignored items returning for %d"%(data['_id']))
        return

    # Skip records refreshed within the last five minutes. A record that has
    # never been priced has no 'priceUpdatedOn' and is always scraped.
    lastPriceUpdate = data.get('priceUpdatedOn')
    if lastPriceUpdate is not None and lastPriceUpdate > to_java_date(datetime.now() - timedelta(minutes=5)):
        print("sku id is already updated %s" % (data['_id'],))
        return

    url = "http://www.amazon.in/gp/aw/ol/%s?o=New&op=1"%(data['identifier'])

    # Records that were not stamped with deal fields are treated as
    # "no deal". Both keys are defaulted together because 'dealType' is
    # written to the Deals collection further down.
    if 'dealFlag' not in data or 'dealType' not in data:
        data['dealFlag'] = 0
        data['dealType'] = 0

    # The deal scraper is used only for flagged deals of type 1.
    dealScraping = data['dealFlag'] == 1 and data['dealType'] == 1
    if dealScraping:
        deal_url = "http://www.amazon.in/dp/%s"%(data['identifier'].strip())

    print(url)
    lowestPrice = 0.0

    if dealScraping:
        lowestPrice = AmazonDealScraper.AmazonScraper().read(deal_url)
        if lowestPrice == 0:
            # The deal page gave no price; fall back to the plain offer listing.
            dealScraping = False
    if not dealScraping:
        lowestPrice = AmazonPriceOnlyScraper.AmazonScraper().read(url)

    # A positive price is the only in-stock signal used here.
    if lowestPrice > 0:
        inStock = 1
    print(lowestPrice)
    print(inStock)

    catalog = get_mongo_connection().Catalog
    # One timestamp so updatedOn and priceUpdatedOn cannot drift apart.
    now = to_java_date(datetime.now())

    masterFields = {'updatedOn':now,'priceUpdatedOn':now,'in_stock':inStock}
    if inStock:
        # available_price is only overwritten when a real price was found.
        masterFields['available_price'] = lowestPrice
    catalog.MasterData.update({'_id':data['_id']}, {'$set' : masterFields}, multi=True)

    dealFields = {'in_stock':inStock,'dealType':data['dealType'],'codAvailable':data['codAvailable']}
    if inStock:
        dealFields['available_price'] = lowestPrice
    catalog.Deals.update({'_id':data['_id']}, {'$set' : dealFields}, multi=True)

    try:
        recomputeDeal(data)
    except Exception:
        # Show the real cause, then the short marker line.
        traceback.print_exc()
        print("Unable to compute deal for  %s" % (data.get('skuBundleId'),))
            
def populateNegativeDeals():
    """Store the distinct ``sku`` values of Catalog.NegativeDeals in the cache.

    The list is written under the key "negative_deals"; the third argument to
    ``mc.set`` (600) is presumably the expiry in seconds - confirm against
    MemCache.
    """
    negativeDealsCollection = get_mongo_connection().Catalog.NegativeDeals
    skus = negativeDealsCollection.find().distinct('sku')
    mc.set("negative_deals", skus, 600)

def recomputePoints(item, deal):
    """Recalculate ``nlcPoints``, ``dealPoints`` and ``totalPoints`` for one deal row.

    ``item`` is the MasterData record handed to the scraper, including the
    ``dealPoints`` and ``manualDealThresholdPrice`` fields added by populate().
    ``deal`` is the matching Catalog.Deals document.

    nlcPoints is recomputed with getNlcPoints() only when the deal's price
    differs from ``item['available_price']``. If the price is unchanged, or
    getNlcPoints() fails, the stored ``deal['nlcPoints']`` is kept.

    dealPoints is ``item['dealPoints']`` when a manual threshold price is set
    and the deal's price is at or below it, otherwise 0.

    The result is written to Catalog.Deals; nothing is returned. A KeyError is
    raised if ``deal`` or ``item`` lacks one of the fields read here.
    """
    currentPrice = deal['available_price']

    if item.get('available_price') == currentPrice:
        print("No need to compute points for %d , as price is still same" %(item['_id']))
        nlcPoints = deal['nlcPoints']
    else:
        try:
            nlcPoints = getNlcPoints(item, deal['minNlc'], deal['maxNlc'], currentPrice)
        except Exception:
            # Best effort: keep the previous points when they cannot be
            # recomputed (for example minNlc/maxNlc missing on the deal).
            traceback.print_exc()
            nlcPoints = deal['nlcPoints']

    # populate() sets the threshold to None for records without a manual deal.
    threshold = item['manualDealThresholdPrice']
    if threshold is not None and threshold >= currentPrice:
        dealPoints = item['dealPoints']
    else:
        dealPoints = 0

    # Swap the old nlc/deal contributions for the new ones in the total.
    totalPoints = deal['totalPoints'] - deal['nlcPoints'] + nlcPoints - deal['dealPoints'] + dealPoints
    get_mongo_connection().Catalog.Deals.update({'_id':deal['_id']},{"$set":{'totalPoints':totalPoints, 'nlcPoints': nlcPoints, 'dealPoints': dealPoints, 'manualDealThresholdPrice': threshold}})

          

def _negativeDealSkus():
    """Return the negative-deal sku list, refreshing the cache if it is empty."""
    skus = mc.get("negative_deals")
    if skus is None:
        populateNegativeDeals()
        skus = mc.get("negative_deals")
    if skus is None:
        # The cache still returns nothing after a refresh; read the collection
        # directly so the membership test below never runs against None.
        skus = get_mongo_connection().Catalog.NegativeDeals.find().distinct('sku')
    return skus


def _shouldHideDeal(deal, negativeDeals):
    """Tell whether *deal* may never be shown, whatever its price."""
    if deal['in_stock'] == 0 or deal['maxprice'] is None or deal['maxprice'] < deal['available_price'] or deal['_id'] in negativeDeals:
        return True
    # Unranked SHOPCLUES.COM offers are hidden as well.
    return deal['source_id'] == SOURCE_MAP.get('SHOPCLUES.COM') and deal['rank']==0


def _pickBetter(candidate, best):
    """Return the better of *candidate* and *best*.

    *best* is a ``(deal, price, bestSellerPoints)`` tuple. The candidate wins
    on a strictly lower price, or on an equal price with more bestSellerPoints.
    """
    bestPrice = best[1]
    bestSellerPoints = best[2]
    price = candidate['available_price']
    if price < bestPrice or (price == bestPrice and bestSellerPoints < candidate['bestSellerPoints']):
        return (candidate, price, candidate['bestSellerPoints'])
    return best


def _setDealFlags(dealId, showDeal, prepaidDeal):
    """Write the showDeal / prepaidDeal flags of a single Deals row."""
    get_mongo_connection().Catalog.Deals.update({ '_id' : dealId }, {'$set':{'showDeal':showDeal,'prepaidDeal':prepaidDeal }})


def recomputeDeal(item):
    """Re-elect which offers of ``item``'s bundle are shown as deals.

    All Catalog.Deals rows sharing ``item['skuBundleId']`` are considered.

    - The row matching ``item['_id']`` first gets its points refreshed through
      recomputePoints(); a failure there is printed and ignored.
    - A row is hidden (showDeal 0, prepaidDeal 0) when it is out of stock, has
      no ``maxprice``, is priced above ``maxprice``, is in the negative-deals
      list, or is an unranked SHOPCLUES.COM offer.
    - Among the remaining rows with ``codAvailable == 1`` the cheapest wins
      (ties go to the higher ``bestSellerPoints``) and gets showDeal 1. The
      same selection is run separately over the rows without COD.
    - The non-COD winner gets prepaidDeal 1 only when there is no COD winner
      or it is strictly cheaper than the COD winner.
    - Every other row of the bundle is reset to showDeal 0 / prepaidDeal 0.

    Nothing is returned. A KeyError is raised if a deal row lacks one of the
    fields read here.
    """
    print("Recomputing for bundleId %d" %(item.get('skuBundleId')))
    skuBundleId = item['skuBundleId']

    similarItems = list(get_mongo_connection().Catalog.Deals.find({'skuBundleId':skuBundleId}).sort([('available_price',pymongo.ASCENDING)]))
    # Fetched once per call: reading the cache separately for the None check
    # and for the membership test could see the entry disappear in between.
    negativeDeals = _negativeDealSkus()

    # (deal, price, bestSellerPoints) for the COD and the prepaid-only race.
    codBest = (None, float("inf"), 0)
    prepaidBest = (None, float("inf"), 0)

    for similarItem in similarItems:
        if similarItem['_id'] == item['_id']:
            try:
                recomputePoints(item, similarItem)
            except Exception:
                traceback.print_exc()
        isCod = similarItem['codAvailable'] ==1
        if _shouldHideDeal(similarItem, negativeDeals):
            _setDealFlags(similarItem['_id'], 0, 0)
            continue
        if isCod:
            codBest = _pickBetter(similarItem, codBest)
        else:
            prepaidBest = _pickBetter(similarItem, prepaidBest)

    bestOne = codBest[0]
    prepaidBestOne = prepaidBest[0]

    toUpdate = []
    if bestOne is not None or prepaidBestOne is not None:
        # Start from the whole bundle and take the winners out; what is left
        # gets both flags cleared in one multi-update.
        toUpdate = [similarItem['_id'] for similarItem in similarItems]
        if bestOne is not None:
            toUpdate.remove(bestOne['_id'])
            _setDealFlags(bestOne['_id'], 1, 0)
        if prepaidBestOne is not None and (bestOne is None or prepaidBestOne['available_price'] < bestOne['available_price']):
            toUpdate.remove(prepaidBestOne['_id'])
            _setDealFlags(prepaidBestOne['_id'], 0, 1)
    if len(toUpdate) > 0:
        get_mongo_connection().Catalog.Deals.update({ '_id' : { "$in": toUpdate } }, {'$set':{'showDeal':0,'prepaidDeal':0 }},upsert=False, multi=True)

def main():
    """Script entry point: run one collect-and-scrape pass."""
    populate()


if __name__ == "__main__":
    main()