# Subversion Repositories SmartDukaan
#
# Rev
#
# Rev 14157 | Rev 14178 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

import pymongo
from dtr.utils.utils import to_java_date
from datetime import datetime, timedelta
from operator import itemgetter
from dtr.utils import FlipkartScraper,NewFlipkartScraper
from multiprocessing.dummy import Pool as ThreadPool 

con = None


def get_mongo_connection(host='localhost', port=27017):
    """Return the module-wide MongoDB client, creating it on first use.

    The client is cached in the module-level ``con``. ``host`` and ``port``
    are only used by the call that actually creates the client; once a client
    is cached they are ignored.

    Returns:
        The cached ``pymongo.MongoClient``, or ``None`` if creating it raised.
        In that case the error is printed, nothing is cached, and the next
        call tries again.
    """
    global con
    if con is not None:
        return con
    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        # Best effort: report the failure and hand None back to the caller.
        print(e)
        return None
    return con

def populate():
    """Re-scrape every ``source_id`` 2 SKU that shares a bundle with a ranked item.

    Reads all ``Catalog.MasterData`` documents whose ``rank`` is greater than
    0, collects the ``source_id`` 2 documents having the same ``skuBundleId``
    (keyed by ``_id`` so each document is scraped once), prints them, and
    runs ``scrapeFlipkart`` over them on a pool of 50 threads.
    """
    masterData = get_mongo_connection().Catalog.MasterData
    toScrapMap = {}
    seenBundleIds = set()
    # Materialise the outer result first so no cursor stays open while the
    # per-bundle queries run.
    for bestSeller in list(masterData.find({'rank': {'$gt': 0}})):
        bundleId = bestSeller['skuBundleId']
        if bundleId in seenBundleIds:
            # Several ranked documents can share a bundle; the query below
            # would return the same documents again.
            continue
        seenBundleIds.add(bundleId)
        for data in masterData.find({'skuBundleId': bundleId, 'source_id': 2}):
            toScrapMap.setdefault(data['_id'], data)

    for k, y in toScrapMap.items():
        print("%s \t%s" % (k, y))

    pool = ThreadPool(50)
    try:
        pool.map(scrapeFlipkart, list(toScrapMap.values()))
    finally:
        # Always release the worker threads, even if a worker raised.
        pool.close()
        pool.join()
    print("joining threads")
    print(datetime.now())
def scrapeFlipkart(data):
    """Refresh price and stock for one ``source_id`` 2 SKU and recompute its deal.

    The SKU's own product page (``marketPlaceUrl``) is tried first. If that
    yields no price, the generic seller-listing page for the identifier is
    fetched (up to three attempts) and the cheapest ``sellingPrice`` is used.
    The result is written to ``Catalog.MasterData`` and ``Catalog.Deals`` for
    ``data['_id']``, then ``recomputeDeal`` runs for the SKU's bundle.

    Args:
        data: dict with ``source_id``, ``identifier``, ``_id`` and
            ``skuBundleId``; ``priceUpdatedOn`` and ``marketPlaceUrl`` are
            optional.

    Returns:
        None. Returns early, without writing anything, when ``source_id`` is
        not 2, the identifier is missing or blank, the price was updated
        within the last five minutes, or the seller listing cannot be fetched.
    """
    if data['source_id'] != 2:
        return
    identifier = data['identifier']
    print(identifier)
    if identifier is None or not identifier.strip():
        print("returning in valid identifier")
        return
    identifier = identifier.strip()

    # Skip SKUs refreshed within the last five minutes.
    lastUpdated = data.get('priceUpdatedOn')
    if lastUpdated is not None:
        if lastUpdated > to_java_date(datetime.now() - timedelta(minutes=5)):
            print("sku id is already updated %s" % (data['_id'],))
            return

    lowestSp, inStock = _scrape_product_page(data, identifier)

    if lowestSp == 0:
        vendorsData = _fetch_vendor_listing(identifier)
        if vendorsData is None:
            print("Unable to fetch data after multiple tries.Continue for  %s" % (data['identifier'],))
            return
        sortedVendorsData = sorted(vendorsData, key=itemgetter('sellingPrice'))
        print("data %s" % (sortedVendorsData,))
        # The cheapest offer is the first entry; an empty listing means no price.
        lowestSp = (sortedVendorsData[0]['sellingPrice'] or 0) if sortedVendorsData else 0
        inStock = 1 if lowestSp > 0 else 0

    print(lowestSp)
    print(inStock)

    now = to_java_date(datetime.now())
    masterFields = {'updatedOn': now, 'priceUpdatedOn': now, 'in_stock': inStock}
    dealFields = {'in_stock': inStock}
    if lowestSp > 0:
        # Only overwrite the stored price when a real price was found.
        masterFields['available_price'] = lowestSp
        dealFields['available_price'] = lowestSp
    catalog = get_mongo_connection().Catalog
    catalog.MasterData.update({'_id': data['_id']}, {'$set': masterFields}, multi=True)
    catalog.Deals.update({'_id': data['_id']}, {'$set': dealFields}, multi=True)

    try:
        recomputeDeal(data['skuBundleId'])
    except Exception:
        # Best effort: a failed recompute must not abort the whole scrape run.
        print("Unable to compute deal for  %s" % (data.get('skuBundleId'),))


def _flipkart_listing_url(identifier):
    """Return the generic Flipkart seller-listing URL for ``identifier``."""
    return "http://www.flipkart.com/ps/%s" % (identifier,)


def _scrape_product_page(data, identifier):
    """Scrape the SKU's own product page and return ``(lowestSp, inStock)``.

    Returns ``(0, 0)`` when the SKU has no product-page URL of its own (the
    URL is empty or is just the generic listing URL), when the page reports no
    price, or when scraping fails.
    """
    url = data.get('marketPlaceUrl')
    # Both conditions must hold; the original joined them with `or`, which is
    # always true and scraped even empty or generic URLs.
    if not url or url == _flipkart_listing_url(identifier):
        return 0, 0
    scraperProductPage = NewFlipkartScraper.FlipkartProductPageScraper()
    try:
        result = scraperProductPage.read(url)
        price = result.get('lowestSp')
        if price:
            # A missing or None stock flag is stored as 0 rather than None.
            return price, (result.get('inStock') or 0)
    except Exception:
        print("Unable to scrape product page  %s" % (data['identifier'],))
    return 0, 0


def _fetch_vendor_listing(identifier, attempts=3):
    """Read the seller listing for ``identifier``; return ``None`` if every attempt fails."""
    scraperFk = FlipkartScraper.FlipkartScraper()
    url = _flipkart_listing_url(identifier)
    for retryCount in range(attempts):
        try:
            return scraperFk.read(url)
        except Exception as e:
            print("***Retry count  %s" % (retryCount,))
            print(e)
    return None

def recomputeDeal(skuBundleId):
    """Choose the one deal to show for a SKU bundle and hide all the others.

    Loads every ``Catalog.Deals`` document with this ``skuBundleId``, ordered
    by ascending ``available_price``. A deal is eligible when ``in_stock`` is
    not 0, ``maxprice`` is not ``None`` and ``maxprice`` is not below
    ``available_price``. The winner is the cheapest eligible deal, with ties
    going to the higher ``bestSellerPoints``. The winner gets ``showDeal`` 1
    and every other deal in the bundle gets ``showDeal`` 0.

    Args:
        skuBundleId: bundle whose deals are re-evaluated.
    """
    print("Recomputing for bundleId %s" % (skuBundleId,))

    deals = get_mongo_connection().Catalog.Deals
    similarItems = list(deals.find({'skuBundleId': skuBundleId}).sort([('available_price', pymongo.ASCENDING)]))

    bestPrice = float("inf")
    bestOne = None
    bestSellerPoints = 0
    for similarItem in similarItems:
        if (similarItem['in_stock'] == 0
                or similarItem['maxprice'] is None
                or similarItem['maxprice'] < similarItem['available_price']):
            # Ineligible: hidden by the bulk update below.
            continue
        price = similarItem['available_price']
        if price < bestPrice or (price == bestPrice and bestSellerPoints < similarItem['bestSellerPoints']):
            bestOne = similarItem
            bestPrice = price
            bestSellerPoints = similarItem['bestSellerPoints']

    # Everything except the winner is hidden with a single multi-document
    # write, instead of one write per ineligible deal plus a second bulk write.
    toUpdate = [item['_id'] for item in similarItems if item is not bestOne]
    if bestOne is not None:
        deals.update({'_id': bestOne['_id']}, {'$set': {'showDeal': 1}})
    if toUpdate:
        deals.update({'_id': {"$in": toUpdate}}, {'$set': {'showDeal': 0}}, upsert=False, multi=True)

def main():
    """Script entry point: run a single ``populate()`` pass."""
    populate()


if __name__ == "__main__":
    main()