# Subversion Repositories SmartDukaan
#
# Rev
#
# Rev 13754 | Rev 14379 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

import urllib2
import simplejson as json
import pymongo
from dtr.utils.utils import to_java_date
from datetime import datetime

# HTTP headers attached to every scrape request; the User-agent string
# identifies the client as a desktop Chrome browser.
headers = {
    'User-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
}

# Lazily created MongoDB client, cached by get_mongo_connection().
con = None

# Rank entries gathered by the most recent scrape; consumed by
# commitBestSellers().
bestSellers = []

# Captured once at import time so every write of a run carries the same
# 'updatedOn' timestamp.
now = datetime.now()

class __RankInfo:
    """Pair a product identifier with the best-seller rank assigned to it."""

    def __init__(self, identifier, rank):
        # Both constructor arguments are stored unchanged.
        self.identifier, self.rank = identifier, rank

def get_mongo_connection(host='localhost', port=27017):
    """Return the module-wide MongoDB client, creating it on first use.

    The client is cached in the global ``con``; ``host`` and ``port`` are
    only used by the call that actually creates it. If creating the client
    raises, the exception is printed and ``None`` is returned.
    """
    global con
    if con is not None:
        return con

    print("Establishing connection %s host and port %d" % (host, port))
    try:
        con = pymongo.MongoClient(host, port)
    except Exception as e:
        print(e)
        return None
    return con

def scrapeBestSellerMobiles():
    """Scrape Snapdeal's best-seller listing for mobiles into ``bestSellers``.

    Requests five result pages (offsets 0, 20, 40, 60, 80; 20 results each)
    from the ``.../search/175/<offset>/20`` JSON endpoint with
    ``sort=bstslr`` (175 is presumably the mobiles category id -- confirm).
    Each entry of ``productOfferGroupDtos`` gets the next rank, starting at
    1, and every offer inside that group is recorded with the group's rank,
    using the first element of the offer's ``supcs`` list as identifier.

    The global ``bestSellers`` list is replaced, not extended. Network and
    JSON decoding errors propagate to the caller.
    """
    global bestSellers
    # Start from a clean list, exactly as scrapeBestSellerTablets does, so a
    # repeated call cannot accumulate stale or duplicate entries.
    bestSellers = []
    rank = 1
    for offset in [0, 20, 40, 60, 80]:
        url = "http://www.snapdeal.com/acors/json/product/get/search/175/%d/20?q=&sort=bstslr&keyword=&clickSrc=&viewType=List&lang=en&snr=false" % (offset)
        print(url)
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            json_input = response.read()
        finally:
            # Release the HTTP connection even if reading fails.
            response.close()

        info = json.loads(json_input)
        # A missing or null group list (e.g. a page past the last result)
        # simply contributes nothing instead of aborting the whole run.
        for offer in info.get('productOfferGroupDtos') or []:
            for identifiers in offer.get('offers') or []:
                supcs = identifiers.get('supcs')
                if not supcs:
                    # No identifier available for this offer; skip it.
                    continue
                bestSellers.append(__RankInfo(supcs[0], rank))
            # The rank advances per offer group, not per individual offer.
            rank += 1

def scrapeBestSellerTablets():
    """Scrape Snapdeal's best-seller listing for tablets into ``bestSellers``.

    Requests five result pages (offsets 0, 20, 40, 60, 80; 20 results each)
    from the ``.../search/133/<offset>/20`` JSON endpoint with
    ``sort=bstslr`` (133 is presumably the tablets category id -- confirm).
    Each entry of ``productOfferGroupDtos`` gets the next rank, starting at
    1, and every offer inside that group is recorded with the group's rank,
    using the first element of the offer's ``supcs`` list as identifier.

    The global ``bestSellers`` list is replaced, not extended. Network and
    JSON decoding errors propagate to the caller.
    """
    global bestSellers
    bestSellers = []
    rank = 1
    for offset in [0, 20, 40, 60, 80]:
        url = "http://www.snapdeal.com/acors/json/product/get/search/133/%d/20?sort=bstslr&keyword=&clickSrc=&viewType=List&lang=en&snr=false" % (offset)
        print(url)
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            json_input = response.read()
        finally:
            # Release the HTTP connection even if reading fails.
            response.close()

        info = json.loads(json_input)
        # A missing or null group list (e.g. a page past the last result)
        # simply contributes nothing instead of aborting the whole run.
        for offer in info.get('productOfferGroupDtos') or []:
            for identifiers in offer.get('offers') or []:
                supcs = identifiers.get('supcs')
                if not supcs:
                    # No identifier available for this offer; skip it.
                    continue
                bestSellers.append(__RankInfo(supcs[0], rank))
            # The rank advances per offer group, not per individual offer.
            rank += 1

def resetRanks(category):
    """Set ``rank`` back to 0 for every currently ranked item of *category*.

    Affects documents in ``Catalog.MasterData`` whose ``rank`` is greater
    than 0, whose ``source_id`` is 3 (presumably Snapdeal -- confirm) and
    whose ``category`` equals *category*. ``updatedOn`` is set to the
    module-level ``now`` timestamp converted with ``to_java_date``.

    :param category: value matched against the ``category`` field,
        e.g. ``'Mobiles'`` or ``'Tablets'``.
    """
    # One multi-document update replaces the former find-then-update-each
    # loop: same documents, same new values, a single round trip.
    get_mongo_connection().Catalog.MasterData.update(
        {'rank': {'$gt': 0}, 'source_id': 3, 'category': category},
        {'$set': {'rank': 0, 'updatedOn': to_java_date(now)}},
        multi=True)

def commitBestSellers():
    print "Rank",
    print '\t',
    print 'Identifier'
    for x in bestSellers:
        print x.rank,
        print '\t',
        print x.identifier,
        col = get_mongo_connection().Catalog.MasterData.find({'identifier':x.identifier.strip()})
        print "count sku",
        print '\t',
        print len(list(col))
        get_mongo_connection().Catalog.MasterData.update({'identifier':x.identifier.strip()}, {'$set' : {'rank':x.rank,'updatedOn':to_java_date(now)}}, multi=True)

def main():
    """Refresh best-seller ranks for mobiles, then for tablets.

    For each category the scraper runs first; only when it collected at
    least one entry are the category's old ranks reset and the new ones
    committed.
    """
    jobs = (
        (scrapeBestSellerMobiles, 'Mobiles'),
        (scrapeBestSellerTablets, 'Tablets'),
    )
    for scrape, category in jobs:
        scrape()
        # An empty scrape leaves the existing ranks untouched.
        if bestSellers:
            resetRanks(category)
            commitBestSellers()


if __name__ == '__main__':
    main()