# Rev 13828 | Rev 15088 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
"""Scrape Snapdeal best-seller listings and store the ranks in MongoDB.

The script downloads the best-seller JSON listing for two categories
(one scraper for mobiles, one for tablets), assigns each listed product a
1-based rank in listing order, resets the previous ranks stored in
``Catalog.MasterData`` for that category and writes the new ranks.

NOTE(review): this is Python 2 code (``urllib2``); the statements below
are merely written in a form that also parses under Python 3.
"""
import optparse
import urllib2
from contextlib import closing
from datetime import datetime

import pymongo
import simplejson as json

from dtr.utils.utils import to_java_date

# Lazily created MongoDB client, shared by every function in this module.
con = None

# Options are parsed at import time because get_mongo_connection() uses
# the parsed host as the default value of its ``host`` parameter.
parser = optparse.OptionParser()
parser.add_option("-m", "--m", dest="mongoHost",
                  default="localhost",
                  type="string", help="The HOST where the mongo server is running",
                  metavar="mongo_host")
(options, args) = parser.parse_args()

# Browser-like request headers sent with every listing request.
headers = {
    'User-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
}

# Listing endpoints; ``%d`` is filled with the paging offset.
# NOTE(review): the trailing ``/20`` is presumably the page size, which
# would match the offset step below -- confirm against the API.
_MOBILES_URL = "http://www.snapdeal.com/acors/json/product/get/search/175/%d/20?q=&sort=bstslr&keyword=&clickSrc=&viewType=List&lang=en&snr=false"
_TABLETS_URL = "http://www.snapdeal.com/acors/json/product/get/search/133/%d/20?sort=bstslr&keyword=&clickSrc=&viewType=List&lang=en&snr=false"

# Offsets requested for every category, in ranking order.
_PAGE_OFFSETS = (0, 20, 40, 60, 80)

# Upper bound for a single HTTP request so a stalled server cannot hang
# the whole job indefinitely.
_HTTP_TIMEOUT_SECONDS = 30

# Result of the most recent scrape: a list of __RankInfo objects.
bestSellers = []

# Single timestamp used for every ``updatedOn`` written during this run.
now = datetime.now()


class __RankInfo:
    """Pair a product identifier with its position in the listing."""

    def __init__(self, identifier, rank):
        self.identifier = identifier
        self.rank = rank


def get_mongo_connection(host=options.mongoHost, port=27017):
    """Return the shared MongoDB client, creating it on first use.

    host -- MongoDB host; defaults to the ``-m`` command line option.
    port -- MongoDB port.

    Returns the ``pymongo.MongoClient`` instance, or ``None`` when the
    client could not be created (the error is printed). A failed attempt
    is not cached, so the next call tries again.
    """
    global con
    if con is None:
        print("Establishing connection %s host and port %d" % (host, port))
        try:
            con = pymongo.MongoClient(host, port)
        except Exception as e:
            print(e)
            return None
    return con


def _scrape_best_sellers(url_template):
    """Download every listing page and return the ranked products.

    url_template -- listing URL containing one ``%d`` for the offset.

    Returns a new list of __RankInfo objects; ranks start at 1 and follow
    the order in which offers appear across the pages. Network, JSON and
    missing-key errors propagate to the caller, so a broken response
    aborts the run before anything is written to the database.
    """
    ranked = []
    rank = 1
    for offset in _PAGE_OFFSETS:
        url = url_template % (offset)
        print(url)
        request = urllib2.Request(url, headers=headers)
        # closing() guarantees the HTTP response is released even when
        # reading or decoding fails.
        with closing(urllib2.urlopen(request, timeout=_HTTP_TIMEOUT_SECONDS)) as response:
            info = json.loads(response.read())
        for offer in info['productOfferGroupDtos']:
            for identifiers in offer['offers']:
                # The first entry of 'supcs' is used as the identifier.
                ranked.append(__RankInfo((identifiers['supcs'])[0], rank))
                rank += 1
    return ranked


def scrapeBestSellerMobiles():
    """Replace ``bestSellers`` with the current mobile best sellers."""
    global bestSellers
    bestSellers = _scrape_best_sellers(_MOBILES_URL)


def scrapeBestSellerTablets():
    """Replace ``bestSellers`` with the current tablet best sellers."""
    global bestSellers
    bestSellers = _scrape_best_sellers(_TABLETS_URL)


def resetRanks(category):
    """Set ``rank`` back to 0 for every ranked item of ``category``.

    Only documents with ``source_id`` 3 and a rank greater than 0 are
    touched; their ``updatedOn`` is refreshed as well.
    NOTE(review): source_id 3 presumably identifies Snapdeal -- confirm.
    """
    collection = get_mongo_connection().Catalog.MasterData
    # One multi-document update instead of a find plus one update per hit.
    collection.update(
        {'rank': {'$gt': 0}, 'source_id': 3, 'category': category},
        {'$set': {'rank': 0, 'updatedOn': to_java_date(now)}},
        multi=True)


def commitBestSellers():
    """Write the ranks held in ``bestSellers`` to ``Catalog.MasterData``.

    For every entry, prints the rank, the identifier and the number of
    documents carrying that identifier, then sets ``rank`` and
    ``updatedOn`` on all of those documents.
    """
    collection = get_mongo_connection().Catalog.MasterData
    print("Rank \tIdentifier")
    for x in bestSellers:
        query = {'identifier': x.identifier.strip()}
        # Ask the server for the count rather than loading every document.
        sku_count = collection.find(query).count()
        print("%s \t%s count sku \t%d" % (x.rank, x.identifier, sku_count))
        collection.update(
            query,
            {'$set': {'rank': x.rank, 'updatedOn': to_java_date(now)}},
            multi=True)


def main():
    """Refresh the stored best-seller ranks for mobiles, then tablets.

    Existing ranks of a category are only reset when its scrape returned
    at least one product, so an empty listing leaves old ranks in place.
    """
    # Fail early with a clear message instead of crashing later on
    # ``None.Catalog`` after all the pages have been downloaded.
    if get_mongo_connection() is None:
        raise SystemExit("Could not connect to MongoDB at %s; aborting"
                         % options.mongoHost)

    scrapeBestSellerMobiles()
    if bestSellers:
        resetRanks('Mobiles')
        commitBestSellers()

    scrapeBestSellerTablets()
    if bestSellers:
        resetRanks('Tablets')
        commitBestSellers()


if __name__ == '__main__':
    main()