Subversion Repositories SmartDukaan

Rev

Rev 19592 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

from elixir import *
from dtr.storage.DataService import FlipkartOrders
from dtr.storage import DataService
from dtr.utils.utils import get_mongo_connection, get_mongo_connection_dtr_data, \
to_java_date, to_py_date
from cassandra.cluster import Cluster
import optparse
from datetime import datetime, timedelta, time

# ---------------------------------------------------------------------------
# Command-line configuration.
# All four options are string host names that default to "localhost".
# ---------------------------------------------------------------------------
parser = optparse.OptionParser()


def _add_host_option(short_flag, long_flag, dest, help_text, metavar):
    """Register one string-valued host option defaulting to "localhost"."""
    parser.add_option(short_flag, long_flag,
                      dest=dest,
                      default="localhost",
                      type="string",
                      help=help_text,
                      metavar=metavar)


_add_host_option("-H", "--host", "hostname",
                 "The HOST where the DB server is running", "host")
_add_host_option("-m", "--m", "mongoHostDtr",
                 "Host where Catalog/Dtr dbs exist", "mongo_host")
_add_host_option("-d", "--d", "mongoHostDtrData",
                 "Host to dump data", "mongo_host_dtr_data")
_add_host_option("-c", "--c", "cassandraHost",
                 "Cassandra host", "cassandra_host")

options, args = parser.parse_args()

# Host names used by the functions below.
db_hostname = options.hostname
dtr_host = options.mongoHostDtr
# NOTE(review): dtr_data_host is assigned here but not referenced anywhere
# else in the visible code -- confirm whether it should be passed to
# get_mongo_connection_dtr_data().
dtr_data_host = options.mongoHostDtrData
cassandra_host = options.cassandraHost

# The Cassandra session is opened at import time and shared module-wide.
cluster = Cluster([cassandra_host])
session_cassandra = cluster.connect()
session_cassandra.set_keyspace("profitmandi")

# Echo the Mongo and DB hosts in use.
print(dtr_host)
print(db_hostname)


# skuBundleId -> Analytics; filled by createViewsMap() and createSalesMap(),
# then written out by commit().
bundleMap = dict()

class Analytics(object):
    """View and sales counters for one SKU bundle on one reporting date.

    commit() stores an instance by inserting its ``__dict__``, so the four
    instance attributes assigned in ``__init__`` are exactly the fields that
    end up in the stored document. Keep per-instance state limited to these
    attributes (and do not introduce ``__slots__``) unless commit() is
    changed as well.
    """

    def __init__(self, skuBundleId, sales, views, date):
        """Store the bundle id, its sales and view counts, and the date."""
        self.skuBundleId = skuBundleId
        self.sales = sales
        self.views = views
        self.date = date

    def __repr__(self):
        # Class-level method only: it does not add anything to __dict__.
        return "Analytics(skuBundleId=%r, sales=%r, views=%r, date=%r)" % (
            self.skuBundleId, self.sales, self.views, self.date)

def createViewsMap():
    """Count the previous day's click events per SKU bundle into ``bundleMap``.

    Reads every ``click`` event in the window [yesterday midnight, today
    midnight) from the Cassandra ``events`` table, maps each event's SKU to
    its ``skuBundleId`` through ``Catalog.MasterData`` and increments the
    ``views`` counter of the matching ``Analytics`` entry, creating the entry
    (with zero sales) when the bundle is seen for the first time.

    Events whose ``properties`` carry no usable integer ``sku`` and SKUs
    without a MasterData document are skipped instead of aborting the run.
    """
    # Datefield - midnight today. The window starts at the previous day's
    # midnight and ends at date_field.
    date_field = datetime.combine(datetime.now().date(), time.min)
    start = to_java_date(date_field - timedelta(days=1))
    end = to_java_date(date_field)
    # Data stored in cassandra is in utc format, so both bounds are converted
    # before being placed in the query. (The division by 1000 implies that
    # to_java_date yields milliseconds -- TODO confirm against dtr.utils.)
    utc_start = to_java_date(datetime.utcfromtimestamp(start / 1000))
    utc_end = to_java_date(datetime.utcfromtimestamp(end / 1000))
    query = ("select * from events where label='click' and time >='"
             + str(utc_start) + "' and time < '" + str(utc_end)
             + "' ALLOW FILTERING")
    print(query)

    # sku -> skuBundleId (or no_master when MasterData has no document), so
    # every distinct SKU is looked up only once per run.
    no_master = object()
    bundle_by_sku = {}
    skipped = 0
    for event in session_cassandra.execute(query):
        try:
            sku = int(event.properties['sku'])
        except (KeyError, TypeError, ValueError):
            # Missing properties, missing 'sku' or a non-numeric value: one
            # malformed event must not abort the whole aggregation.
            skipped += 1
            continue

        if sku in bundle_by_sku:
            bundleId = bundle_by_sku[sku]
        else:
            master = get_mongo_connection(host=dtr_host).Catalog.MasterData.find_one({'_id': sku})
            bundleId = no_master if master is None else master['skuBundleId']
            bundle_by_sku[sku] = bundleId
        if bundleId is no_master:
            continue

        if bundleId in bundleMap:
            entry = bundleMap[bundleId]
            entry.views = entry.views + 1
        else:
            # The entry is dated with the local-midnight window start, not
            # the UTC-shifted value used in the query.
            bundleMap[bundleId] = Analytics(bundleId, 0, 1, start)

    if skipped:
        print("Skipped %d click events without a usable sku" % skipped)

def _addSales(skuBundleId, qty, date):
    """Add ``qty`` to the bundle's sales in ``bundleMap``, creating the entry if absent."""
    if skuBundleId in bundleMap:
        entry = bundleMap[skuBundleId]
        entry.sales = entry.sales + qty
    else:
        bundleMap[skuBundleId] = Analytics(skuBundleId, qty, 0, date)


def createSalesMap():
    """Add the previous day's sold quantities per SKU bundle to ``bundleMap``.

    Two sources are read for the window [yesterday midnight, today midnight):

    * ``Dtr.merchantOrder`` documents in Mongo: each sub-order with a
      ``productCode`` is resolved to a ``Catalog.MasterData`` document and
      its ``quantity`` is credited to that document's ``skuBundleId``.
    * ``FlipkartOrders`` rows from the SQL session: ``quantity`` is credited
      to the row's ``catalogId``.

    Sub-orders without a product code, products without a MasterData
    document and Flipkart rows without a ``catalogId`` are skipped. The SQL
    session is closed even when processing fails part-way.
    """
    DataService.initialize(db_hostname=db_hostname)
    try:
        date_field = datetime.combine(datetime.now().date(), time.min)
        start = to_java_date(date_field - timedelta(days=1))
        end = to_java_date(date_field)

        # One handle serves the order query and every MasterData lookup.
        mongo = get_mongo_connection(host=dtr_host)
        masters = mongo.Catalog.MasterData
        all_orders = mongo.Dtr.merchantOrder.find(
            {'createdOnInt': {"$gte": start / 1000, "$lt": end / 1000}})
        for order in all_orders:
            store = order['storeId']
            subOrders = order.get('subOrders')
            if subOrders is None:
                continue
            for subOrder in subOrders:
                if subOrder.get('productCode') is None:
                    continue
                productCode = str(subOrder['productCode'])
                qty = subOrder['quantity']
                # storeId 3 is matched on secondaryIdentifier; every other
                # store is matched on identifier.
                if store != 3:
                    id_field = 'identifier'
                else:
                    id_field = 'secondaryIdentifier'
                master = masters.find_one({id_field: productCode, 'source_id': store})
                if master is None:
                    continue
                _addSales(master.get('skuBundleId'), qty, start)

        # Half-open window [start, end), matching the $gte/$lt bounds of the
        # Mongo query above, so a row stamped exactly at the window start is
        # counted once instead of falling between two consecutive runs.
        fk_orders = (session.query(FlipkartOrders)
                     .filter(FlipkartOrders.created >= to_py_date(start))
                     .filter(FlipkartOrders.created < to_py_date(end))
                     .all())
        for fk_order in fk_orders:
            if fk_order.catalogId is None:
                continue
            _addSales(fk_order.catalogId, fk_order.quantity, start)
    finally:
        # Release the SQL session on both the success and the failure path.
        session.close()

def commit():
    """Insert every ``bundleMap`` entry into ``Analytics.ViewSalesData``.

    Each ``Analytics`` object is written as its attribute dictionary, one
    insert per entry.
    """
    # NOTE(review): the -d/--d option is parsed into dtr_data_host at module
    # level, but no host is passed to get_mongo_connection_dtr_data() here --
    # confirm whether that is intentional.
    for record in bundleMap.values():
        view_sales = get_mongo_connection_dtr_data().Analytics.ViewSalesData
        view_sales.insert(vars(record))

def assignBestSellerRanks():
    """Rank the best-selling SKU bundles of category_id 6 from recent sales.

    Sums ``sales`` per ``skuBundleId`` over the ``ViewSalesData`` documents
    dated within the last 30 days, resets ``rank`` to 0 on every
    category_id 6 MasterData document that currently has a positive rank,
    and then walks the bundles in descending order of total sales, writing
    ranks 1..100 (together with ``updatedOn``) to the MasterData documents
    of each bundle whose looked-up document has category_id 6.
    """
    midnight_today = datetime.combine(datetime.now().date(), time.min)
    window_start = to_java_date(midnight_today - timedelta(days=30))

    # Total sales per bundle across the window.
    sales_rows = get_mongo_connection_dtr_data().Analytics.ViewSalesData.find(
        {'date': {"$gte": window_start}},
        {'skuBundleId': 1, 'sales': 1})
    sales_by_bundle = {}
    for row in sales_rows:
        bundle_id = row['skuBundleId']
        if bundle_id in sales_by_bundle:
            sales_by_bundle[bundle_id] = sales_by_bundle[bundle_id] + row['sales']
        else:
            sales_by_bundle[bundle_id] = row['sales']

    # Highest total first.
    best_first = list(sales_by_bundle.items())
    best_first.sort(key=lambda pair: pair[1], reverse=True)

    # Clear the existing ranks before assigning the new ones.
    get_mongo_connection(host=dtr_host).Catalog.MasterData.update(
        {'rank': {"$gt": 0}, 'category_id': 6},
        {"$set": {'rank': 0}},
        upsert=False, multi=True)

    next_rank = 1
    for bundle_id, _total in best_first:
        if next_rank > 100:
            # Only the top 100 bundles receive a rank.
            break
        master = get_mongo_connection(host=dtr_host).Catalog.MasterData.find_one({'skuBundleId': bundle_id})
        if master is not None and master['category_id'] == 6:
            get_mongo_connection(host=dtr_host).Catalog.MasterData.update(
                {'skuBundleId': master['skuBundleId']},
                {"$set": {"rank": next_rank, 'updatedOn': to_java_date(datetime.now())}},
                upsert=False, multi=True)
            # Bundles that are skipped do not consume a rank.
            next_rank += 1
        

def main():
    """Run the four stages in order: views, sales, commit, ranking."""
    # Order matters: both maps must be filled before commit() writes them,
    # and assignBestSellerRanks() reads the collection commit() inserts into.
    stages = (createViewsMap, createSalesMap, commit, assignBestSellerRanks)
    for stage in stages:
        stage()


if __name__ == '__main__':
    main()