Subversion Repositories SmartDukaan

Rev

Rev 17323 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

import MySQLdb
from dtr.storage.DataService import brand_preferences
from elixir import *
from dtr.storage import DataService, Mongo
from dtr.storage.DataService import user_filters
from sqlalchemy.sql.expression import func, between
from datetime import datetime,timedelta
import re
import traceback
import pymongo
from operator import and_

DataService.initialize(echo=True)

DB_HOST = "localhost"
DB_USER = "root"
DB_PASSWORD = "shop2020"
DB_NAME = "dtr"
con=None

USER_SEGMENTATION_LINK_QUERY="""
select x.user_id,x.brand,x.category_id,x.count,x.source,x.weight 
  from
    (
    select c.user_id,c.brand,c.category_id,count(c.id) as count,"clicks" as source,(uw.weightage * count(c.id)) as weight 
    from clicks c join user_segmentation_weigthage uw on "clicks"=uw.weightage_type
    where  c.category_id is not null and c.category_id !=0 and c.brand is not null and 
    c.brand not like '' and c.user_id!=0 group by c.category_id,c.brand,c.user_id
    
    UNION
   
    select m.userId as user_id,m.brand,m.categoryId as category_id,count(1) as count,"orders" as source,(uw.weightage * count(1)) as weight
    from merchantsuborders m join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
    where m.categoryId is not null and m.brand is not null and m.userId!=0 and m.brand not like 'NA' group by m.categoryId,m.brand,m.userId
  
    UNION
  
    select fo.user_id,fo.brand,fo.category as category_id,count(fo.id) as count,"orders" as source, (uw.weightage * count(fo.id)) as weight
    from flipkartorders fo join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
    where  fo.category is not null and fo.brand is not null and  fo.brand not like '' and fo.user_id!=0 and fo.user_id is not null 
    group by fo.category,fo.brand,fo.user_id
    
    UNION
    
    select bp.user_id,bp.brand,bp.category_id,count(1) as count,"preferences" as source,(uw.weightage* count(1)) as weight
    from brand_preferences bp join user_segmentation_weigthage uw on "preferences"=uw.weightage_type  
    where user_id!=0 and status='show' group by bp.category_id,bp.brand,bp.user_id
) as x
"""

USER_STORE_PRODUCT_VIEW="""
    SELECT u.user_id,count(u.id) AS count, SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1)  
    AS skuBundleId,"product_view" as source,(uw.weightage* count(u.id)) as weight
    FROM user_urls u join user_segmentation_weigthage uw on "product_view"=uw.weightage_type 
    WHERE u.url LIKE '%store_products/view%' and u.user_id!=0 and 
    SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1) not like ''
    and datediff(curdate(),u.created)<7
    group by u.user_id,skuBundleId; 
"""

REPLACE_QUERY="""
REPLACE INTO userlinkssegmentation( user_id, brand, source,category_id, count, weight ) 
SELECT user_id, brand, source,category_id, count, weight
FROM userlinkssegmentation7days
WHERE user_id =%s and brand=%s and category_id=%s
"""


userMap = {}
userMobileCategory={}
userTabletCategory={}

def getDbConnection():
    return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)


def addUsersForLinks():
    print '********** Inserting records in master table *********'
    user_id=''
    brand=''
    category_id='' 
    source=''
    count=''
    weight=''

    datesql= USER_SEGMENTATION_LINK_QUERY
    conn = getDbConnection()
    cursor = conn.cursor()
    cursor.execute(datesql)
    result = cursor.fetchall()
    
    for r in result:
        user_id=r[0]
        brand=r[1]
        if r[2] == 'Mobiles':
            category_id=3
        elif r[2] == 'Tablets':
            category_id=5
        else:
            category_id=r[2] 
        count=r[3]
        source=r[4]
        weight=r[5]
        if brand is not 'NA':
            sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
            conn.commit()
    conn.close()
    print '********** Completed inserting records for orders,preferences and clicks in master table *********'
    
def filters():
    print '********** Inserting records for filters in master table *********'
    conn = getDbConnection()
    cursor = conn.cursor()
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='filters'")
    result = cursor.fetchall()
    filter_weight=result[0][0]
    data = session.query((user_filters)).filter_by(type="brand").filter(~(user_filters.filters).like('NA')).all()
    for d in data:
        if userMap.has_key(d.user_id):
            brandMap = userMap.get(d.user_id) 
        else:
            brandMap = {}
        brands = d.filters.split('|')
        for brand in brands:
            if brandMap.has_key(brand):
                brandMap[brand] = brandMap.get(brand) + 1
            else:
                brandMap[brand] = 1
        userMap[d.user_id] = brandMap
    for x, y in userMap.iteritems():
        for bra,cow in y.iteritems():
            user_id=x
            brand=bra
            count=cow
            category_id=3
            weight=cow*filter_weight
            source='filters'
            if brand is not 'NA':
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
                conn.commit()
    conn.close()
    print '********** Completed inserting records for filters in master table *********'
    
def storeProductView():
    print '********** Inserting records for store product view in master table *********'
    user_id=''
    brand=''
    category_id='' 
    source=''
    count=''
    weight=''
    
    result = get_mongo_connection_new().User.browsinghistories.find({"url":{"$regex" : "/store_products\/view/"}})
    
    for r in result:
        matchObj = re.search('store_products/view/(.+?)/(.+?)$',r.get('url'))
        if matchObj:
            skuBundleId = matchObj.group(2)
            if 'user_id' in skuBundleId or '=' in skuBundleId:
                newMatchObj = re.search('(.+?)\?',skuBundleId)
                if newMatchObj:
                    skuBundleId=newMatchObj.group(1)
                else:
                    print 'No match userId', skuBundleId
                    continue
            elif '#' in skuBundleId:
                newMatchObj = re.search('(.+?)\#',skuBundleId)
                if newMatchObj:
                    skuBundleId=newMatchObj.group(1)
                else:
                    print 'No match #', skuBundleId
                    continue
            if '%' in skuBundleId:
                newMatchObj = re.search('(.+?)\%',skuBundleId)
                if newMatchObj:
                    skuBundleId=newMatchObj.group(1)
                else:
                    print 'No match', skuBundleId                     
        else:
            print "No match!!"
        print skuBundleId            
        if skuBundleId.isdigit():
            print 'Inside is digit true'
            skuBundleId = int(skuBundleId)
        else:
            print 'Inside is digit false'
            skuBundleId =  int(float(skuBundleId))
            
        skuData = Mongo.getSkuBrandData(skuBundleId)
        if len(skuData) !=0:
            user_id=r.get('user_id')
            brand=skuData['brand']
            category_id =skuData['category_id']
            source='product_view'
            count=1
            if category_id==3:
                if userMobileCategory.has_key(user_id):
                    brandMap = userMobileCategory.get(user_id)
                else:
                    brandMap = {}
                if brandMap.has_key(brand):
                    brandMap[brand] = brandMap.get(brand) + 1 
                else:
                    brandMap[brand] = 1
                userMobileCategory[user_id] = brandMap
            if category_id==5:
                if userTabletCategory.has_key(user_id):
                    brandCategoryMap = userTabletCategory.get(user_id)
                else:
                    brandCategoryMap = {}
                if brandCategoryMap.has_key(brand):
                    brandCategoryMap[brand] = brandCategoryMap.get(brand) + 1
                else:
                    brandCategoryMap[brand] = 1
                userTabletCategory[user_id] = brandCategoryMap    
        else:
            print 'Sku bundle Id not present',r.get('url')
    
    conn=getDbConnection()
    cursor=conn.cursor()
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='product_view'")
    result = cursor.fetchall()
    product_view_weight=result[0][0]
    
    try: 
        for x, y in userMobileCategory.iteritems():
            for bra,cow in y.iteritems():
                user_id=x
                brand= bra
                count=cow
                weight=cow*product_view_weight
                source='product_view'
                category_id=3
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
                conn.commit()
                 
        for x, y in userTabletCategory.iteritems():
            for bra,cow in y.iteritems():
                user_id=x
                brand= bra
                count=cow
                weight=cow*product_view_weight
                source='product_view'
                category_id=5
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
                conn.commit()
    except:
        print traceback.print_exc()
    conn.close()
    
    print '********** Completed inserting records for store product view in master table *********'
    
def findRepeatingOrders():
    print '********** Call to sanitize data for orders if any multiple *********'
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='orders' group by user_id,brand,category_id having count(source)=2;"
    conn = getDbConnection()
    cursor = conn.cursor()
    rows_count = cursor.execute(datesql)
    if rows_count>0:
        result = cursor.fetchall()
        for r in result:
            print r
            findDetails(r[0],r[1],r[2])
    else:
        print 'No Data to be sanitized for repeating orders'            
        
def findDetails(userId,brand,category):
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='orders' and brand=%s group by source;"
    conn = getDbConnection()
    cursor = conn.cursor()
    cursor.execute(datesql,(userId,brand))
    result = cursor.fetchall()
    for r in result:
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='orders'"
        cursor = conn.cursor()
        cursor.execute(deletesql,(userId,brand))
        conn.commit()
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
        cursor = conn.cursor()
        cursor.execute(insertsql,(userId,brand,category,'orders',r[0],r[1]))
        conn.commit()

def findRepeatingClicks():
    print '********** Call to sanitize data for product view if any multiple *********'
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='product_view' group by user_id,brand,category_id having count(source)=2;"
    conn = getDbConnection()
    cursor = conn.cursor()
    rows_count = cursor.execute(datesql)
    if rows_count>0:
        result = cursor.fetchall()
        for r in result:
            print r
            findViewDetails(r[0],r[1],r[2])
    else:
        print 'No Data to be sanitized for product view'            
        
def findViewDetails(userId,brand,category):
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='product_view' and brand=%s group by source;"
    conn = getDbConnection()
    cursor = conn.cursor()
    cursor.execute(datesql,(userId,brand))
    result = cursor.fetchall()
    for r in result:
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='product_view'"
        cursor = conn.cursor()
        cursor.execute(deletesql,(userId,brand))
        conn.commit()
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
        cursor = conn.cursor()
        cursor.execute(insertsql,(userId,brand,category,'product_view',r[0],r[1]))
        conn.commit()        
        
def get_mongo_connection_new(host='192.168.161.154', port=27017):
    global con
    if con is None:
        print "Establishing connection %s host and port %d" %(host,port)
        try:
            con = pymongo.MongoClient(host, port)
        except Exception, e:
            print e
            return None
    return con
        
def main():
    print datetime.now()
    print '********** Starting to generate Master Data Users *********'
    addUsersForLinks()
    filters()
    storeProductView()
    print '********** Finished updating records for user links *********'
if __name__=='__main__':
    main()