Subversion Repositories SmartDukaan

Rev

Rev 17329 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

import MySQLdb
from dtr.storage.DataService import brand_preferences
from elixir import *
from dtr.storage import DataService, Mongo
from dtr.storage.DataService import user_filters
from sqlalchemy.sql.expression import func, between
from datetime import datetime,timedelta
from UserSegmentationLinksMaster import get_mongo_connection_new
import re
from dtr.utils.utils import to_java_date
DataService.initialize(echo=True)

DB_HOST = "localhost"
DB_USER = "root"
DB_PASSWORD = "shop2020"
DB_NAME = "dtr"

USER_SEGMENTATION_LINK_QUERY="""
select x.user_id,x.brand,x.category_id,x.count,x.source,x.weight 
  from
    (
    select c.user_id,c.brand,c.category_id,count(c.id) as count,"clicks" as source,(uw.weightage * count(c.id)) as weight 
    from clicks c join user_segmentation_weigthage uw on "clicks"=uw.weightage_type
    where  c.category_id is not null and c.category_id !=0 and c.brand is not null and 
    c.brand not like '' and c.user_id!=0 and datediff(curdate(),c.created)<7 group by c.category_id,c.brand,c.user_id
    
    UNION
   
    select m.userId as user_id,m.brand,m.categoryId as category_id,count(1) as count,"orders" as source,(uw.weightage * count(1)) as weight
    from merchantsuborders m join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
    where m.categoryId is not null and m.brand is not null and m.brand not like 'NA' and m.userId!=0 and datediff(curdate(),m.createdTime)<7 group by m.categoryId,m.brand,m.userId
  
    UNION
  
    select fo.user_id,fo.brand,fo.category as category_id,count(fo.id) as count,"orders" as source, (uw.weightage * count(fo.id)) as weight
    from flipkartorders fo join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
    where  fo.category is not null and fo.brand is not null and  fo.brand not like '' and fo.user_id!=0 and fo.user_id is not null 
    and datediff(curdate(),fo.created)<7 group by fo.category,fo.brand,fo.user_id
    
    UNION
    
    select bp.user_id,bp.brand,bp.category_id,count(1) as count,"preferences" as source,(uw.weightage* count(1)) as weight
    from brand_preferences bp join user_segmentation_weigthage uw on "preferences"=uw.weightage_type  
    where user_id!=0 and status='show' and datediff(curdate(),bp.created)<7 group by bp.category_id,bp.brand,bp.user_id
) as x

"""

USER_STORE_PRODUCT_VIEW="""
    SELECT u.user_id,count(u.id) AS count, SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1)  
    AS skuBundleId,"product_view" as source,(uw.weightage* count(u.id)) as weight
    FROM user_urls u join user_segmentation_weigthage uw on "product_view"=uw.weightage_type 
    WHERE u.url LIKE '%store_products/view%' and u.user_id!=0 and 
    SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1) not like ''
    and datediff(curdate(),u.created)<7
    group by u.user_id,skuBundleId; 
"""

REPLACE_QUERY="""
REPLACE INTO newuserlinkssegmentation( user_id, brand, source,category_id, count, weight,modified) 
SELECT user_id, brand, source,category_id, count, weight,created
FROM newuserlinkssegmentation7days
WHERE user_id =%s and brand=%s and category_id=%s and source=%s
"""


userMap = {} #key - user_id val - brand map
userMobileCategory={}
userTabletCategory={}

def getDbConnection():
    return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)


def deleteUserForLinks():
    print '********** Deleting records from last 7 days tables *********'
    conn = getDbConnection()
    cursor = conn.cursor()
    deleteAllUserGroups = "delete from newuserlinkssegmentation7days"
    try:
        cursor.execute(deleteAllUserGroups)
        conn.commit()
    except:
        conn.rollback()
    finally:
        conn.close()    
        print '********** Completed Deleting records from last 7 days tables *********'

def addUsersForLinks():
    print '********** Inserting records in last 7 days table *********'
    user_id=''
    brand=''
    category_id='' 
    source=''
    count=''
    weight=''

    datesql= USER_SEGMENTATION_LINK_QUERY
    conn = getDbConnection()
    cursor = conn.cursor()
    cursor.execute(datesql)
    result = cursor.fetchall()
    
    for r in result:
        user_id=r[0]
        brand=r[1]
        if r[2] == 'Mobiles':
            category_id=3
        elif r[2] == 'Tablets':
            category_id=5
        else:
            category_id=r[2] 
        count=r[3]
        source=r[4]
        weight=r[5]
        if brand is not 'NA':
            sql = "replace into newuserlinkssegmentation7days (user_id, brand, category_id, source, count,weight,created) values(%s, %s, %s, %s, %s,%s,%s)"
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now()))
            conn.commit()
    conn.close()
    print '********** Completed inserting records for orders,preferences and clicks in last 7 days table *********'
    
def filters():
    print '********** Inserting records for filters in last 7 day table *********'
    conn = getDbConnection()
    cursor = conn.cursor()
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='filters'")
    result = cursor.fetchall()
    filter_weight=result[0][0]
    todayDate=datetime.today().date()
    data = session.query((user_filters)).filter_by(type="brand").filter(~(user_filters.filters).like('NA')).filter(user_filters.created.between(todayDate-timedelta(days=6),todayDate)).all()
    for d in data:
        if userMap.has_key(d.user_id):
            brandMap = userMap.get(d.user_id) 
        else:
            brandMap = {}
        brands = d.filters.split('|')
        for brand in brands:
            if brandMap.has_key(brand):
                brandMap[brand] = brandMap.get(brand) + 1
            else:
                brandMap[brand] = 1
        userMap[d.user_id] = brandMap
    for x, y in userMap.iteritems():
        for bra,cow in y.iteritems():
            user_id=x
            brand=bra
            count=cow
            category_id=3
            weight=cow*filter_weight
            source='filters'
            if brand is not 'NA':
                sql = "replace into newuserlinkssegmentation7days (user_id, brand, category_id, source, count,weight,created) values(%s, %s, %s, %s, %s,%s,%s)"
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now()))
                conn.commit()
    conn.close()
    print '********** Completed inserting records for filters in last 7 day table *********'
    
def storeProductView():
    print '********** Inserting records for store product view in last 7 day table *********'
    user_id=''
    brand=''
    category_id='' 
    source=''
    count=''
    weight=''
    
    result = get_mongo_connection_new().User.browsinghistories.find({"$and":[{"url":{"$regex" : "/store_products\/view/"}},{"created":{"$gte":getLastSeventhDay()}}]})
    
    for r in result:
        matchObj = re.search('store_products/view/(.+?)/(.+?)$',r.get('url'))
        if matchObj:
            skuBundleId = matchObj.group(2)
            if 'user_id' in skuBundleId or '=' in skuBundleId:
                newMatchObj = re.search('(.+?)\?',skuBundleId)
                if newMatchObj:
                    skuBundleId=newMatchObj.group(1)
                else:
                    print 'No match userId', skuBundleId
                    continue
            elif '#' in skuBundleId:
                newMatchObj = re.search('(.+?)\#',skuBundleId)
                if newMatchObj:
                    skuBundleId=newMatchObj.group(1)
                else:
                    print 'No match #', skuBundleId
                    continue
            if '%' in skuBundleId:
                newMatchObj = re.search('(.+?)\%',skuBundleId)
                if newMatchObj:
                    skuBundleId=newMatchObj.group(1)
                else:
                    print 'No match', skuBundleId                     
        else:
            print "No match!!"
        print skuBundleId            
        if skuBundleId.isdigit():
            print 'Inside is digit true'
            skuBundleId = int(skuBundleId)
        else:
            print 'Inside is digit false'
            skuBundleId =  int(float(skuBundleId))
            
        skuData = Mongo.getSkuBrandData(skuBundleId)
        if len(skuData) !=0:
            user_id=r.get('user_id')
            brand=skuData['brand']
            category_id =skuData['category_id']
            source='product_view'
            count=1
            if category_id==3:
                if userMobileCategory.has_key(user_id):
                    brandMap = userMobileCategory.get(user_id)
                else:
                    brandMap = {}
                if brandMap.has_key(brand):
                    brandMap[brand] = brandMap.get(brand) + 1 
                else:
                    brandMap[brand] = 1
                userMobileCategory[user_id] = brandMap
            if category_id==5:
                if userTabletCategory.has_key(user_id):
                    brandCategoryMap = userTabletCategory.get(user_id)
                else:
                    brandCategoryMap = {}
                if brandCategoryMap.has_key(brand):
                    brandCategoryMap[brand] = brandCategoryMap.get(brand) + 1
                else:
                    brandCategoryMap[brand] = 1
                userTabletCategory[user_id] = brandCategoryMap    
        else:
            print 'Sku bundle Id not present',r.get('url')

    conn=getDbConnection()
    cursor=conn.cursor()
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='product_view'")
    result = cursor.fetchall()
    product_view_weight=result[0][0]
    
    for x, y in userMobileCategory.iteritems():
        for bra,cow in y.iteritems():
            user_id=x
            brand= bra
            count=cow
            weight=cow*product_view_weight
            source='product_view'
            category_id=3
            sql = "replace into newuserlinkssegmentation7days (user_id, brand, category_id, source, count,weight,created) values(%s, %s, %s, %s, %s,%s,%s)"
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now()))
            conn.commit()
            
    for x, y in userTabletCategory.iteritems():
        for bra,cow in y.iteritems():
            user_id=x
            brand= bra
            count=cow
            weight=cow*product_view_weight
            source='product_view'
            category_id=5
            sql = "replace into newuserlinkssegmentation7days (user_id, brand, category_id, source, count,weight,created) values(%s, %s, %s, %s, %s,%s,%s)"
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now()))
            conn.commit()
    conn.close()
    
    print '********** Completed inserting records for store product view in last 7 day table *********'
    
def findRepeatingOrders():
    print '********** Call to sanitize data for orders if any multiple *********'
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='orders' group by user_id,brand,category_id having count(source)=2;"
    conn = getDbConnection()
    cursor = conn.cursor()
    rows_count = cursor.execute(datesql)
    if rows_count>0:
        result = cursor.fetchall()
        for r in result:
            print r
            findDetails(r[0],r[1],r[2])
    else:
        print 'No Data to be sanitized for repeating orders'            
        
def findDetails(userId,brand,category):
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='orders' and brand=%s group by source;"
    conn = getDbConnection()
    cursor = conn.cursor()
    cursor.execute(datesql,(userId,brand))
    result = cursor.fetchall()
    for r in result:
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='orders'"
        cursor = conn.cursor()
        cursor.execute(deletesql,(userId,brand))
        conn.commit()
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
        cursor = conn.cursor()
        cursor.execute(insertsql,(userId,brand,category,'orders',r[0],r[1]))
        conn.commit()

def findRepeatingClicks():
    print '********** Call to sanitize data for product view if any multiple *********'
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='product_view' group by user_id,brand,category_id having count(source)=2;"
    conn = getDbConnection()
    cursor = conn.cursor()
    rows_count = cursor.execute(datesql)
    if rows_count>0:
        result = cursor.fetchall()
        for r in result:
            print r
            findViewDetails(r[0],r[1],r[2])
    else:
        print 'No Data to be sanitized for product view'            
        
def findViewDetails(userId,brand,category):
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='product_view' and brand=%s group by source;"
    conn = getDbConnection()
    cursor = conn.cursor()
    cursor.execute(datesql,(userId,brand))
    result = cursor.fetchall()
    for r in result:
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='product_view'"
        cursor = conn.cursor()
        cursor.execute(deletesql,(userId,brand))
        conn.commit()
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
        cursor = conn.cursor()
        cursor.execute(insertsql,(userId,brand,category,'product_view',r[0],r[1]))
        conn.commit()        
    
    
def checkCountForUpdate():
    print '********** Checking users brand whose cumulative weight is greater than 3 *********'
    datesql= "select user_id,brand,category_id from newuserlinkssegmentation7days group by user_id,brand,category_id having sum(weight) >=3;"
    conn = getDbConnection()
    cursor = conn.cursor()
    cursor.execute(datesql)
    result = cursor.fetchall()
    for r in result:
        checkUserForUpdate(r[0],r[1],r[2])
    conn.close()    
def checkUserForUpdate(userId,brand,category):
    print '********** Checking users brand present or not in the master table *********'
    sources = ['product_view','clicks','filters','orders','preferences']
    datesql= "select distinct brand from newuserlinkssegmentation where user_id=%s and brand=%s and category_id=%s" 
    conn = getDbConnection()
    cursor = conn.cursor()
    cursor.execute(datesql,(userId,brand,category))
    result = cursor.fetchone()
    print 'Result in Check user for update',result
    if len(result)==0:
        insertNewUserLinkDetails(userId,brand,category)
    else:
        datesql= "select distinct source from newuserlinkssegmentation where user_id=%s and brand=%s and category_id=%s"
        cursor.execute(datesql,(userId,result[0][0],category))
        result = cursor.fetchall()
        for source in result:
            userReplaceData(userId,brand,category, source)
            sources.remove(source)
        for source in sources:
            insertNewUserLinkDetailsSourceWise(userId,brand,category, source)
    conn.close()    

def userReplaceData(userId,brand,category,source):
    print 'Data that needs to be updated for', userId,brand,category
    conn = getDbConnection()
    cursor = conn.cursor()
    print REPLACE_QUERY%(userId,brand,category,source)
    cursor.execute(REPLACE_QUERY,(userId,brand,category,source))
    conn.commit()
    conn.close()

def insertNewUserLinkDetails(userId,brand,category):
    print 'Data needs to be inserted as the brand was not present earlier', userId,brand,category
    conn = getDbConnection()
    selectNewUser="select * from newuserlinkssegmentation7days where user_id=%s and brand=%s and category_id=%s"
    cursor = conn.cursor()
    cursor.execute(selectNewUser,(userId,brand,category))
    result = cursor.fetchall()
    print 'Result while inserting new record in master',result
    for r in result:
        sql = "insert into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
        cursor.execute(sql,(r[0],r[1],r[2], r[3], r[4],r[5],datetime.now(),datetime.now()))
        conn.commit()
    conn.close()
    
def insertNewUserLinkDetailsSourceWise(userId,brand,category,source):
    print 'Data needs to be inserted as the source was not present earlier', userId,brand,category,source
    conn = getDbConnection()
    selectNewUser="select * from newuserlinkssegmentation7days where user_id=%s and brand=%s and category_id=%s and source=%s"
    cursor = conn.cursor()
    cursor.execute(selectNewUser,(userId,brand,category,source))
    result = cursor.fetchall()
    print 'Result while inserting new record in master',result
    for r in result:
        sql = "insert into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
        cursor.execute(sql,(r[0],r[1],r[2], r[3], r[4],r[5],datetime.now(),datetime.now()))
        conn.commit()
    conn.close()    
    
def getLastSeventhDay():
    return to_java_date(datetime.today() - timedelta(days=7))
            
def main():
    print datetime.now()
    print '********** Starting to update links for Users *********'
    deleteUserForLinks()
    addUsersForLinks()
    filters()
    storeProductView()
    #findRepeatingOrders()
    #findRepeatingClicks()
    checkCountForUpdate()
    print '********** Finished updating records for user links *********'
if __name__=='__main__':
    main()