Subversion Repositories SmartDukaan

Rev

Rev 16625 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
16356 manas 1
import MySQLdb
2
from dtr.storage.DataService import brand_preferences
16625 manas 3
from elixir import *
4
from dtr.storage import DataService, Mongo
5
from dtr.storage.DataService import user_filters
6
from sqlalchemy.sql.expression import func, between
7
from datetime import datetime,timedelta
8
DataService.initialize(echo=True)
16356 manas 9
 
10
DB_HOST = "localhost"
11
DB_USER = "root"
12
DB_PASSWORD = "shop2020"
13
DB_NAME = "dtr"
14
 
15
USER_SEGMENTATION_LINK_QUERY="""
16359 manas 16
select x.user_id,x.brand,x.category_id,x.count,x.source,x.weight 
16356 manas 17
  from
18
    (
16359 manas 19
    select c.user_id,c.brand,c.category_id,count(c.id) as count,"clicks" as source,(uw.weightage * count(c.id)) as weight 
16356 manas 20
    from clicks c join user_segmentation_weigthage uw on "clicks"=uw.weightage_type
21
    where  c.category_id is not null and c.category_id !=0 and c.brand is not null and 
16625 manas 22
    c.brand not like '' and c.user_id!=0 and datediff(curdate(),c.created)<7 group by c.category_id,c.brand,c.user_id
16356 manas 23
 
24
    UNION
16359 manas 25
 
26
    select m.userId as user_id,m.brand,m.categoryId as category_id,count(1) as count,"orders" as source,(uw.weightage * count(1)) as weight
27
    from merchantsuborders m join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
16625 manas 28
    where m.categoryId is not null and m.brand is not null and m.userId!=0 and datediff(curdate(),m.createdTime)<7 group by m.categoryId,m.brand,m.userId
16356 manas 29
 
30
    UNION
16359 manas 31
 
32
    select fo.user_id,fo.brand,fo.category as category_id,count(fo.id) as count,"orders" as source, (uw.weightage * count(fo.id)) as weight
16356 manas 33
    from flipkartorders fo join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
16625 manas 34
    where  fo.category is not null and fo.brand is not null and  fo.brand not like '' and fo.user_id!=0 and fo.user_id is not null 
35
    and datediff(curdate(),fo.created)<7 group by fo.category,fo.brand,fo.user_id
16359 manas 36
 
16356 manas 37
    UNION
16359 manas 38
 
16360 manas 39
    select bp.user_id,bp.brand,bp.category_id,count(1) as count,"preferences" as source,(uw.weightage* count(1)) as weight
16359 manas 40
    from brand_preferences bp join user_segmentation_weigthage uw on "preferences"=uw.weightage_type  
16625 manas 41
    where user_id!=0 and status='show' and datediff(curdate(),bp.created)<7 group by bp.category_id,bp.brand,bp.user_id
16359 manas 42
) as x
43
 
16356 manas 44
"""
45
 
16625 manas 46
USER_STORE_PRODUCT_VIEW="""
16627 manas 47
    SELECT u.user_id,count(u.id) AS count, SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1)  
16625 manas 48
    AS skuBundleId,"product_view" as source,(uw.weightage* count(u.id)) as weight
49
    FROM user_urls u join user_segmentation_weigthage uw on "product_view"=uw.weightage_type 
50
    WHERE u.url LIKE '%store_products/view%' and u.user_id!=0 and 
16627 manas 51
    SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1) not like ''
16625 manas 52
    and datediff(curdate(),u.created)<7
53
    group by u.user_id,skuBundleId; 
54
"""
16356 manas 55
 
16625 manas 56
REPLACE_QUERY="""
57
REPLACE INTO userlinkssegmentation( user_id, brand, source,category_id, count, weight ) 
58
SELECT user_id, brand, source,category_id, count, weight
59
FROM userlinkssegmentation7days
60
WHERE user_id =%s and brand=%s and category_id=%s
61
"""
62
 
63
 
64
userMap = {} #key - user_id val - brand map
65
userMobileCategory={}
66
userTabletCategory={}
67
 
16356 manas 68
def getDbConnection():
69
    return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)
70
 
16625 manas 71
 
72
def deleteUserForLinks():
73
    print '********** Deleting records from last 7 days tables *********'
16356 manas 74
    conn = getDbConnection()
75
    cursor = conn.cursor()
16625 manas 76
    deleteAllUserGroups = "delete from userlinkssegmentation7days"
77
    try:
78
        cursor.execute(deleteAllUserGroups)
79
        conn.commit()
80
    except:
81
        conn.rollback()
82
    finally:
83
        conn.close()    
84
        print '********** Completed Deleting records from last 7 days tables *********'
16356 manas 85
 
86
def addUsersForLinks():
16625 manas 87
    print '********** Inserting records in last 7 days table *********'
16356 manas 88
    user_id=''
89
    brand=''
90
    category_id='' 
91
    source=''
92
    count=''
93
    weight=''
94
 
95
    datesql= USER_SEGMENTATION_LINK_QUERY
96
    conn = getDbConnection()
97
    cursor = conn.cursor()
98
    cursor.execute(datesql)
99
    result = cursor.fetchall()
100
 
101
    for r in result:
102
        user_id=r[0]
16359 manas 103
        brand=r[1]
104
        if r[2] == 'Mobiles':
16356 manas 105
            category_id=3
16359 manas 106
        elif r[2] == 'Tablets':
16356 manas 107
            category_id=5
108
        else:
16359 manas 109
            category_id=r[2] 
16362 manas 110
        count=r[3]
111
        source=r[4]
16359 manas 112
        weight=r[5]
16625 manas 113
        sql = "insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
16359 manas 114
        cursor.execute(sql,(user_id,brand, category_id, source, count,weight))
16356 manas 115
        conn.commit()
16625 manas 116
    conn.close()
117
    print '********** Completed inserting records for orders,preferences and clicks in last 7 days table *********'
118
 
119
def filters():
120
    print '********** Inserting records for filters in last 7 day table *********'
121
    conn = getDbConnection()
122
    cursor = conn.cursor()
123
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='filters'")
124
    result = cursor.fetchall()
125
    filter_weight=result[0][0]
126
    todayDate=datetime.today().date()
127
    data = session.query((user_filters)).filter_by(type="brand").filter(user_filters.created.between(todayDate-timedelta(days=6),todayDate)).all()
128
    for d in data:
129
        if userMap.has_key(d.user_id):
130
            brandMap = userMap.get(d.user_id) 
131
        else:
132
            brandMap = {}
133
        brands = d.filters.split('|')
134
        for brand in brands:
135
            if brandMap.has_key(brand):
136
                brandMap[brand] = brandMap.get(brand) + 1
137
            else:
138
                brandMap[brand] = 1
139
        userMap[d.user_id] = brandMap
140
    for x, y in userMap.iteritems():
141
        for bra,cow in y.iteritems():
142
            user_id=x
143
            brand=bra
144
            count=cow
145
            category_id=3
146
            weight=cow*filter_weight
147
            source='filters'
148
            sql = "insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
149
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight))
150
            conn.commit()
151
 
152
    conn.close()
153
    print '********** Completed inserting records for filters in last 7 day table *********'
154
 
155
def storeProductView():
156
    print '********** Inserting records for store product view in last 7 day table *********'
157
    user_id=''
158
    brand=''
159
    category_id='' 
160
    source=''
161
    count=''
162
    weight=''
16356 manas 163
 
16625 manas 164
    datesql= USER_STORE_PRODUCT_VIEW
165
    conn = getDbConnection()
166
    cursor = conn.cursor()
167
    cursor.execute(datesql)
168
    result = cursor.fetchall()
169
 
170
    for r in result:
171
        skuData = Mongo.getSkuBrandData(int(r[2]))
172
        if len(skuData) !=0:
173
            user_id=r[0]
174
            brand=skuData['brand']
175
            category_id =skuData['category_id']
176
            source=r[3]
177
            count=r[1]
178
            if category_id==3:
179
                if userMobileCategory.has_key(user_id):
180
                    brandMap = userMobileCategory.get(user_id)
181
                else:
182
                    brandMap = {}
183
                if brandMap.has_key(brand):
184
                    brandMap[brand] = brandMap.get(brand) + r[1]
185
                else:
186
                    brandMap[brand] = r[1]
187
                userMobileCategory[user_id] = brandMap
188
            if category_id==5:
189
                if userTabletCategory.has_key(user_id):
190
                    brandCategoryMap = userTabletCategory.get(user_id)
191
                else:
192
                    brandCategoryMap = {}
193
                if brandCategoryMap.has_key(brand):
194
                    brandCategoryMap[brand] = brandCategoryMap.get(brand) + r[1]
195
                else:
196
                    brandCategoryMap[brand] = r[1]
197
                userTabletCategory[user_id] = brandCategoryMap    
198
        else:
199
            print 'Sku bundle Id not present',r[2]
200
 
201
 
202
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='product_view'")
203
    result = cursor.fetchall()
204
    product_view_weight=result[0][0]
205
 
206
    for x, y in userMobileCategory.iteritems():
207
        for bra,cow in y.iteritems():
208
            user_id=x
209
            brand= bra
210
            count=cow
211
            weight=cow*product_view_weight
212
            source='product_view'
213
            category_id=3
214
            sql = "insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
215
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight))
216
            conn.commit()
217
 
218
    for x, y in userTabletCategory.iteritems():
219
        for bra,cow in y.iteritems():
220
            user_id=x
221
            brand= bra
222
            count=cow
223
            weight=cow*product_view_weight
224
            source='product_view'
225
            category_id=5
226
            sql = "insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
227
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight))
228
            conn.commit()
16356 manas 229
    conn.close()
230
 
16625 manas 231
    print '********** Completed inserting records for store product view in last 7 day table *********'
232
 
233
def findRepeatingOrders():
234
    print '********** Call to sanitize data for orders if any multiple *********'
235
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='orders' group by user_id,brand,category_id having count(source)=2;"
16356 manas 236
    conn = getDbConnection()
237
    cursor = conn.cursor()
16625 manas 238
    rows_count = cursor.execute(datesql)
239
    if rows_count>0:
240
        result = cursor.fetchall()
241
        for r in result:
242
            print r
243
            findDetails(r[0],r[1],r[2])
244
    else:
245
        print 'No Data to be sanitized for repeating orders'            
246
 
247
def findDetails(userId,brand,category):
248
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='orders' and brand=%s group by source;"
249
    conn = getDbConnection()
250
    cursor = conn.cursor()
251
    cursor.execute(datesql,(userId,brand))
252
    result = cursor.fetchall()
253
    for r in result:
254
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='orders'"
255
        cursor = conn.cursor()
256
        cursor.execute(deletesql,(userId,brand))
16356 manas 257
        conn.commit()
16625 manas 258
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
259
        cursor = conn.cursor()
260
        cursor.execute(insertsql,(userId,brand,category,'orders',r[0],r[1]))
261
        conn.commit()
262
 
263
def findRepeatingClicks():
264
    print '********** Call to sanitize data for product view if any multiple *********'
265
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='product_view' group by user_id,brand,category_id having count(source)=2;"
266
    conn = getDbConnection()
267
    cursor = conn.cursor()
268
    rows_count = cursor.execute(datesql)
269
    if rows_count>0:
270
        result = cursor.fetchall()
271
        for r in result:
272
            print r
273
            findViewDetails(r[0],r[1],r[2])
274
    else:
275
        print 'No Data to be sanitized for product view'            
276
 
277
def findViewDetails(userId,brand,category):
278
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='product_view' and brand=%s group by source;"
279
    conn = getDbConnection()
280
    cursor = conn.cursor()
281
    cursor.execute(datesql,(userId,brand))
282
    result = cursor.fetchall()
283
    for r in result:
284
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='product_view'"
285
        cursor = conn.cursor()
286
        cursor.execute(deletesql,(userId,brand))
287
        conn.commit()
288
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
289
        cursor = conn.cursor()
290
        cursor.execute(insertsql,(userId,brand,category,'product_view',r[0],r[1]))
291
        conn.commit()        
16356 manas 292
 
16625 manas 293
 
294
def checkCountForUpdate():
295
    print '********** Checking users brand whose cumulative weight is greater than 3 *********'
296
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days group by user_id,brand having sum(weight) >=3;"
297
    conn = getDbConnection()
298
    cursor = conn.cursor()
299
    cursor.execute(datesql)
300
    result = cursor.fetchall()
301
    for r in result:
302
        checkUserForUpdate(r[0],r[1],r[2])
303
    conn.close()    
304
def checkUserForUpdate(userId,brand,category):
305
    print '********** Checking users brand present or not in the master table *********'
306
    datesql= "select distinct brand from userlinkssegmentation where user_id=%s and brand=%s and category_id=%s" 
307
    conn = getDbConnection()
308
    cursor = conn.cursor()
309
    cursor.execute(datesql,(userId,brand,category))
310
    result = cursor.fetchall()
311
    if len(result)==0:
312
        insertNewUserLinkDetails(userId,brand,category)
313
    else:
314
        userReplaceData(userId,brand,category)
315
    conn.close()    
316
 
317
def userReplaceData(userId,brand,category):
318
    print 'Data that needs to be updated for', userId,brand,category
319
    conn = getDbConnection()
320
    cursor = conn.cursor()
321
    cursor.execute(REPLACE_QUERY,(userId,brand,category))
322
    conn.commit()
323
    conn.close()
324
 
325
def insertNewUserLinkDetails(userId,brand,category):
326
    print 'Data needs to be inserted as the brand was not present earlier', userId,brand,category
327
    conn = getDbConnection()
328
    selectNewUser="select * from userlinkssegmentation7days where user_id=%s and brand=%s and category_id=%s"
329
    cursor = conn.cursor()
330
    cursor.execute(selectNewUser,(userId,brand,category))
331
    result = cursor.fetchall()
332
    print 'Result while inserting new record in master',result
333
    for r in result:
334
        sql = "insert into userlinkssegmentation (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
335
        cursor.execute(sql,(r[0],r[1],r[2], r[3], r[4],r[5]))
336
        conn.commit()
337
    conn.close()    
16356 manas 338
def main():
16625 manas 339
    print datetime.now()
340
    print '********** Starting to update links for Users *********'
16356 manas 341
    deleteUserForLinks()
342
    addUsersForLinks()
16625 manas 343
    filters()
344
    storeProductView()
345
    findRepeatingOrders()
346
    findRepeatingClicks()
347
    checkCountForUpdate()
348
    print '********** Finished updating records for user links *********'
16356 manas 349
if __name__=='__main__':
16625 manas 350
    main()