Subversion Repositories SmartDukaan

Rev

Rev 17329 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
17325 manas 1
import MySQLdb
2
from dtr.storage.DataService import brand_preferences
3
from elixir import *
4
from dtr.storage import DataService, Mongo
5
from dtr.storage.DataService import user_filters
6
from sqlalchemy.sql.expression import func, between
7
from datetime import datetime,timedelta
8
from UserSegmentationLinksMaster import get_mongo_connection_new
9
import re
10
from dtr.utils.utils import to_java_date
11
DataService.initialize(echo=True)
12
 
13
DB_HOST = "localhost"
14
DB_USER = "root"
15
DB_PASSWORD = "shop2020"
16
DB_NAME = "dtr"
17
 
18
USER_SEGMENTATION_LINK_QUERY="""
19
select x.user_id,x.brand,x.category_id,x.count,x.source,x.weight 
20
  from
21
    (
22
    select c.user_id,c.brand,c.category_id,count(c.id) as count,"clicks" as source,(uw.weightage * count(c.id)) as weight 
23
    from clicks c join user_segmentation_weigthage uw on "clicks"=uw.weightage_type
24
    where  c.category_id is not null and c.category_id !=0 and c.brand is not null and 
25
    c.brand not like '' and c.user_id!=0 and datediff(curdate(),c.created)<7 group by c.category_id,c.brand,c.user_id
26
 
27
    UNION
28
 
29
    select m.userId as user_id,m.brand,m.categoryId as category_id,count(1) as count,"orders" as source,(uw.weightage * count(1)) as weight
30
    from merchantsuborders m join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
31
    where m.categoryId is not null and m.brand is not null and m.brand not like 'NA' and m.userId!=0 and datediff(curdate(),m.createdTime)<7 group by m.categoryId,m.brand,m.userId
32
 
33
    UNION
34
 
35
    select fo.user_id,fo.brand,fo.category as category_id,count(fo.id) as count,"orders" as source, (uw.weightage * count(fo.id)) as weight
36
    from flipkartorders fo join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
37
    where  fo.category is not null and fo.brand is not null and  fo.brand not like '' and fo.user_id!=0 and fo.user_id is not null 
38
    and datediff(curdate(),fo.created)<7 group by fo.category,fo.brand,fo.user_id
39
 
40
    UNION
41
 
42
    select bp.user_id,bp.brand,bp.category_id,count(1) as count,"preferences" as source,(uw.weightage* count(1)) as weight
43
    from brand_preferences bp join user_segmentation_weigthage uw on "preferences"=uw.weightage_type  
44
    where user_id!=0 and status='show' and datediff(curdate(),bp.created)<7 group by bp.category_id,bp.brand,bp.user_id
45
) as x
46
 
47
"""
48
 
49
USER_STORE_PRODUCT_VIEW="""
50
    SELECT u.user_id,count(u.id) AS count, SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1)  
51
    AS skuBundleId,"product_view" as source,(uw.weightage* count(u.id)) as weight
52
    FROM user_urls u join user_segmentation_weigthage uw on "product_view"=uw.weightage_type 
53
    WHERE u.url LIKE '%store_products/view%' and u.user_id!=0 and 
54
    SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1) not like ''
55
    and datediff(curdate(),u.created)<7
56
    group by u.user_id,skuBundleId; 
57
"""
58
 
59
REPLACE_QUERY="""
60
REPLACE INTO newuserlinkssegmentation( user_id, brand, source,category_id, count, weight,modified) 
61
SELECT user_id, brand, source,category_id, count, weight,created
62
FROM newuserlinkssegmentation7days
17330 manas 63
WHERE user_id =%s and brand=%s and category_id=%s and source=%s
17325 manas 64
"""
65
 
66
 
67
userMap = {} #key - user_id val - brand map
68
userMobileCategory={}
69
userTabletCategory={}
70
 
71
def getDbConnection():
72
    return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)
73
 
74
 
75
def deleteUserForLinks():
76
    print '********** Deleting records from last 7 days tables *********'
77
    conn = getDbConnection()
78
    cursor = conn.cursor()
79
    deleteAllUserGroups = "delete from newuserlinkssegmentation7days"
80
    try:
81
        cursor.execute(deleteAllUserGroups)
82
        conn.commit()
83
    except:
84
        conn.rollback()
85
    finally:
86
        conn.close()    
87
        print '********** Completed Deleting records from last 7 days tables *********'
88
 
89
def addUsersForLinks():
90
    print '********** Inserting records in last 7 days table *********'
91
    user_id=''
92
    brand=''
93
    category_id='' 
94
    source=''
95
    count=''
96
    weight=''
97
 
98
    datesql= USER_SEGMENTATION_LINK_QUERY
99
    conn = getDbConnection()
100
    cursor = conn.cursor()
101
    cursor.execute(datesql)
102
    result = cursor.fetchall()
103
 
104
    for r in result:
105
        user_id=r[0]
106
        brand=r[1]
107
        if r[2] == 'Mobiles':
108
            category_id=3
109
        elif r[2] == 'Tablets':
110
            category_id=5
111
        else:
112
            category_id=r[2] 
113
        count=r[3]
114
        source=r[4]
115
        weight=r[5]
116
        if brand is not 'NA':
117
            sql = "replace into newuserlinkssegmentation7days (user_id, brand, category_id, source, count,weight,created) values(%s, %s, %s, %s, %s,%s,%s)"
118
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now()))
119
            conn.commit()
120
    conn.close()
121
    print '********** Completed inserting records for orders,preferences and clicks in last 7 days table *********'
122
 
123
def filters():
124
    print '********** Inserting records for filters in last 7 day table *********'
125
    conn = getDbConnection()
126
    cursor = conn.cursor()
127
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='filters'")
128
    result = cursor.fetchall()
129
    filter_weight=result[0][0]
130
    todayDate=datetime.today().date()
131
    data = session.query((user_filters)).filter_by(type="brand").filter(~(user_filters.filters).like('NA')).filter(user_filters.created.between(todayDate-timedelta(days=6),todayDate)).all()
132
    for d in data:
133
        if userMap.has_key(d.user_id):
134
            brandMap = userMap.get(d.user_id) 
135
        else:
136
            brandMap = {}
137
        brands = d.filters.split('|')
138
        for brand in brands:
139
            if brandMap.has_key(brand):
140
                brandMap[brand] = brandMap.get(brand) + 1
141
            else:
142
                brandMap[brand] = 1
143
        userMap[d.user_id] = brandMap
144
    for x, y in userMap.iteritems():
145
        for bra,cow in y.iteritems():
146
            user_id=x
147
            brand=bra
148
            count=cow
149
            category_id=3
150
            weight=cow*filter_weight
151
            source='filters'
152
            if brand is not 'NA':
153
                sql = "replace into newuserlinkssegmentation7days (user_id, brand, category_id, source, count,weight,created) values(%s, %s, %s, %s, %s,%s,%s)"
17329 manas 154
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now()))
17325 manas 155
                conn.commit()
156
    conn.close()
157
    print '********** Completed inserting records for filters in last 7 day table *********'
158
 
159
def storeProductView():
160
    print '********** Inserting records for store product view in last 7 day table *********'
161
    user_id=''
162
    brand=''
163
    category_id='' 
164
    source=''
165
    count=''
166
    weight=''
167
 
168
    result = get_mongo_connection_new().User.browsinghistories.find({"$and":[{"url":{"$regex" : "/store_products\/view/"}},{"created":{"$gte":getLastSeventhDay()}}]})
169
 
170
    for r in result:
171
        matchObj = re.search('store_products/view/(.+?)/(.+?)$',r.get('url'))
172
        if matchObj:
173
            skuBundleId = matchObj.group(2)
174
            if 'user_id' in skuBundleId or '=' in skuBundleId:
175
                newMatchObj = re.search('(.+?)\?',skuBundleId)
176
                if newMatchObj:
177
                    skuBundleId=newMatchObj.group(1)
178
                else:
179
                    print 'No match userId', skuBundleId
180
                    continue
181
            elif '#' in skuBundleId:
182
                newMatchObj = re.search('(.+?)\#',skuBundleId)
183
                if newMatchObj:
184
                    skuBundleId=newMatchObj.group(1)
185
                else:
186
                    print 'No match #', skuBundleId
187
                    continue
188
            if '%' in skuBundleId:
189
                newMatchObj = re.search('(.+?)\%',skuBundleId)
190
                if newMatchObj:
191
                    skuBundleId=newMatchObj.group(1)
192
                else:
193
                    print 'No match', skuBundleId                     
194
        else:
195
            print "No match!!"
196
        print skuBundleId            
197
        if skuBundleId.isdigit():
198
            print 'Inside is digit true'
199
            skuBundleId = int(skuBundleId)
200
        else:
201
            print 'Inside is digit false'
202
            skuBundleId =  int(float(skuBundleId))
203
 
204
        skuData = Mongo.getSkuBrandData(skuBundleId)
205
        if len(skuData) !=0:
206
            user_id=r.get('user_id')
207
            brand=skuData['brand']
208
            category_id =skuData['category_id']
209
            source='product_view'
210
            count=1
211
            if category_id==3:
212
                if userMobileCategory.has_key(user_id):
213
                    brandMap = userMobileCategory.get(user_id)
214
                else:
215
                    brandMap = {}
216
                if brandMap.has_key(brand):
217
                    brandMap[brand] = brandMap.get(brand) + 1 
218
                else:
219
                    brandMap[brand] = 1
220
                userMobileCategory[user_id] = brandMap
221
            if category_id==5:
222
                if userTabletCategory.has_key(user_id):
223
                    brandCategoryMap = userTabletCategory.get(user_id)
224
                else:
225
                    brandCategoryMap = {}
226
                if brandCategoryMap.has_key(brand):
227
                    brandCategoryMap[brand] = brandCategoryMap.get(brand) + 1
228
                else:
229
                    brandCategoryMap[brand] = 1
230
                userTabletCategory[user_id] = brandCategoryMap    
231
        else:
232
            print 'Sku bundle Id not present',r.get('url')
233
 
234
    conn=getDbConnection()
235
    cursor=conn.cursor()
236
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='product_view'")
237
    result = cursor.fetchall()
238
    product_view_weight=result[0][0]
239
 
240
    for x, y in userMobileCategory.iteritems():
241
        for bra,cow in y.iteritems():
242
            user_id=x
243
            brand= bra
244
            count=cow
245
            weight=cow*product_view_weight
246
            source='product_view'
247
            category_id=3
17327 manas 248
            sql = "replace into newuserlinkssegmentation7days (user_id, brand, category_id, source, count,weight,created) values(%s, %s, %s, %s, %s,%s,%s)"
17325 manas 249
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now()))
250
            conn.commit()
251
 
252
    for x, y in userTabletCategory.iteritems():
253
        for bra,cow in y.iteritems():
254
            user_id=x
255
            brand= bra
256
            count=cow
257
            weight=cow*product_view_weight
258
            source='product_view'
259
            category_id=5
17327 manas 260
            sql = "replace into newuserlinkssegmentation7days (user_id, brand, category_id, source, count,weight,created) values(%s, %s, %s, %s, %s,%s,%s)"
17325 manas 261
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now()))
262
            conn.commit()
263
    conn.close()
264
 
265
    print '********** Completed inserting records for store product view in last 7 day table *********'
266
 
267
def findRepeatingOrders():
268
    print '********** Call to sanitize data for orders if any multiple *********'
269
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='orders' group by user_id,brand,category_id having count(source)=2;"
270
    conn = getDbConnection()
271
    cursor = conn.cursor()
272
    rows_count = cursor.execute(datesql)
273
    if rows_count>0:
274
        result = cursor.fetchall()
275
        for r in result:
276
            print r
277
            findDetails(r[0],r[1],r[2])
278
    else:
279
        print 'No Data to be sanitized for repeating orders'            
280
 
281
def findDetails(userId,brand,category):
282
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='orders' and brand=%s group by source;"
283
    conn = getDbConnection()
284
    cursor = conn.cursor()
285
    cursor.execute(datesql,(userId,brand))
286
    result = cursor.fetchall()
287
    for r in result:
288
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='orders'"
289
        cursor = conn.cursor()
290
        cursor.execute(deletesql,(userId,brand))
291
        conn.commit()
292
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
293
        cursor = conn.cursor()
294
        cursor.execute(insertsql,(userId,brand,category,'orders',r[0],r[1]))
295
        conn.commit()
296
 
297
def findRepeatingClicks():
298
    print '********** Call to sanitize data for product view if any multiple *********'
299
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='product_view' group by user_id,brand,category_id having count(source)=2;"
300
    conn = getDbConnection()
301
    cursor = conn.cursor()
302
    rows_count = cursor.execute(datesql)
303
    if rows_count>0:
304
        result = cursor.fetchall()
305
        for r in result:
306
            print r
307
            findViewDetails(r[0],r[1],r[2])
308
    else:
309
        print 'No Data to be sanitized for product view'            
310
 
311
def findViewDetails(userId,brand,category):
312
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='product_view' and brand=%s group by source;"
313
    conn = getDbConnection()
314
    cursor = conn.cursor()
315
    cursor.execute(datesql,(userId,brand))
316
    result = cursor.fetchall()
317
    for r in result:
318
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='product_view'"
319
        cursor = conn.cursor()
320
        cursor.execute(deletesql,(userId,brand))
321
        conn.commit()
322
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
323
        cursor = conn.cursor()
324
        cursor.execute(insertsql,(userId,brand,category,'product_view',r[0],r[1]))
325
        conn.commit()        
326
 
327
 
328
def checkCountForUpdate():
329
    print '********** Checking users brand whose cumulative weight is greater than 3 *********'
330
    datesql= "select user_id,brand,category_id from newuserlinkssegmentation7days group by user_id,brand,category_id having sum(weight) >=3;"
331
    conn = getDbConnection()
332
    cursor = conn.cursor()
333
    cursor.execute(datesql)
334
    result = cursor.fetchall()
335
    for r in result:
336
        checkUserForUpdate(r[0],r[1],r[2])
337
    conn.close()    
338
def checkUserForUpdate(userId,brand,category):
339
    print '********** Checking users brand present or not in the master table *********'
340
    sources = ['product_view','clicks','filters','orders','preferences']
17330 manas 341
    datesql= "select distinct brand from newuserlinkssegmentation where user_id=%s and brand=%s and category_id=%s" 
17325 manas 342
    conn = getDbConnection()
343
    cursor = conn.cursor()
344
    cursor.execute(datesql,(userId,brand,category))
345
    result = cursor.fetchone()
346
    print 'Result in Check user for update',result
347
    if len(result)==0:
348
        insertNewUserLinkDetails(userId,brand,category)
349
    else:
17330 manas 350
        datesql= "select distinct source from newuserlinkssegmentation where user_id=%s and brand=%s and category_id=%s"
17329 manas 351
        cursor.execute(datesql,(userId,result[0][0],category))
17325 manas 352
        result = cursor.fetchall()
353
        for source in result:
354
            userReplaceData(userId,brand,category, source)
355
            sources.remove(source)
356
        for source in sources:
357
            insertNewUserLinkDetailsSourceWise(userId,brand,category, source)
358
    conn.close()    
359
 
360
def userReplaceData(userId,brand,category,source):
361
    print 'Data that needs to be updated for', userId,brand,category
362
    conn = getDbConnection()
363
    cursor = conn.cursor()
17328 manas 364
    print REPLACE_QUERY%(userId,brand,category,source)
17325 manas 365
    cursor.execute(REPLACE_QUERY,(userId,brand,category,source))
366
    conn.commit()
367
    conn.close()
368
 
369
def insertNewUserLinkDetails(userId,brand,category):
370
    print 'Data needs to be inserted as the brand was not present earlier', userId,brand,category
371
    conn = getDbConnection()
17330 manas 372
    selectNewUser="select * from newuserlinkssegmentation7days where user_id=%s and brand=%s and category_id=%s"
17325 manas 373
    cursor = conn.cursor()
374
    cursor.execute(selectNewUser,(userId,brand,category))
375
    result = cursor.fetchall()
376
    print 'Result while inserting new record in master',result
377
    for r in result:
378
        sql = "insert into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
379
        cursor.execute(sql,(r[0],r[1],r[2], r[3], r[4],r[5],datetime.now(),datetime.now()))
380
        conn.commit()
381
    conn.close()
382
 
383
def insertNewUserLinkDetailsSourceWise(userId,brand,category,source):
384
    print 'Data needs to be inserted as the source was not present earlier', userId,brand,category,source
385
    conn = getDbConnection()
17330 manas 386
    selectNewUser="select * from newuserlinkssegmentation7days where user_id=%s and brand=%s and category_id=%s and source=%s"
17325 manas 387
    cursor = conn.cursor()
388
    cursor.execute(selectNewUser,(userId,brand,category,source))
389
    result = cursor.fetchall()
390
    print 'Result while inserting new record in master',result
391
    for r in result:
392
        sql = "insert into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
393
        cursor.execute(sql,(r[0],r[1],r[2], r[3], r[4],r[5],datetime.now(),datetime.now()))
394
        conn.commit()
395
    conn.close()    
396
 
397
def getLastSeventhDay():
398
    return to_java_date(datetime.today() - timedelta(days=7))
399
 
400
def main():
401
    print datetime.now()
402
    print '********** Starting to update links for Users *********'
403
    deleteUserForLinks()
404
    addUsersForLinks()
405
    filters()
406
    storeProductView()
407
    #findRepeatingOrders()
408
    #findRepeatingClicks()
409
    checkCountForUpdate()
410
    print '********** Finished updating records for user links *********'
411
if __name__=='__main__':
412
    main()