Subversion Repositories SmartDukaan

Rev

Rev 17317 | Rev 17323 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
17317 manas 1
import MySQLdb
2
from dtr.storage.DataService import brand_preferences
3
from elixir import *
4
from dtr.storage import DataService, Mongo
5
from dtr.storage.DataService import user_filters
6
from sqlalchemy.sql.expression import func, between
7
from datetime import datetime,timedelta
8
import re
9
import traceback
10
import pymongo
11
 
12
DataService.initialize(echo=True)
13
 
14
DB_HOST = "localhost"
15
DB_USER = "root"
16
DB_PASSWORD = "shop2020"
17
DB_NAME = "dtr"
18
con=None
19
 
20
USER_SEGMENTATION_LINK_QUERY="""
21
select x.user_id,x.brand,x.category_id,x.count,x.source,x.weight 
22
  from
23
    (
24
    select c.user_id,c.brand,c.category_id,count(c.id) as count,"clicks" as source,(uw.weightage * count(c.id)) as weight 
25
    from clicks c join user_segmentation_weigthage uw on "clicks"=uw.weightage_type
26
    where  c.category_id is not null and c.category_id !=0 and c.brand is not null and 
27
    c.brand not like '' and c.user_id!=0 group by c.category_id,c.brand,c.user_id
28
 
29
    UNION
30
 
31
    select m.userId as user_id,m.brand,m.categoryId as category_id,count(1) as count,"orders" as source,(uw.weightage * count(1)) as weight
32
    from merchantsuborders m join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
33
    where m.categoryId is not null and m.brand is not null and m.userId!=0 group by m.categoryId,m.brand,m.userId
34
 
35
    UNION
36
 
37
    select fo.user_id,fo.brand,fo.category as category_id,count(fo.id) as count,"orders" as source, (uw.weightage * count(fo.id)) as weight
38
    from flipkartorders fo join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
39
    where  fo.category is not null and fo.brand is not null and  fo.brand not like '' and fo.user_id!=0 and fo.user_id is not null 
40
    group by fo.category,fo.brand,fo.user_id
41
 
42
    UNION
43
 
44
    select bp.user_id,bp.brand,bp.category_id,count(1) as count,"preferences" as source,(uw.weightage* count(1)) as weight
45
    from brand_preferences bp join user_segmentation_weigthage uw on "preferences"=uw.weightage_type  
46
    where user_id!=0 and status='show' group by bp.category_id,bp.brand,bp.user_id
47
) as x
48
"""
49
 
50
USER_STORE_PRODUCT_VIEW="""
51
    SELECT u.user_id,count(u.id) AS count, SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1)  
52
    AS skuBundleId,"product_view" as source,(uw.weightage* count(u.id)) as weight
53
    FROM user_urls u join user_segmentation_weigthage uw on "product_view"=uw.weightage_type 
54
    WHERE u.url LIKE '%store_products/view%' and u.user_id!=0 and 
55
    SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1) not like ''
56
    and datediff(curdate(),u.created)<7
57
    group by u.user_id,skuBundleId; 
58
"""
59
 
60
REPLACE_QUERY="""
61
REPLACE INTO userlinkssegmentation( user_id, brand, source,category_id, count, weight ) 
62
SELECT user_id, brand, source,category_id, count, weight
63
FROM userlinkssegmentation7days
64
WHERE user_id =%s and brand=%s and category_id=%s
65
"""
66
 
67
 
68
userMap = {}
69
userMobileCategory={}
70
userTabletCategory={}
71
 
72
def getDbConnection():
73
    return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)
74
 
75
 
76
def addUsersForLinks():
77
    print '********** Inserting records in master table *********'
78
    user_id=''
79
    brand=''
80
    category_id='' 
81
    source=''
82
    count=''
83
    weight=''
84
 
85
    datesql= USER_SEGMENTATION_LINK_QUERY
86
    conn = getDbConnection()
87
    cursor = conn.cursor()
88
    cursor.execute(datesql)
89
    result = cursor.fetchall()
90
 
91
    for r in result:
92
        user_id=r[0]
93
        brand=r[1]
94
        if r[2] == 'Mobiles':
95
            category_id=3
96
        elif r[2] == 'Tablets':
97
            category_id=5
98
        else:
99
            category_id=r[2] 
100
        count=r[3]
101
        source=r[4]
102
        weight=r[5]
17320 manas 103
        if brand is not 'NA':
104
            sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
105
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
106
            conn.commit()
17317 manas 107
    conn.close()
108
    print '********** Completed inserting records for orders,preferences and clicks in master table *********'
109
 
110
def filters():
111
    print '********** Inserting records for filters in master table *********'
112
    conn = getDbConnection()
113
    cursor = conn.cursor()
114
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='filters'")
115
    result = cursor.fetchall()
116
    filter_weight=result[0][0]
117
    data = session.query((user_filters)).filter_by(type="brand").all()
118
    for d in data:
119
        if userMap.has_key(d.user_id):
120
            brandMap = userMap.get(d.user_id) 
121
        else:
122
            brandMap = {}
123
        brands = d.filters.split('|')
124
        for brand in brands:
125
            if brandMap.has_key(brand):
126
                brandMap[brand] = brandMap.get(brand) + 1
127
            else:
128
                brandMap[brand] = 1
129
        userMap[d.user_id] = brandMap
130
    for x, y in userMap.iteritems():
131
        for bra,cow in y.iteritems():
132
            user_id=x
133
            brand=bra
134
            count=cow
135
            category_id=3
136
            weight=cow*filter_weight
137
            source='filters'
17320 manas 138
            if brand is not 'NA':
139
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
140
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
141
                conn.commit()
17317 manas 142
    conn.close()
143
    print '********** Completed inserting records for filters in master table *********'
144
 
145
def storeProductView():
146
    print '********** Inserting records for store product view in master table *********'
147
    user_id=''
148
    brand=''
149
    category_id='' 
150
    source=''
151
    count=''
152
    weight=''
153
 
154
    result = get_mongo_connection_new().User.browsinghistories.find({"url":{"$regex" : "/store_products\/view/"}})
155
 
156
    for r in result:
157
        matchObj = re.search('store_products/view/(.+?)/(.+?)$',r.get('url'))
158
        if matchObj:
159
            skuBundleId = matchObj.group(2)
160
            if 'user_id' in skuBundleId or '=' in skuBundleId:
161
                newMatchObj = re.search('(.+?)\?',skuBundleId)
162
                if newMatchObj:
163
                    skuBundleId=newMatchObj.group(1)
164
                else:
165
                    print 'No match userId', skuBundleId
166
                    continue
167
            elif '#' in skuBundleId:
168
                newMatchObj = re.search('(.+?)\#',skuBundleId)
169
                if newMatchObj:
170
                    skuBundleId=newMatchObj.group(1)
171
                else:
172
                    print 'No match #', skuBundleId
173
                    continue
174
            if '%' in skuBundleId:
175
                newMatchObj = re.search('(.+?)\%',skuBundleId)
176
                if newMatchObj:
177
                    skuBundleId=newMatchObj.group(1)
178
                else:
179
                    print 'No match', skuBundleId                     
180
        else:
181
            print "No match!!"
182
        print skuBundleId            
183
        if skuBundleId.isdigit():
184
            print 'Inside is digit true'
185
            skuBundleId = int(skuBundleId)
186
        else:
187
            print 'Inside is digit false'
188
            skuBundleId =  int(float(skuBundleId))
189
 
190
        skuData = Mongo.getSkuBrandData(skuBundleId)
191
        if len(skuData) !=0:
192
            user_id=r.get('user_id')
193
            brand=skuData['brand']
194
            category_id =skuData['category_id']
195
            source='product_view'
196
            count=1
197
            if category_id==3:
198
                if userMobileCategory.has_key(user_id):
199
                    brandMap = userMobileCategory.get(user_id)
200
                else:
201
                    brandMap = {}
202
                if brandMap.has_key(brand):
203
                    brandMap[brand] = brandMap.get(brand) + 1 
204
                else:
205
                    brandMap[brand] = 1
206
                userMobileCategory[user_id] = brandMap
207
            if category_id==5:
208
                if userTabletCategory.has_key(user_id):
209
                    brandCategoryMap = userTabletCategory.get(user_id)
210
                else:
211
                    brandCategoryMap = {}
212
                if brandCategoryMap.has_key(brand):
213
                    brandCategoryMap[brand] = brandCategoryMap.get(brand) + 1
214
                else:
215
                    brandCategoryMap[brand] = 1
216
                userTabletCategory[user_id] = brandCategoryMap    
217
        else:
218
            print 'Sku bundle Id not present',r.get('url')
219
 
220
    conn=getDbConnection()
221
    cursor=conn.cursor()
222
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='product_view'")
223
    result = cursor.fetchall()
224
    product_view_weight=result[0][0]
225
 
226
    try: 
227
        for x, y in userMobileCategory.iteritems():
228
            for bra,cow in y.iteritems():
229
                user_id=x
230
                brand= bra
231
                count=cow
232
                weight=cow*product_view_weight
233
                source='product_view'
234
                category_id=3
235
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
236
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
237
                conn.commit()
238
 
239
        for x, y in userTabletCategory.iteritems():
240
            for bra,cow in y.iteritems():
241
                user_id=x
242
                brand= bra
243
                count=cow
244
                weight=cow*product_view_weight
245
                source='product_view'
246
                category_id=5
247
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
248
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
249
                conn.commit()
250
    except:
251
        print traceback.print_exc()
252
    conn.close()
253
 
254
    print '********** Completed inserting records for store product view in master table *********'
255
 
256
def findRepeatingOrders():
257
    print '********** Call to sanitize data for orders if any multiple *********'
258
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='orders' group by user_id,brand,category_id having count(source)=2;"
259
    conn = getDbConnection()
260
    cursor = conn.cursor()
261
    rows_count = cursor.execute(datesql)
262
    if rows_count>0:
263
        result = cursor.fetchall()
264
        for r in result:
265
            print r
266
            findDetails(r[0],r[1],r[2])
267
    else:
268
        print 'No Data to be sanitized for repeating orders'            
269
 
270
def findDetails(userId,brand,category):
271
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='orders' and brand=%s group by source;"
272
    conn = getDbConnection()
273
    cursor = conn.cursor()
274
    cursor.execute(datesql,(userId,brand))
275
    result = cursor.fetchall()
276
    for r in result:
277
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='orders'"
278
        cursor = conn.cursor()
279
        cursor.execute(deletesql,(userId,brand))
280
        conn.commit()
281
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
282
        cursor = conn.cursor()
283
        cursor.execute(insertsql,(userId,brand,category,'orders',r[0],r[1]))
284
        conn.commit()
285
 
286
def findRepeatingClicks():
287
    print '********** Call to sanitize data for product view if any multiple *********'
288
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='product_view' group by user_id,brand,category_id having count(source)=2;"
289
    conn = getDbConnection()
290
    cursor = conn.cursor()
291
    rows_count = cursor.execute(datesql)
292
    if rows_count>0:
293
        result = cursor.fetchall()
294
        for r in result:
295
            print r
296
            findViewDetails(r[0],r[1],r[2])
297
    else:
298
        print 'No Data to be sanitized for product view'            
299
 
300
def findViewDetails(userId,brand,category):
301
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='product_view' and brand=%s group by source;"
302
    conn = getDbConnection()
303
    cursor = conn.cursor()
304
    cursor.execute(datesql,(userId,brand))
305
    result = cursor.fetchall()
306
    for r in result:
307
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='product_view'"
308
        cursor = conn.cursor()
309
        cursor.execute(deletesql,(userId,brand))
310
        conn.commit()
311
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
312
        cursor = conn.cursor()
313
        cursor.execute(insertsql,(userId,brand,category,'product_view',r[0],r[1]))
314
        conn.commit()        
315
 
316
def get_mongo_connection_new(host='45.33.50.227', port=27017):
317
    global con
318
    if con is None:
319
        print "Establishing connection %s host and port %d" %(host,port)
320
        try:
321
            con = pymongo.MongoClient(host, port)
322
        except Exception, e:
323
            print e
324
            return None
325
    return con
326
 
327
def main():
328
    print datetime.now()
329
    print '********** Starting to generate Master Data Users *********'
330
    addUsersForLinks()
331
    filters()
332
    storeProductView()
333
    print '********** Finished updating records for user links *********'
334
if __name__=='__main__':
335
    main()