Subversion Repositories SmartDukaan

Rev

Rev 17320 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
17317 manas 1
import MySQLdb
2
from dtr.storage.DataService import brand_preferences
3
from elixir import *
4
from dtr.storage import DataService, Mongo
5
from dtr.storage.DataService import user_filters
6
from sqlalchemy.sql.expression import func, between
7
from datetime import datetime,timedelta
8
import re
9
import traceback
10
import pymongo
11
 
12
DataService.initialize(echo=True)
13
 
14
DB_HOST = "localhost"
15
DB_USER = "root"
16
DB_PASSWORD = "shop2020"
17
DB_NAME = "dtr"
18
con=None
19
 
20
USER_SEGMENTATION_LINK_QUERY="""
21
select x.user_id,x.brand,x.category_id,x.count,x.source,x.weight 
22
  from
23
    (
24
    select c.user_id,c.brand,c.category_id,count(c.id) as count,"clicks" as source,(uw.weightage * count(c.id)) as weight 
25
    from clicks c join user_segmentation_weigthage uw on "clicks"=uw.weightage_type
26
    where  c.category_id is not null and c.category_id !=0 and c.brand is not null and 
27
    c.brand not like '' and c.user_id!=0 group by c.category_id,c.brand,c.user_id
28
 
29
    UNION
30
 
31
    select m.userId as user_id,m.brand,m.categoryId as category_id,count(1) as count,"orders" as source,(uw.weightage * count(1)) as weight
32
    from merchantsuborders m join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
33
    where m.categoryId is not null and m.brand is not null and m.userId!=0 group by m.categoryId,m.brand,m.userId
34
 
35
    UNION
36
 
37
    select fo.user_id,fo.brand,fo.category as category_id,count(fo.id) as count,"orders" as source, (uw.weightage * count(fo.id)) as weight
38
    from flipkartorders fo join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
39
    where  fo.category is not null and fo.brand is not null and  fo.brand not like '' and fo.user_id!=0 and fo.user_id is not null 
40
    group by fo.category,fo.brand,fo.user_id
41
 
42
    UNION
43
 
44
    select bp.user_id,bp.brand,bp.category_id,count(1) as count,"preferences" as source,(uw.weightage* count(1)) as weight
45
    from brand_preferences bp join user_segmentation_weigthage uw on "preferences"=uw.weightage_type  
46
    where user_id!=0 and status='show' group by bp.category_id,bp.brand,bp.user_id
47
) as x
48
"""
49
 
50
USER_STORE_PRODUCT_VIEW="""
51
    SELECT u.user_id,count(u.id) AS count, SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1)  
52
    AS skuBundleId,"product_view" as source,(uw.weightage* count(u.id)) as weight
53
    FROM user_urls u join user_segmentation_weigthage uw on "product_view"=uw.weightage_type 
54
    WHERE u.url LIKE '%store_products/view%' and u.user_id!=0 and 
55
    SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1) not like ''
56
    and datediff(curdate(),u.created)<7
57
    group by u.user_id,skuBundleId; 
58
"""
59
 
60
REPLACE_QUERY="""
61
REPLACE INTO userlinkssegmentation( user_id, brand, source,category_id, count, weight ) 
62
SELECT user_id, brand, source,category_id, count, weight
63
FROM userlinkssegmentation7days
64
WHERE user_id =%s and brand=%s and category_id=%s
65
"""
66
 
67
 
68
userMap = {}
69
userMobileCategory={}
70
userTabletCategory={}
71
 
72
def getDbConnection():
73
    return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)
74
 
75
 
76
def addUsersForLinks():
77
    print '********** Inserting records in master table *********'
78
    user_id=''
79
    brand=''
80
    category_id='' 
81
    source=''
82
    count=''
83
    weight=''
84
 
85
    datesql= USER_SEGMENTATION_LINK_QUERY
86
    conn = getDbConnection()
87
    cursor = conn.cursor()
88
    cursor.execute(datesql)
89
    result = cursor.fetchall()
90
 
91
    for r in result:
92
        user_id=r[0]
93
        brand=r[1]
94
        if r[2] == 'Mobiles':
95
            category_id=3
96
        elif r[2] == 'Tablets':
97
            category_id=5
98
        else:
99
            category_id=r[2] 
100
        count=r[3]
101
        source=r[4]
102
        weight=r[5]
103
        sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
104
        cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
105
        conn.commit()
106
    conn.close()
107
    print '********** Completed inserting records for orders,preferences and clicks in master table *********'
108
 
109
def filters():
110
    print '********** Inserting records for filters in master table *********'
111
    conn = getDbConnection()
112
    cursor = conn.cursor()
113
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='filters'")
114
    result = cursor.fetchall()
115
    filter_weight=result[0][0]
116
    data = session.query((user_filters)).filter_by(type="brand").all()
117
    for d in data:
118
        if userMap.has_key(d.user_id):
119
            brandMap = userMap.get(d.user_id) 
120
        else:
121
            brandMap = {}
122
        brands = d.filters.split('|')
123
        for brand in brands:
124
            if brandMap.has_key(brand):
125
                brandMap[brand] = brandMap.get(brand) + 1
126
            else:
127
                brandMap[brand] = 1
128
        userMap[d.user_id] = brandMap
129
    for x, y in userMap.iteritems():
130
        for bra,cow in y.iteritems():
131
            user_id=x
132
            brand=bra
133
            count=cow
134
            category_id=3
135
            weight=cow*filter_weight
136
            source='filters'
137
            sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
138
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
139
            conn.commit()
140
 
141
    conn.close()
142
    print '********** Completed inserting records for filters in master table *********'
143
 
144
def storeProductView():
145
    print '********** Inserting records for store product view in master table *********'
146
    user_id=''
147
    brand=''
148
    category_id='' 
149
    source=''
150
    count=''
151
    weight=''
152
 
153
    result = get_mongo_connection_new().User.browsinghistories.find({"url":{"$regex" : "/store_products\/view/"}})
154
 
155
    for r in result:
156
        matchObj = re.search('store_products/view/(.+?)/(.+?)$',r.get('url'))
157
        if matchObj:
158
            skuBundleId = matchObj.group(2)
159
            if 'user_id' in skuBundleId or '=' in skuBundleId:
160
                newMatchObj = re.search('(.+?)\?',skuBundleId)
161
                if newMatchObj:
162
                    skuBundleId=newMatchObj.group(1)
163
                else:
164
                    print 'No match userId', skuBundleId
165
                    continue
166
            elif '#' in skuBundleId:
167
                newMatchObj = re.search('(.+?)\#',skuBundleId)
168
                if newMatchObj:
169
                    skuBundleId=newMatchObj.group(1)
170
                else:
171
                    print 'No match #', skuBundleId
172
                    continue
173
            if '%' in skuBundleId:
174
                newMatchObj = re.search('(.+?)\%',skuBundleId)
175
                if newMatchObj:
176
                    skuBundleId=newMatchObj.group(1)
177
                else:
178
                    print 'No match', skuBundleId                     
179
        else:
180
            print "No match!!"
181
        print skuBundleId            
182
        if skuBundleId.isdigit():
183
            print 'Inside is digit true'
184
            skuBundleId = int(skuBundleId)
185
        else:
186
            print 'Inside is digit false'
187
            skuBundleId =  int(float(skuBundleId))
188
 
189
        skuData = Mongo.getSkuBrandData(skuBundleId)
190
        if len(skuData) !=0:
191
            user_id=r.get('user_id')
192
            brand=skuData['brand']
193
            category_id =skuData['category_id']
194
            source='product_view'
195
            count=1
196
            if category_id==3:
197
                if userMobileCategory.has_key(user_id):
198
                    brandMap = userMobileCategory.get(user_id)
199
                else:
200
                    brandMap = {}
201
                if brandMap.has_key(brand):
202
                    brandMap[brand] = brandMap.get(brand) + 1 
203
                else:
204
                    brandMap[brand] = 1
205
                userMobileCategory[user_id] = brandMap
206
            if category_id==5:
207
                if userTabletCategory.has_key(user_id):
208
                    brandCategoryMap = userTabletCategory.get(user_id)
209
                else:
210
                    brandCategoryMap = {}
211
                if brandCategoryMap.has_key(brand):
212
                    brandCategoryMap[brand] = brandCategoryMap.get(brand) + 1
213
                else:
214
                    brandCategoryMap[brand] = 1
215
                userTabletCategory[user_id] = brandCategoryMap    
216
        else:
217
            print 'Sku bundle Id not present',r.get('url')
218
 
219
    conn=getDbConnection()
220
    cursor=conn.cursor()
221
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='product_view'")
222
    result = cursor.fetchall()
223
    product_view_weight=result[0][0]
224
 
225
    try: 
226
        for x, y in userMobileCategory.iteritems():
227
            for bra,cow in y.iteritems():
228
                user_id=x
229
                brand= bra
230
                count=cow
231
                weight=cow*product_view_weight
232
                source='product_view'
233
                category_id=3
234
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
235
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
236
                conn.commit()
237
 
238
        for x, y in userTabletCategory.iteritems():
239
            for bra,cow in y.iteritems():
240
                user_id=x
241
                brand= bra
242
                count=cow
243
                weight=cow*product_view_weight
244
                source='product_view'
245
                category_id=5
246
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
247
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
248
                conn.commit()
249
    except:
250
        print traceback.print_exc()
251
    conn.close()
252
 
253
    print '********** Completed inserting records for store product view in master table *********'
254
 
255
def findRepeatingOrders():
256
    print '********** Call to sanitize data for orders if any multiple *********'
257
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='orders' group by user_id,brand,category_id having count(source)=2;"
258
    conn = getDbConnection()
259
    cursor = conn.cursor()
260
    rows_count = cursor.execute(datesql)
261
    if rows_count>0:
262
        result = cursor.fetchall()
263
        for r in result:
264
            print r
265
            findDetails(r[0],r[1],r[2])
266
    else:
267
        print 'No Data to be sanitized for repeating orders'            
268
 
269
def findDetails(userId,brand,category):
270
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='orders' and brand=%s group by source;"
271
    conn = getDbConnection()
272
    cursor = conn.cursor()
273
    cursor.execute(datesql,(userId,brand))
274
    result = cursor.fetchall()
275
    for r in result:
276
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='orders'"
277
        cursor = conn.cursor()
278
        cursor.execute(deletesql,(userId,brand))
279
        conn.commit()
280
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
281
        cursor = conn.cursor()
282
        cursor.execute(insertsql,(userId,brand,category,'orders',r[0],r[1]))
283
        conn.commit()
284
 
285
def findRepeatingClicks():
286
    print '********** Call to sanitize data for product view if any multiple *********'
287
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='product_view' group by user_id,brand,category_id having count(source)=2;"
288
    conn = getDbConnection()
289
    cursor = conn.cursor()
290
    rows_count = cursor.execute(datesql)
291
    if rows_count>0:
292
        result = cursor.fetchall()
293
        for r in result:
294
            print r
295
            findViewDetails(r[0],r[1],r[2])
296
    else:
297
        print 'No Data to be sanitized for product view'            
298
 
299
def findViewDetails(userId,brand,category):
300
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='product_view' and brand=%s group by source;"
301
    conn = getDbConnection()
302
    cursor = conn.cursor()
303
    cursor.execute(datesql,(userId,brand))
304
    result = cursor.fetchall()
305
    for r in result:
306
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='product_view'"
307
        cursor = conn.cursor()
308
        cursor.execute(deletesql,(userId,brand))
309
        conn.commit()
310
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
311
        cursor = conn.cursor()
312
        cursor.execute(insertsql,(userId,brand,category,'product_view',r[0],r[1]))
313
        conn.commit()        
314
 
315
def get_mongo_connection_new(host='45.33.50.227', port=27017):
316
    global con
317
    if con is None:
318
        print "Establishing connection %s host and port %d" %(host,port)
319
        try:
320
            con = pymongo.MongoClient(host, port)
321
        except Exception, e:
322
            print e
323
            return None
324
    return con
325
 
326
def main():
327
    print datetime.now()
328
    print '********** Starting to generate Master Data Users *********'
329
    addUsersForLinks()
330
    filters()
331
    storeProductView()
332
    print '********** Finished updating records for user links *********'
333
if __name__=='__main__':
334
    main()