Subversion Repositories SmartDukaan

Rev

Rev 17320 | Rev 20689 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
17317 manas 1
import MySQLdb
2
from dtr.storage.DataService import brand_preferences
3
from elixir import *
4
from dtr.storage import DataService, Mongo
5
from dtr.storage.DataService import user_filters
6
from sqlalchemy.sql.expression import func, between
7
from datetime import datetime,timedelta
8
import re
9
import traceback
10
import pymongo
17323 manas 11
from operator import and_
17317 manas 12
 
13
DataService.initialize(echo=True)
14
 
15
DB_HOST = "localhost"
16
DB_USER = "root"
17
DB_PASSWORD = "shop2020"
18
DB_NAME = "dtr"
19
con=None
20
 
21
USER_SEGMENTATION_LINK_QUERY="""
22
select x.user_id,x.brand,x.category_id,x.count,x.source,x.weight 
23
  from
24
    (
25
    select c.user_id,c.brand,c.category_id,count(c.id) as count,"clicks" as source,(uw.weightage * count(c.id)) as weight 
26
    from clicks c join user_segmentation_weigthage uw on "clicks"=uw.weightage_type
27
    where  c.category_id is not null and c.category_id !=0 and c.brand is not null and 
28
    c.brand not like '' and c.user_id!=0 group by c.category_id,c.brand,c.user_id
29
 
30
    UNION
31
 
32
    select m.userId as user_id,m.brand,m.categoryId as category_id,count(1) as count,"orders" as source,(uw.weightage * count(1)) as weight
33
    from merchantsuborders m join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
17323 manas 34
    where m.categoryId is not null and m.brand is not null and m.userId!=0 and m.brand not like 'NA' group by m.categoryId,m.brand,m.userId
17317 manas 35
 
36
    UNION
37
 
38
    select fo.user_id,fo.brand,fo.category as category_id,count(fo.id) as count,"orders" as source, (uw.weightage * count(fo.id)) as weight
39
    from flipkartorders fo join user_segmentation_weigthage uw on "orders"=uw.weightage_type 
40
    where  fo.category is not null and fo.brand is not null and  fo.brand not like '' and fo.user_id!=0 and fo.user_id is not null 
41
    group by fo.category,fo.brand,fo.user_id
42
 
43
    UNION
44
 
45
    select bp.user_id,bp.brand,bp.category_id,count(1) as count,"preferences" as source,(uw.weightage* count(1)) as weight
46
    from brand_preferences bp join user_segmentation_weigthage uw on "preferences"=uw.weightage_type  
47
    where user_id!=0 and status='show' group by bp.category_id,bp.brand,bp.user_id
48
) as x
49
"""
50
 
51
USER_STORE_PRODUCT_VIEW="""
52
    SELECT u.user_id,count(u.id) AS count, SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1)  
53
    AS skuBundleId,"product_view" as source,(uw.weightage* count(u.id)) as weight
54
    FROM user_urls u join user_segmentation_weigthage uw on "product_view"=uw.weightage_type 
55
    WHERE u.url LIKE '%store_products/view%' and u.user_id!=0 and 
56
    SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url,  '/', -1 ),'?',1),'%',1),'#',1) not like ''
57
    and datediff(curdate(),u.created)<7
58
    group by u.user_id,skuBundleId; 
59
"""
60
 
61
REPLACE_QUERY="""
62
REPLACE INTO userlinkssegmentation( user_id, brand, source,category_id, count, weight ) 
63
SELECT user_id, brand, source,category_id, count, weight
64
FROM userlinkssegmentation7days
65
WHERE user_id =%s and brand=%s and category_id=%s
66
"""
67
 
68
 
69
userMap = {}
70
userMobileCategory={}
71
userTabletCategory={}
72
 
73
def getDbConnection():
74
    return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)
75
 
76
 
77
def addUsersForLinks():
78
    print '********** Inserting records in master table *********'
79
    user_id=''
80
    brand=''
81
    category_id='' 
82
    source=''
83
    count=''
84
    weight=''
85
 
86
    datesql= USER_SEGMENTATION_LINK_QUERY
87
    conn = getDbConnection()
88
    cursor = conn.cursor()
89
    cursor.execute(datesql)
90
    result = cursor.fetchall()
91
 
92
    for r in result:
93
        user_id=r[0]
94
        brand=r[1]
95
        if r[2] == 'Mobiles':
96
            category_id=3
97
        elif r[2] == 'Tablets':
98
            category_id=5
99
        else:
100
            category_id=r[2] 
101
        count=r[3]
102
        source=r[4]
103
        weight=r[5]
17320 manas 104
        if brand is not 'NA':
105
            sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
106
            cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
107
            conn.commit()
17317 manas 108
    conn.close()
109
    print '********** Completed inserting records for orders,preferences and clicks in master table *********'
110
 
111
def filters():
112
    print '********** Inserting records for filters in master table *********'
113
    conn = getDbConnection()
114
    cursor = conn.cursor()
115
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='filters'")
116
    result = cursor.fetchall()
117
    filter_weight=result[0][0]
17323 manas 118
    data = session.query((user_filters)).filter_by(type="brand").filter(~(user_filters.filters).like('NA')).all()
17317 manas 119
    for d in data:
120
        if userMap.has_key(d.user_id):
121
            brandMap = userMap.get(d.user_id) 
122
        else:
123
            brandMap = {}
124
        brands = d.filters.split('|')
125
        for brand in brands:
126
            if brandMap.has_key(brand):
127
                brandMap[brand] = brandMap.get(brand) + 1
128
            else:
129
                brandMap[brand] = 1
130
        userMap[d.user_id] = brandMap
131
    for x, y in userMap.iteritems():
132
        for bra,cow in y.iteritems():
133
            user_id=x
134
            brand=bra
135
            count=cow
136
            category_id=3
137
            weight=cow*filter_weight
138
            source='filters'
17320 manas 139
            if brand is not 'NA':
140
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
141
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
142
                conn.commit()
17317 manas 143
    conn.close()
144
    print '********** Completed inserting records for filters in master table *********'
145
 
146
def storeProductView():
147
    print '********** Inserting records for store product view in master table *********'
148
    user_id=''
149
    brand=''
150
    category_id='' 
151
    source=''
152
    count=''
153
    weight=''
154
 
155
    result = get_mongo_connection_new().User.browsinghistories.find({"url":{"$regex" : "/store_products\/view/"}})
156
 
157
    for r in result:
158
        matchObj = re.search('store_products/view/(.+?)/(.+?)$',r.get('url'))
159
        if matchObj:
160
            skuBundleId = matchObj.group(2)
161
            if 'user_id' in skuBundleId or '=' in skuBundleId:
162
                newMatchObj = re.search('(.+?)\?',skuBundleId)
163
                if newMatchObj:
164
                    skuBundleId=newMatchObj.group(1)
165
                else:
166
                    print 'No match userId', skuBundleId
167
                    continue
168
            elif '#' in skuBundleId:
169
                newMatchObj = re.search('(.+?)\#',skuBundleId)
170
                if newMatchObj:
171
                    skuBundleId=newMatchObj.group(1)
172
                else:
173
                    print 'No match #', skuBundleId
174
                    continue
175
            if '%' in skuBundleId:
176
                newMatchObj = re.search('(.+?)\%',skuBundleId)
177
                if newMatchObj:
178
                    skuBundleId=newMatchObj.group(1)
179
                else:
180
                    print 'No match', skuBundleId                     
181
        else:
182
            print "No match!!"
183
        print skuBundleId            
184
        if skuBundleId.isdigit():
185
            print 'Inside is digit true'
186
            skuBundleId = int(skuBundleId)
187
        else:
188
            print 'Inside is digit false'
189
            skuBundleId =  int(float(skuBundleId))
190
 
191
        skuData = Mongo.getSkuBrandData(skuBundleId)
192
        if len(skuData) !=0:
193
            user_id=r.get('user_id')
194
            brand=skuData['brand']
195
            category_id =skuData['category_id']
196
            source='product_view'
197
            count=1
198
            if category_id==3:
199
                if userMobileCategory.has_key(user_id):
200
                    brandMap = userMobileCategory.get(user_id)
201
                else:
202
                    brandMap = {}
203
                if brandMap.has_key(brand):
204
                    brandMap[brand] = brandMap.get(brand) + 1 
205
                else:
206
                    brandMap[brand] = 1
207
                userMobileCategory[user_id] = brandMap
208
            if category_id==5:
209
                if userTabletCategory.has_key(user_id):
210
                    brandCategoryMap = userTabletCategory.get(user_id)
211
                else:
212
                    brandCategoryMap = {}
213
                if brandCategoryMap.has_key(brand):
214
                    brandCategoryMap[brand] = brandCategoryMap.get(brand) + 1
215
                else:
216
                    brandCategoryMap[brand] = 1
217
                userTabletCategory[user_id] = brandCategoryMap    
218
        else:
219
            print 'Sku bundle Id not present',r.get('url')
220
 
221
    conn=getDbConnection()
222
    cursor=conn.cursor()
223
    cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='product_view'")
224
    result = cursor.fetchall()
225
    product_view_weight=result[0][0]
226
 
227
    try: 
228
        for x, y in userMobileCategory.iteritems():
229
            for bra,cow in y.iteritems():
230
                user_id=x
231
                brand= bra
232
                count=cow
233
                weight=cow*product_view_weight
234
                source='product_view'
235
                category_id=3
236
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
237
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
238
                conn.commit()
239
 
240
        for x, y in userTabletCategory.iteritems():
241
            for bra,cow in y.iteritems():
242
                user_id=x
243
                brand= bra
244
                count=cow
245
                weight=cow*product_view_weight
246
                source='product_view'
247
                category_id=5
248
                sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
249
                cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
250
                conn.commit()
251
    except:
252
        print traceback.print_exc()
253
    conn.close()
254
 
255
    print '********** Completed inserting records for store product view in master table *********'
256
 
257
def findRepeatingOrders():
258
    print '********** Call to sanitize data for orders if any multiple *********'
259
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='orders' group by user_id,brand,category_id having count(source)=2;"
260
    conn = getDbConnection()
261
    cursor = conn.cursor()
262
    rows_count = cursor.execute(datesql)
263
    if rows_count>0:
264
        result = cursor.fetchall()
265
        for r in result:
266
            print r
267
            findDetails(r[0],r[1],r[2])
268
    else:
269
        print 'No Data to be sanitized for repeating orders'            
270
 
271
def findDetails(userId,brand,category):
272
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='orders' and brand=%s group by source;"
273
    conn = getDbConnection()
274
    cursor = conn.cursor()
275
    cursor.execute(datesql,(userId,brand))
276
    result = cursor.fetchall()
277
    for r in result:
278
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='orders'"
279
        cursor = conn.cursor()
280
        cursor.execute(deletesql,(userId,brand))
281
        conn.commit()
282
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
283
        cursor = conn.cursor()
284
        cursor.execute(insertsql,(userId,brand,category,'orders',r[0],r[1]))
285
        conn.commit()
286
 
287
def findRepeatingClicks():
288
    print '********** Call to sanitize data for product view if any multiple *********'
289
    datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='product_view' group by user_id,brand,category_id having count(source)=2;"
290
    conn = getDbConnection()
291
    cursor = conn.cursor()
292
    rows_count = cursor.execute(datesql)
293
    if rows_count>0:
294
        result = cursor.fetchall()
295
        for r in result:
296
            print r
297
            findViewDetails(r[0],r[1],r[2])
298
    else:
299
        print 'No Data to be sanitized for product view'            
300
 
301
def findViewDetails(userId,brand,category):
302
    datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='product_view' and brand=%s group by source;"
303
    conn = getDbConnection()
304
    cursor = conn.cursor()
305
    cursor.execute(datesql,(userId,brand))
306
    result = cursor.fetchall()
307
    for r in result:
308
        deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='product_view'"
309
        cursor = conn.cursor()
310
        cursor.execute(deletesql,(userId,brand))
311
        conn.commit()
312
        insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
313
        cursor = conn.cursor()
314
        cursor.execute(insertsql,(userId,brand,category,'product_view',r[0],r[1]))
315
        conn.commit()        
316
 
317
def get_mongo_connection_new(host='45.33.50.227', port=27017):
318
    global con
319
    if con is None:
320
        print "Establishing connection %s host and port %d" %(host,port)
321
        try:
322
            con = pymongo.MongoClient(host, port)
323
        except Exception, e:
324
            print e
325
            return None
326
    return con
327
 
328
def main():
329
    print datetime.now()
330
    print '********** Starting to generate Master Data Users *********'
331
    addUsersForLinks()
332
    filters()
333
    storeProductView()
334
    print '********** Finished updating records for user links *********'
335
if __name__=='__main__':
336
    main()