| 16356 |
manas |
1 |
import MySQLdb
|
|
|
2 |
from dtr.storage.DataService import brand_preferences
|
| 16625 |
manas |
3 |
from elixir import *
|
|
|
4 |
from dtr.storage import DataService, Mongo
|
|
|
5 |
from dtr.storage.DataService import user_filters
|
|
|
6 |
from sqlalchemy.sql.expression import func, between
|
|
|
7 |
from datetime import datetime,timedelta
|
|
|
8 |
DataService.initialize(echo=True)
|
| 16356 |
manas |
9 |
|
|
|
10 |
DB_HOST = "localhost"
|
|
|
11 |
DB_USER = "root"
|
|
|
12 |
DB_PASSWORD = "shop2020"
|
|
|
13 |
DB_NAME = "dtr"
|
|
|
14 |
|
|
|
15 |
USER_SEGMENTATION_LINK_QUERY="""
|
| 16359 |
manas |
16 |
select x.user_id,x.brand,x.category_id,x.count,x.source,x.weight
|
| 16356 |
manas |
17 |
from
|
|
|
18 |
(
|
| 16359 |
manas |
19 |
select c.user_id,c.brand,c.category_id,count(c.id) as count,"clicks" as source,(uw.weightage * count(c.id)) as weight
|
| 16356 |
manas |
20 |
from clicks c join user_segmentation_weigthage uw on "clicks"=uw.weightage_type
|
|
|
21 |
where c.category_id is not null and c.category_id !=0 and c.brand is not null and
|
| 16625 |
manas |
22 |
c.brand not like '' and c.user_id!=0 and datediff(curdate(),c.created)<7 group by c.category_id,c.brand,c.user_id
|
| 16356 |
manas |
23 |
|
|
|
24 |
UNION
|
| 16359 |
manas |
25 |
|
|
|
26 |
select m.userId as user_id,m.brand,m.categoryId as category_id,count(1) as count,"orders" as source,(uw.weightage * count(1)) as weight
|
|
|
27 |
from merchantsuborders m join user_segmentation_weigthage uw on "orders"=uw.weightage_type
|
| 16625 |
manas |
28 |
where m.categoryId is not null and m.brand is not null and m.userId!=0 and datediff(curdate(),m.createdTime)<7 group by m.categoryId,m.brand,m.userId
|
| 16356 |
manas |
29 |
|
|
|
30 |
UNION
|
| 16359 |
manas |
31 |
|
|
|
32 |
select fo.user_id,fo.brand,fo.category as category_id,count(fo.id) as count,"orders" as source, (uw.weightage * count(fo.id)) as weight
|
| 16356 |
manas |
33 |
from flipkartorders fo join user_segmentation_weigthage uw on "orders"=uw.weightage_type
|
| 16625 |
manas |
34 |
where fo.category is not null and fo.brand is not null and fo.brand not like '' and fo.user_id!=0 and fo.user_id is not null
|
|
|
35 |
and datediff(curdate(),fo.created)<7 group by fo.category,fo.brand,fo.user_id
|
| 16359 |
manas |
36 |
|
| 16356 |
manas |
37 |
UNION
|
| 16359 |
manas |
38 |
|
| 16360 |
manas |
39 |
select bp.user_id,bp.brand,bp.category_id,count(1) as count,"preferences" as source,(uw.weightage* count(1)) as weight
|
| 16359 |
manas |
40 |
from brand_preferences bp join user_segmentation_weigthage uw on "preferences"=uw.weightage_type
|
| 16625 |
manas |
41 |
where user_id!=0 and status='show' and datediff(curdate(),bp.created)<7 group by bp.category_id,bp.brand,bp.user_id
|
| 16359 |
manas |
42 |
) as x
|
|
|
43 |
|
| 16356 |
manas |
44 |
"""
|
|
|
45 |
|
| 16625 |
manas |
46 |
USER_STORE_PRODUCT_VIEW="""
|
| 16627 |
manas |
47 |
SELECT u.user_id,count(u.id) AS count, SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url, '/', -1 ),'?',1),'%',1),'#',1)
|
| 16625 |
manas |
48 |
AS skuBundleId,"product_view" as source,(uw.weightage* count(u.id)) as weight
|
|
|
49 |
FROM user_urls u join user_segmentation_weigthage uw on "product_view"=uw.weightage_type
|
|
|
50 |
WHERE u.url LIKE '%store_products/view%' and u.user_id!=0 and
|
| 16627 |
manas |
51 |
SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url, '/', -1 ),'?',1),'%',1),'#',1) not like ''
|
| 16625 |
manas |
52 |
and datediff(curdate(),u.created)<7
|
|
|
53 |
group by u.user_id,skuBundleId;
|
|
|
54 |
"""
|
| 16356 |
manas |
55 |
|
| 16625 |
manas |
56 |
REPLACE_QUERY="""
|
|
|
57 |
REPLACE INTO userlinkssegmentation( user_id, brand, source,category_id, count, weight )
|
|
|
58 |
SELECT user_id, brand, source,category_id, count, weight
|
|
|
59 |
FROM userlinkssegmentation7days
|
|
|
60 |
WHERE user_id =%s and brand=%s and category_id=%s
|
|
|
61 |
"""
|
|
|
62 |
|
|
|
63 |
|
|
|
64 |
userMap = {} #key - user_id val - brand map
|
|
|
65 |
userMobileCategory={}
|
|
|
66 |
userTabletCategory={}
|
|
|
67 |
|
| 16356 |
manas |
68 |
def getDbConnection():
|
|
|
69 |
return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)
|
|
|
70 |
|
| 16625 |
manas |
71 |
|
|
|
72 |
def deleteUserForLinks():
|
|
|
73 |
print '********** Deleting records from last 7 days tables *********'
|
| 16356 |
manas |
74 |
conn = getDbConnection()
|
|
|
75 |
cursor = conn.cursor()
|
| 16625 |
manas |
76 |
deleteAllUserGroups = "delete from userlinkssegmentation7days"
|
|
|
77 |
try:
|
|
|
78 |
cursor.execute(deleteAllUserGroups)
|
|
|
79 |
conn.commit()
|
|
|
80 |
except:
|
|
|
81 |
conn.rollback()
|
|
|
82 |
finally:
|
|
|
83 |
conn.close()
|
|
|
84 |
print '********** Completed Deleting records from last 7 days tables *********'
|
| 16356 |
manas |
85 |
|
|
|
86 |
def addUsersForLinks():
|
| 16625 |
manas |
87 |
print '********** Inserting records in last 7 days table *********'
|
| 16356 |
manas |
88 |
user_id=''
|
|
|
89 |
brand=''
|
|
|
90 |
category_id=''
|
|
|
91 |
source=''
|
|
|
92 |
count=''
|
|
|
93 |
weight=''
|
|
|
94 |
|
|
|
95 |
datesql= USER_SEGMENTATION_LINK_QUERY
|
|
|
96 |
conn = getDbConnection()
|
|
|
97 |
cursor = conn.cursor()
|
|
|
98 |
cursor.execute(datesql)
|
|
|
99 |
result = cursor.fetchall()
|
|
|
100 |
|
|
|
101 |
for r in result:
|
|
|
102 |
user_id=r[0]
|
| 16359 |
manas |
103 |
brand=r[1]
|
|
|
104 |
if r[2] == 'Mobiles':
|
| 16356 |
manas |
105 |
category_id=3
|
| 16359 |
manas |
106 |
elif r[2] == 'Tablets':
|
| 16356 |
manas |
107 |
category_id=5
|
|
|
108 |
else:
|
| 16359 |
manas |
109 |
category_id=r[2]
|
| 16362 |
manas |
110 |
count=r[3]
|
|
|
111 |
source=r[4]
|
| 16359 |
manas |
112 |
weight=r[5]
|
| 16625 |
manas |
113 |
sql = "insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
|
| 16359 |
manas |
114 |
cursor.execute(sql,(user_id,brand, category_id, source, count,weight))
|
| 16356 |
manas |
115 |
conn.commit()
|
| 16625 |
manas |
116 |
conn.close()
|
|
|
117 |
print '********** Completed inserting records for orders,preferences and clicks in last 7 days table *********'
|
|
|
118 |
|
|
|
119 |
def filters():
|
|
|
120 |
print '********** Inserting records for filters in last 7 day table *********'
|
|
|
121 |
conn = getDbConnection()
|
|
|
122 |
cursor = conn.cursor()
|
|
|
123 |
cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='filters'")
|
|
|
124 |
result = cursor.fetchall()
|
|
|
125 |
filter_weight=result[0][0]
|
|
|
126 |
todayDate=datetime.today().date()
|
|
|
127 |
data = session.query((user_filters)).filter_by(type="brand").filter(user_filters.created.between(todayDate-timedelta(days=6),todayDate)).all()
|
|
|
128 |
for d in data:
|
|
|
129 |
if userMap.has_key(d.user_id):
|
|
|
130 |
brandMap = userMap.get(d.user_id)
|
|
|
131 |
else:
|
|
|
132 |
brandMap = {}
|
|
|
133 |
brands = d.filters.split('|')
|
|
|
134 |
for brand in brands:
|
|
|
135 |
if brandMap.has_key(brand):
|
|
|
136 |
brandMap[brand] = brandMap.get(brand) + 1
|
|
|
137 |
else:
|
|
|
138 |
brandMap[brand] = 1
|
|
|
139 |
userMap[d.user_id] = brandMap
|
|
|
140 |
for x, y in userMap.iteritems():
|
|
|
141 |
for bra,cow in y.iteritems():
|
|
|
142 |
user_id=x
|
|
|
143 |
brand=bra
|
|
|
144 |
count=cow
|
|
|
145 |
category_id=3
|
|
|
146 |
weight=cow*filter_weight
|
|
|
147 |
source='filters'
|
|
|
148 |
sql = "insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
|
|
|
149 |
cursor.execute(sql,(user_id,brand, category_id, source, count,weight))
|
|
|
150 |
conn.commit()
|
|
|
151 |
|
|
|
152 |
conn.close()
|
|
|
153 |
print '********** Completed inserting records for filters in last 7 day table *********'
|
|
|
154 |
|
|
|
155 |
def storeProductView():
|
|
|
156 |
print '********** Inserting records for store product view in last 7 day table *********'
|
|
|
157 |
user_id=''
|
|
|
158 |
brand=''
|
|
|
159 |
category_id=''
|
|
|
160 |
source=''
|
|
|
161 |
count=''
|
|
|
162 |
weight=''
|
| 16356 |
manas |
163 |
|
| 16625 |
manas |
164 |
datesql= USER_STORE_PRODUCT_VIEW
|
|
|
165 |
conn = getDbConnection()
|
|
|
166 |
cursor = conn.cursor()
|
|
|
167 |
cursor.execute(datesql)
|
|
|
168 |
result = cursor.fetchall()
|
|
|
169 |
|
|
|
170 |
for r in result:
|
|
|
171 |
skuData = Mongo.getSkuBrandData(int(r[2]))
|
|
|
172 |
if len(skuData) !=0:
|
|
|
173 |
user_id=r[0]
|
|
|
174 |
brand=skuData['brand']
|
|
|
175 |
category_id =skuData['category_id']
|
|
|
176 |
source=r[3]
|
|
|
177 |
count=r[1]
|
|
|
178 |
if category_id==3:
|
|
|
179 |
if userMobileCategory.has_key(user_id):
|
|
|
180 |
brandMap = userMobileCategory.get(user_id)
|
|
|
181 |
else:
|
|
|
182 |
brandMap = {}
|
|
|
183 |
if brandMap.has_key(brand):
|
|
|
184 |
brandMap[brand] = brandMap.get(brand) + r[1]
|
|
|
185 |
else:
|
|
|
186 |
brandMap[brand] = r[1]
|
|
|
187 |
userMobileCategory[user_id] = brandMap
|
|
|
188 |
if category_id==5:
|
|
|
189 |
if userTabletCategory.has_key(user_id):
|
|
|
190 |
brandCategoryMap = userTabletCategory.get(user_id)
|
|
|
191 |
else:
|
|
|
192 |
brandCategoryMap = {}
|
|
|
193 |
if brandCategoryMap.has_key(brand):
|
|
|
194 |
brandCategoryMap[brand] = brandCategoryMap.get(brand) + r[1]
|
|
|
195 |
else:
|
|
|
196 |
brandCategoryMap[brand] = r[1]
|
|
|
197 |
userTabletCategory[user_id] = brandCategoryMap
|
|
|
198 |
else:
|
|
|
199 |
print 'Sku bundle Id not present',r[2]
|
|
|
200 |
|
|
|
201 |
|
|
|
202 |
cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='product_view'")
|
|
|
203 |
result = cursor.fetchall()
|
|
|
204 |
product_view_weight=result[0][0]
|
|
|
205 |
|
|
|
206 |
for x, y in userMobileCategory.iteritems():
|
|
|
207 |
for bra,cow in y.iteritems():
|
|
|
208 |
user_id=x
|
|
|
209 |
brand= bra
|
|
|
210 |
count=cow
|
|
|
211 |
weight=cow*product_view_weight
|
|
|
212 |
source='product_view'
|
|
|
213 |
category_id=3
|
|
|
214 |
sql = "insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
|
|
|
215 |
cursor.execute(sql,(user_id,brand, category_id, source, count,weight))
|
|
|
216 |
conn.commit()
|
|
|
217 |
|
|
|
218 |
for x, y in userTabletCategory.iteritems():
|
|
|
219 |
for bra,cow in y.iteritems():
|
|
|
220 |
user_id=x
|
|
|
221 |
brand= bra
|
|
|
222 |
count=cow
|
|
|
223 |
weight=cow*product_view_weight
|
|
|
224 |
source='product_view'
|
|
|
225 |
category_id=5
|
|
|
226 |
sql = "insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
|
|
|
227 |
cursor.execute(sql,(user_id,brand, category_id, source, count,weight))
|
|
|
228 |
conn.commit()
|
| 16356 |
manas |
229 |
conn.close()
|
|
|
230 |
|
| 16625 |
manas |
231 |
print '********** Completed inserting records for store product view in last 7 day table *********'
|
|
|
232 |
|
|
|
233 |
def findRepeatingOrders():
|
|
|
234 |
print '********** Call to sanitize data for orders if any multiple *********'
|
|
|
235 |
datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='orders' group by user_id,brand,category_id having count(source)=2;"
|
| 16356 |
manas |
236 |
conn = getDbConnection()
|
|
|
237 |
cursor = conn.cursor()
|
| 16625 |
manas |
238 |
rows_count = cursor.execute(datesql)
|
|
|
239 |
if rows_count>0:
|
|
|
240 |
result = cursor.fetchall()
|
|
|
241 |
for r in result:
|
|
|
242 |
print r
|
|
|
243 |
findDetails(r[0],r[1],r[2])
|
|
|
244 |
else:
|
|
|
245 |
print 'No Data to be sanitized for repeating orders'
|
|
|
246 |
|
|
|
247 |
def findDetails(userId,brand,category):
|
|
|
248 |
datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='orders' and brand=%s group by source;"
|
|
|
249 |
conn = getDbConnection()
|
|
|
250 |
cursor = conn.cursor()
|
|
|
251 |
cursor.execute(datesql,(userId,brand))
|
|
|
252 |
result = cursor.fetchall()
|
|
|
253 |
for r in result:
|
|
|
254 |
deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='orders'"
|
|
|
255 |
cursor = conn.cursor()
|
|
|
256 |
cursor.execute(deletesql,(userId,brand))
|
| 16356 |
manas |
257 |
conn.commit()
|
| 16625 |
manas |
258 |
insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
|
|
|
259 |
cursor = conn.cursor()
|
|
|
260 |
cursor.execute(insertsql,(userId,brand,category,'orders',r[0],r[1]))
|
|
|
261 |
conn.commit()
|
|
|
262 |
|
|
|
263 |
def findRepeatingClicks():
|
|
|
264 |
print '********** Call to sanitize data for product view if any multiple *********'
|
|
|
265 |
datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='product_view' group by user_id,brand,category_id having count(source)=2;"
|
|
|
266 |
conn = getDbConnection()
|
|
|
267 |
cursor = conn.cursor()
|
|
|
268 |
rows_count = cursor.execute(datesql)
|
|
|
269 |
if rows_count>0:
|
|
|
270 |
result = cursor.fetchall()
|
|
|
271 |
for r in result:
|
|
|
272 |
print r
|
|
|
273 |
findViewDetails(r[0],r[1],r[2])
|
|
|
274 |
else:
|
|
|
275 |
print 'No Data to be sanitized for product view'
|
|
|
276 |
|
|
|
277 |
def findViewDetails(userId,brand,category):
|
|
|
278 |
datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='product_view' and brand=%s group by source;"
|
|
|
279 |
conn = getDbConnection()
|
|
|
280 |
cursor = conn.cursor()
|
|
|
281 |
cursor.execute(datesql,(userId,brand))
|
|
|
282 |
result = cursor.fetchall()
|
|
|
283 |
for r in result:
|
|
|
284 |
deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='product_view'"
|
|
|
285 |
cursor = conn.cursor()
|
|
|
286 |
cursor.execute(deletesql,(userId,brand))
|
|
|
287 |
conn.commit()
|
|
|
288 |
insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
|
|
|
289 |
cursor = conn.cursor()
|
|
|
290 |
cursor.execute(insertsql,(userId,brand,category,'product_view',r[0],r[1]))
|
|
|
291 |
conn.commit()
|
| 16356 |
manas |
292 |
|
| 16625 |
manas |
293 |
|
|
|
294 |
def checkCountForUpdate():
|
|
|
295 |
print '********** Checking users brand whose cumulative weight is greater than 3 *********'
|
|
|
296 |
datesql= "select user_id,brand,category_id from userlinkssegmentation7days group by user_id,brand having sum(weight) >=3;"
|
|
|
297 |
conn = getDbConnection()
|
|
|
298 |
cursor = conn.cursor()
|
|
|
299 |
cursor.execute(datesql)
|
|
|
300 |
result = cursor.fetchall()
|
|
|
301 |
for r in result:
|
|
|
302 |
checkUserForUpdate(r[0],r[1],r[2])
|
|
|
303 |
conn.close()
|
|
|
304 |
def checkUserForUpdate(userId,brand,category):
|
|
|
305 |
print '********** Checking users brand present or not in the master table *********'
|
|
|
306 |
datesql= "select distinct brand from userlinkssegmentation where user_id=%s and brand=%s and category_id=%s"
|
|
|
307 |
conn = getDbConnection()
|
|
|
308 |
cursor = conn.cursor()
|
|
|
309 |
cursor.execute(datesql,(userId,brand,category))
|
|
|
310 |
result = cursor.fetchall()
|
|
|
311 |
if len(result)==0:
|
|
|
312 |
insertNewUserLinkDetails(userId,brand,category)
|
|
|
313 |
else:
|
|
|
314 |
userReplaceData(userId,brand,category)
|
|
|
315 |
conn.close()
|
|
|
316 |
|
|
|
317 |
def userReplaceData(userId,brand,category):
|
|
|
318 |
print 'Data that needs to be updated for', userId,brand,category
|
|
|
319 |
conn = getDbConnection()
|
|
|
320 |
cursor = conn.cursor()
|
|
|
321 |
cursor.execute(REPLACE_QUERY,(userId,brand,category))
|
|
|
322 |
conn.commit()
|
|
|
323 |
conn.close()
|
|
|
324 |
|
|
|
325 |
def insertNewUserLinkDetails(userId,brand,category):
|
|
|
326 |
print 'Data needs to be inserted as the brand was not present earlier', userId,brand,category
|
|
|
327 |
conn = getDbConnection()
|
|
|
328 |
selectNewUser="select * from userlinkssegmentation7days where user_id=%s and brand=%s and category_id=%s"
|
|
|
329 |
cursor = conn.cursor()
|
|
|
330 |
cursor.execute(selectNewUser,(userId,brand,category))
|
|
|
331 |
result = cursor.fetchall()
|
|
|
332 |
print 'Result while inserting new record in master',result
|
|
|
333 |
for r in result:
|
|
|
334 |
sql = "insert into userlinkssegmentation (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
|
|
|
335 |
cursor.execute(sql,(r[0],r[1],r[2], r[3], r[4],r[5]))
|
|
|
336 |
conn.commit()
|
|
|
337 |
conn.close()
|
| 16356 |
manas |
338 |
def main():
|
| 16625 |
manas |
339 |
print datetime.now()
|
|
|
340 |
print '********** Starting to update links for Users *********'
|
| 16356 |
manas |
341 |
deleteUserForLinks()
|
|
|
342 |
addUsersForLinks()
|
| 16625 |
manas |
343 |
filters()
|
|
|
344 |
storeProductView()
|
|
|
345 |
findRepeatingOrders()
|
|
|
346 |
findRepeatingClicks()
|
|
|
347 |
checkCountForUpdate()
|
|
|
348 |
print '********** Finished updating records for user links *********'
|
| 16356 |
manas |
349 |
if __name__=='__main__':
|
| 16625 |
manas |
350 |
main()
|