| 17317 |
manas |
1 |
import MySQLdb
|
|
|
2 |
from dtr.storage.DataService import brand_preferences
|
|
|
3 |
from elixir import *
|
|
|
4 |
from dtr.storage import DataService, Mongo
|
|
|
5 |
from dtr.storage.DataService import user_filters
|
|
|
6 |
from sqlalchemy.sql.expression import func, between
|
|
|
7 |
from datetime import datetime,timedelta
|
|
|
8 |
import re
|
|
|
9 |
import traceback
|
|
|
10 |
import pymongo
|
| 17323 |
manas |
11 |
from operator import and_
|
| 17317 |
manas |
12 |
|
|
|
13 |
DataService.initialize(echo=True)
|
|
|
14 |
|
|
|
15 |
DB_HOST = "localhost"
|
|
|
16 |
DB_USER = "root"
|
|
|
17 |
DB_PASSWORD = "shop2020"
|
|
|
18 |
DB_NAME = "dtr"
|
|
|
19 |
con=None
|
|
|
20 |
|
|
|
21 |
USER_SEGMENTATION_LINK_QUERY="""
|
|
|
22 |
select x.user_id,x.brand,x.category_id,x.count,x.source,x.weight
|
|
|
23 |
from
|
|
|
24 |
(
|
|
|
25 |
select c.user_id,c.brand,c.category_id,count(c.id) as count,"clicks" as source,(uw.weightage * count(c.id)) as weight
|
|
|
26 |
from clicks c join user_segmentation_weigthage uw on "clicks"=uw.weightage_type
|
|
|
27 |
where c.category_id is not null and c.category_id !=0 and c.brand is not null and
|
|
|
28 |
c.brand not like '' and c.user_id!=0 group by c.category_id,c.brand,c.user_id
|
|
|
29 |
|
|
|
30 |
UNION
|
|
|
31 |
|
|
|
32 |
select m.userId as user_id,m.brand,m.categoryId as category_id,count(1) as count,"orders" as source,(uw.weightage * count(1)) as weight
|
|
|
33 |
from merchantsuborders m join user_segmentation_weigthage uw on "orders"=uw.weightage_type
|
| 17323 |
manas |
34 |
where m.categoryId is not null and m.brand is not null and m.userId!=0 and m.brand not like 'NA' group by m.categoryId,m.brand,m.userId
|
| 17317 |
manas |
35 |
|
|
|
36 |
UNION
|
|
|
37 |
|
|
|
38 |
select fo.user_id,fo.brand,fo.category as category_id,count(fo.id) as count,"orders" as source, (uw.weightage * count(fo.id)) as weight
|
|
|
39 |
from flipkartorders fo join user_segmentation_weigthage uw on "orders"=uw.weightage_type
|
|
|
40 |
where fo.category is not null and fo.brand is not null and fo.brand not like '' and fo.user_id!=0 and fo.user_id is not null
|
|
|
41 |
group by fo.category,fo.brand,fo.user_id
|
|
|
42 |
|
|
|
43 |
UNION
|
|
|
44 |
|
|
|
45 |
select bp.user_id,bp.brand,bp.category_id,count(1) as count,"preferences" as source,(uw.weightage* count(1)) as weight
|
|
|
46 |
from brand_preferences bp join user_segmentation_weigthage uw on "preferences"=uw.weightage_type
|
|
|
47 |
where user_id!=0 and status='show' group by bp.category_id,bp.brand,bp.user_id
|
|
|
48 |
) as x
|
|
|
49 |
"""
|
|
|
50 |
|
|
|
51 |
USER_STORE_PRODUCT_VIEW="""
|
|
|
52 |
SELECT u.user_id,count(u.id) AS count, SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url, '/', -1 ),'?',1),'%',1),'#',1)
|
|
|
53 |
AS skuBundleId,"product_view" as source,(uw.weightage* count(u.id)) as weight
|
|
|
54 |
FROM user_urls u join user_segmentation_weigthage uw on "product_view"=uw.weightage_type
|
|
|
55 |
WHERE u.url LIKE '%store_products/view%' and u.user_id!=0 and
|
|
|
56 |
SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(SUBSTRING_INDEX(u.url, '/', -1 ),'?',1),'%',1),'#',1) not like ''
|
|
|
57 |
and datediff(curdate(),u.created)<7
|
|
|
58 |
group by u.user_id,skuBundleId;
|
|
|
59 |
"""
|
|
|
60 |
|
|
|
61 |
REPLACE_QUERY="""
|
|
|
62 |
REPLACE INTO userlinkssegmentation( user_id, brand, source,category_id, count, weight )
|
|
|
63 |
SELECT user_id, brand, source,category_id, count, weight
|
|
|
64 |
FROM userlinkssegmentation7days
|
|
|
65 |
WHERE user_id =%s and brand=%s and category_id=%s
|
|
|
66 |
"""
|
|
|
67 |
|
|
|
68 |
|
|
|
69 |
userMap = {}
|
|
|
70 |
userMobileCategory={}
|
|
|
71 |
userTabletCategory={}
|
|
|
72 |
|
|
|
73 |
def getDbConnection():
|
|
|
74 |
return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)
|
|
|
75 |
|
|
|
76 |
|
|
|
77 |
def addUsersForLinks():
|
|
|
78 |
print '********** Inserting records in master table *********'
|
|
|
79 |
user_id=''
|
|
|
80 |
brand=''
|
|
|
81 |
category_id=''
|
|
|
82 |
source=''
|
|
|
83 |
count=''
|
|
|
84 |
weight=''
|
|
|
85 |
|
|
|
86 |
datesql= USER_SEGMENTATION_LINK_QUERY
|
|
|
87 |
conn = getDbConnection()
|
|
|
88 |
cursor = conn.cursor()
|
|
|
89 |
cursor.execute(datesql)
|
|
|
90 |
result = cursor.fetchall()
|
|
|
91 |
|
|
|
92 |
for r in result:
|
|
|
93 |
user_id=r[0]
|
|
|
94 |
brand=r[1]
|
|
|
95 |
if r[2] == 'Mobiles':
|
|
|
96 |
category_id=3
|
|
|
97 |
elif r[2] == 'Tablets':
|
|
|
98 |
category_id=5
|
|
|
99 |
else:
|
|
|
100 |
category_id=r[2]
|
|
|
101 |
count=r[3]
|
|
|
102 |
source=r[4]
|
|
|
103 |
weight=r[5]
|
| 17320 |
manas |
104 |
if brand is not 'NA':
|
|
|
105 |
sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
|
|
|
106 |
cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
|
|
|
107 |
conn.commit()
|
| 17317 |
manas |
108 |
conn.close()
|
|
|
109 |
print '********** Completed inserting records for orders,preferences and clicks in master table *********'
|
|
|
110 |
|
|
|
111 |
def filters():
|
|
|
112 |
print '********** Inserting records for filters in master table *********'
|
|
|
113 |
conn = getDbConnection()
|
|
|
114 |
cursor = conn.cursor()
|
|
|
115 |
cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='filters'")
|
|
|
116 |
result = cursor.fetchall()
|
|
|
117 |
filter_weight=result[0][0]
|
| 17323 |
manas |
118 |
data = session.query((user_filters)).filter_by(type="brand").filter(~(user_filters.filters).like('NA')).all()
|
| 17317 |
manas |
119 |
for d in data:
|
|
|
120 |
if userMap.has_key(d.user_id):
|
|
|
121 |
brandMap = userMap.get(d.user_id)
|
|
|
122 |
else:
|
|
|
123 |
brandMap = {}
|
|
|
124 |
brands = d.filters.split('|')
|
|
|
125 |
for brand in brands:
|
|
|
126 |
if brandMap.has_key(brand):
|
|
|
127 |
brandMap[brand] = brandMap.get(brand) + 1
|
|
|
128 |
else:
|
|
|
129 |
brandMap[brand] = 1
|
|
|
130 |
userMap[d.user_id] = brandMap
|
|
|
131 |
for x, y in userMap.iteritems():
|
|
|
132 |
for bra,cow in y.iteritems():
|
|
|
133 |
user_id=x
|
|
|
134 |
brand=bra
|
|
|
135 |
count=cow
|
|
|
136 |
category_id=3
|
|
|
137 |
weight=cow*filter_weight
|
|
|
138 |
source='filters'
|
| 17320 |
manas |
139 |
if brand is not 'NA':
|
|
|
140 |
sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
|
|
|
141 |
cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
|
|
|
142 |
conn.commit()
|
| 17317 |
manas |
143 |
conn.close()
|
|
|
144 |
print '********** Completed inserting records for filters in master table *********'
|
|
|
145 |
|
|
|
146 |
def storeProductView():
|
|
|
147 |
print '********** Inserting records for store product view in master table *********'
|
|
|
148 |
user_id=''
|
|
|
149 |
brand=''
|
|
|
150 |
category_id=''
|
|
|
151 |
source=''
|
|
|
152 |
count=''
|
|
|
153 |
weight=''
|
|
|
154 |
|
|
|
155 |
result = get_mongo_connection_new().User.browsinghistories.find({"url":{"$regex" : "/store_products\/view/"}})
|
|
|
156 |
|
|
|
157 |
for r in result:
|
|
|
158 |
matchObj = re.search('store_products/view/(.+?)/(.+?)$',r.get('url'))
|
|
|
159 |
if matchObj:
|
|
|
160 |
skuBundleId = matchObj.group(2)
|
|
|
161 |
if 'user_id' in skuBundleId or '=' in skuBundleId:
|
|
|
162 |
newMatchObj = re.search('(.+?)\?',skuBundleId)
|
|
|
163 |
if newMatchObj:
|
|
|
164 |
skuBundleId=newMatchObj.group(1)
|
|
|
165 |
else:
|
|
|
166 |
print 'No match userId', skuBundleId
|
|
|
167 |
continue
|
|
|
168 |
elif '#' in skuBundleId:
|
|
|
169 |
newMatchObj = re.search('(.+?)\#',skuBundleId)
|
|
|
170 |
if newMatchObj:
|
|
|
171 |
skuBundleId=newMatchObj.group(1)
|
|
|
172 |
else:
|
|
|
173 |
print 'No match #', skuBundleId
|
|
|
174 |
continue
|
|
|
175 |
if '%' in skuBundleId:
|
|
|
176 |
newMatchObj = re.search('(.+?)\%',skuBundleId)
|
|
|
177 |
if newMatchObj:
|
|
|
178 |
skuBundleId=newMatchObj.group(1)
|
|
|
179 |
else:
|
|
|
180 |
print 'No match', skuBundleId
|
|
|
181 |
else:
|
|
|
182 |
print "No match!!"
|
|
|
183 |
print skuBundleId
|
|
|
184 |
if skuBundleId.isdigit():
|
|
|
185 |
print 'Inside is digit true'
|
|
|
186 |
skuBundleId = int(skuBundleId)
|
|
|
187 |
else:
|
|
|
188 |
print 'Inside is digit false'
|
|
|
189 |
skuBundleId = int(float(skuBundleId))
|
|
|
190 |
|
|
|
191 |
skuData = Mongo.getSkuBrandData(skuBundleId)
|
|
|
192 |
if len(skuData) !=0:
|
|
|
193 |
user_id=r.get('user_id')
|
|
|
194 |
brand=skuData['brand']
|
|
|
195 |
category_id =skuData['category_id']
|
|
|
196 |
source='product_view'
|
|
|
197 |
count=1
|
|
|
198 |
if category_id==3:
|
|
|
199 |
if userMobileCategory.has_key(user_id):
|
|
|
200 |
brandMap = userMobileCategory.get(user_id)
|
|
|
201 |
else:
|
|
|
202 |
brandMap = {}
|
|
|
203 |
if brandMap.has_key(brand):
|
|
|
204 |
brandMap[brand] = brandMap.get(brand) + 1
|
|
|
205 |
else:
|
|
|
206 |
brandMap[brand] = 1
|
|
|
207 |
userMobileCategory[user_id] = brandMap
|
|
|
208 |
if category_id==5:
|
|
|
209 |
if userTabletCategory.has_key(user_id):
|
|
|
210 |
brandCategoryMap = userTabletCategory.get(user_id)
|
|
|
211 |
else:
|
|
|
212 |
brandCategoryMap = {}
|
|
|
213 |
if brandCategoryMap.has_key(brand):
|
|
|
214 |
brandCategoryMap[brand] = brandCategoryMap.get(brand) + 1
|
|
|
215 |
else:
|
|
|
216 |
brandCategoryMap[brand] = 1
|
|
|
217 |
userTabletCategory[user_id] = brandCategoryMap
|
|
|
218 |
else:
|
|
|
219 |
print 'Sku bundle Id not present',r.get('url')
|
|
|
220 |
|
|
|
221 |
conn=getDbConnection()
|
|
|
222 |
cursor=conn.cursor()
|
|
|
223 |
cursor.execute("select weightage from user_segmentation_weigthage where weightage_type='product_view'")
|
|
|
224 |
result = cursor.fetchall()
|
|
|
225 |
product_view_weight=result[0][0]
|
|
|
226 |
|
|
|
227 |
try:
|
|
|
228 |
for x, y in userMobileCategory.iteritems():
|
|
|
229 |
for bra,cow in y.iteritems():
|
|
|
230 |
user_id=x
|
|
|
231 |
brand= bra
|
|
|
232 |
count=cow
|
|
|
233 |
weight=cow*product_view_weight
|
|
|
234 |
source='product_view'
|
|
|
235 |
category_id=3
|
|
|
236 |
sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
|
|
|
237 |
cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
|
|
|
238 |
conn.commit()
|
|
|
239 |
|
|
|
240 |
for x, y in userTabletCategory.iteritems():
|
|
|
241 |
for bra,cow in y.iteritems():
|
|
|
242 |
user_id=x
|
|
|
243 |
brand= bra
|
|
|
244 |
count=cow
|
|
|
245 |
weight=cow*product_view_weight
|
|
|
246 |
source='product_view'
|
|
|
247 |
category_id=5
|
|
|
248 |
sql = "replace into newuserlinkssegmentation (user_id, brand, category_id, source, count,weight,created,modified) values(%s, %s, %s, %s, %s,%s,%s,%s)"
|
|
|
249 |
cursor.execute(sql,(user_id,brand, category_id, source, count,weight,datetime.now(),datetime.now()))
|
|
|
250 |
conn.commit()
|
|
|
251 |
except:
|
|
|
252 |
print traceback.print_exc()
|
|
|
253 |
conn.close()
|
|
|
254 |
|
|
|
255 |
print '********** Completed inserting records for store product view in master table *********'
|
|
|
256 |
|
|
|
257 |
def findRepeatingOrders():
|
|
|
258 |
print '********** Call to sanitize data for orders if any multiple *********'
|
|
|
259 |
datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='orders' group by user_id,brand,category_id having count(source)=2;"
|
|
|
260 |
conn = getDbConnection()
|
|
|
261 |
cursor = conn.cursor()
|
|
|
262 |
rows_count = cursor.execute(datesql)
|
|
|
263 |
if rows_count>0:
|
|
|
264 |
result = cursor.fetchall()
|
|
|
265 |
for r in result:
|
|
|
266 |
print r
|
|
|
267 |
findDetails(r[0],r[1],r[2])
|
|
|
268 |
else:
|
|
|
269 |
print 'No Data to be sanitized for repeating orders'
|
|
|
270 |
|
|
|
271 |
def findDetails(userId,brand,category):
|
|
|
272 |
datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='orders' and brand=%s group by source;"
|
|
|
273 |
conn = getDbConnection()
|
|
|
274 |
cursor = conn.cursor()
|
|
|
275 |
cursor.execute(datesql,(userId,brand))
|
|
|
276 |
result = cursor.fetchall()
|
|
|
277 |
for r in result:
|
|
|
278 |
deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='orders'"
|
|
|
279 |
cursor = conn.cursor()
|
|
|
280 |
cursor.execute(deletesql,(userId,brand))
|
|
|
281 |
conn.commit()
|
|
|
282 |
insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
|
|
|
283 |
cursor = conn.cursor()
|
|
|
284 |
cursor.execute(insertsql,(userId,brand,category,'orders',r[0],r[1]))
|
|
|
285 |
conn.commit()
|
|
|
286 |
|
|
|
287 |
def findRepeatingClicks():
|
|
|
288 |
print '********** Call to sanitize data for product view if any multiple *********'
|
|
|
289 |
datesql= "select user_id,brand,category_id from userlinkssegmentation7days where source='product_view' group by user_id,brand,category_id having count(source)=2;"
|
|
|
290 |
conn = getDbConnection()
|
|
|
291 |
cursor = conn.cursor()
|
|
|
292 |
rows_count = cursor.execute(datesql)
|
|
|
293 |
if rows_count>0:
|
|
|
294 |
result = cursor.fetchall()
|
|
|
295 |
for r in result:
|
|
|
296 |
print r
|
|
|
297 |
findViewDetails(r[0],r[1],r[2])
|
|
|
298 |
else:
|
|
|
299 |
print 'No Data to be sanitized for product view'
|
|
|
300 |
|
|
|
301 |
def findViewDetails(userId,brand,category):
|
|
|
302 |
datesql= "select sum(count),sum(weight) from userlinkssegmentation7days where user_id=%s and source='product_view' and brand=%s group by source;"
|
|
|
303 |
conn = getDbConnection()
|
|
|
304 |
cursor = conn.cursor()
|
|
|
305 |
cursor.execute(datesql,(userId,brand))
|
|
|
306 |
result = cursor.fetchall()
|
|
|
307 |
for r in result:
|
|
|
308 |
deletesql="delete from userlinkssegmentation7days where user_id=%s and brand=%s and source='product_view'"
|
|
|
309 |
cursor = conn.cursor()
|
|
|
310 |
cursor.execute(deletesql,(userId,brand))
|
|
|
311 |
conn.commit()
|
|
|
312 |
insertsql="insert into userlinkssegmentation7days (user_id, brand, category_id, source, count,weight) values(%s, %s, %s, %s, %s,%s)"
|
|
|
313 |
cursor = conn.cursor()
|
|
|
314 |
cursor.execute(insertsql,(userId,brand,category,'product_view',r[0],r[1]))
|
|
|
315 |
conn.commit()
|
|
|
316 |
|
| 20689 |
kshitij.so |
317 |
def get_mongo_connection_new(host='192.168.161.154', port=27017):
|
| 17317 |
manas |
318 |
global con
|
|
|
319 |
if con is None:
|
|
|
320 |
print "Establishing connection %s host and port %d" %(host,port)
|
|
|
321 |
try:
|
|
|
322 |
con = pymongo.MongoClient(host, port)
|
|
|
323 |
except Exception, e:
|
|
|
324 |
print e
|
|
|
325 |
return None
|
|
|
326 |
return con
|
|
|
327 |
|
|
|
328 |
def main():
|
|
|
329 |
print datetime.now()
|
|
|
330 |
print '********** Starting to generate Master Data Users *********'
|
|
|
331 |
addUsersForLinks()
|
|
|
332 |
filters()
|
|
|
333 |
storeProductView()
|
|
|
334 |
print '********** Finished updating records for user links *********'
|
|
|
335 |
if __name__=='__main__':
|
|
|
336 |
main()
|