'''
Created on 31-Aug-2011

@author: Varun Gupta
'''
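# Tornado web application for an internal price-comparison dashboard: it pulls the
# catalog from the shop2020 CatalogClient, looks up competitor listings with the
# PyLucene Retriever, and serves the comparison page plus feedback, watchlist and
# download endpoints on port 8889.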
import tornado.httpserver, tornado.ioloop, tornado.template, tornado.web
import json, os, ConfigParser, sys
from PyLucene.Retriever import Retriever
from Utils import getItemsWithTopScore, isPriceSame, getProductClusters, getFilteredClustersWithTopScores, \
    getDisplayInfo, getValidItems, getProductName
from ScraperLoader import getScraper
from PyLucene.IndexBuilder import IndexBuilder
from DataStore.WatchListManager import WatchListManager

# Make the shop2020 package importable before pulling in its client and thrift types.
cmd_folder = os.path.dirname(os.path.abspath(os.environ["HOME"] + "/code/trunk/PyProj/src/shop2020/"))
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)

from shop2020.clients.CatalogClient import CatalogClient
from shop2020.thriftpy.model.v1.catalog.ttypes import status

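# Cookie-based authentication: BaseHandler reads the 'userauth' secure cookie, and
# LoginHandler sets it after checking the submitted credentials against the [auth]
# section of app.cfg.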
class BaseHandler(tornado.web.RequestHandler):
    def get_current_user(self):
        return self.get_secure_cookie('userauth')

class LoginHandler(BaseHandler):
    def get(self):
        self.loader = tornado.template.Loader('HTMLTemplates')
        self.write(self.loader.load('LoginForm.html').generate())

    def post(self):
        config = ConfigParser.SafeConfigParser()
        config.read('app.cfg')

        username = self.get_argument('username')
        password = self.get_argument('password')

        if username == config.get('auth', 'username') and password == config.get('auth', 'password'):
            print 'Password Matched'
            self.set_secure_cookie("userauth", username + '_' + password)
            self.redirect('/')
        else:
            self.redirect('/login')

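# URLFeedbackHandler lets an operator submit the correct product-page URL for a given
# catalog entity and source. The URL is saved to urls.json, the page is scraped right
# away via that source's scraper, and the scraped data is cached in secondary-crawl.json.
# Both JSON files are expected to exist already; only empty or invalid JSON is tolerated.
# Illustrative request (argument names from the handler, values made up):
#   POST /feedback-url with entity=1001&source=flipkart&url=http://www.flipkart.com/...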
class URLFeedbackHandler(BaseHandler):
    url_feedback_file = '/usr/price-comp-dashboard/urls.json'
    secondary_crawl_file = '/usr/price-comp-dashboard/secondary-crawl.json'

    def post(self):
        try:
            fp_read = open(URLFeedbackHandler.url_feedback_file, 'r')
            urls = json.load(fp_read)

        except ValueError as e:
            print e
            urls = {}
        finally:
            fp_read.close()

        print 'Existing URLs: ', urls

        entity = self.get_argument('entity')
        source = self.get_argument('source')
        url = self.get_argument('url')

        if entity in urls:
            urls[entity][source] = url
        else:
            urls[entity] = {source: url}

        print 'New set of URLs: ', urls
        fp_write = open(URLFeedbackHandler.url_feedback_file, 'w')
        json.dump(urls, fp_write, indent = 4)
        fp_write.close()

        # Scraping the page
        scraper = getScraper(source)
        productData = scraper.getDataFromProductPage(url)

        # Storing the data
        try:
            fp_read = open(URLFeedbackHandler.secondary_crawl_file, 'r')
            data = json.load(fp_read)

        except ValueError as e:
            print e
            data = {}
        finally:
            fp_read.close()

        if entity in data:
            data[entity][source] = productData
        else:
            data[entity] = {source: productData}

        print 'Secondary crawled data:', data

        fp_write = open(URLFeedbackHandler.secondary_crawl_file, 'w')
        json.dump(data, fp_write, indent = 4)
        fp_write.close()

        productData['entityId'] = entity
        self.write(productData)

    def get(self):
        try:
            fp_read = open(URLFeedbackHandler.secondary_crawl_file, 'r')
            data = json.load(fp_read)

        except ValueError as e:
            print e
            data = {}
        finally:
            fp_read.close()
        self.write(data)

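# FeedbackHandler records reviewer feedback on a price match in feedback.json, keyed by
# entity and source; a 'select' feedback additionally stores which candidate item was chosen.
# Illustrative request (argument names from the handler, values made up):
#   POST /feedback with type=select&entityId=1001&source=flipkart&selected=<item>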
class FeedbackHandler(BaseHandler):

    def save(self, entity, source, feedback_type, selected_item = None):
        self.feedback_file = '/usr/price-comp-dashboard/feedback.json'
        file_to_read = open(self.feedback_file, 'r')

        feedbacks_json = file_to_read.read()
        file_to_read.close()

        feedbacks = json.loads(feedbacks_json) if len(feedbacks_json) > 1 else {}

        if entity not in feedbacks: feedbacks[entity] = {}

        feedbacks[entity][source] = {'type': feedback_type}

        if selected_item is not None: feedbacks[entity][source]['selected_item'] = selected_item

        file_to_write = open(self.feedback_file, 'w')
        json.dump(feedbacks, file_to_write, indent = 4)
        file_to_write.close()

    def post(self):
        feedback_type = self.get_argument('type')
        entity_id = self.get_argument('entityId')
        price_data_source = self.get_argument('source')

        print feedback_type, entity_id, price_data_source

        if feedback_type == 'select':
            selected_item = self.get_argument('selected')
            print selected_item
            self.save(entity_id, price_data_source, feedback_type, selected_item)
        else:
            self.save(entity_id, price_data_source, feedback_type)

    def get(self):
        print 'GET: Feedback data'
        self.feedback_file = '/usr/price-comp-dashboard/feedback.json'
        file_to_read = open(self.feedback_file, 'r')

        feedbacks_json = file_to_read.read()
        file_to_read.close()

        self.write(feedbacks_json)

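# MainHandler builds the dashboard page: it fetches ACTIVE, PAUSED and PAUSED_BY_RISK
# catalog items, keeps those whose category id is in a fixed whitelist, retrieves and
# clusters competitor listings for each item, and renders the result with
# HTMLTemplates/PriceChart.html (also dumping the data to /usr/pcd_log).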
class MainHandler(BaseHandler):

    def mapSearchUrls(self, map, name):

        search_urls = {
            'flipkart': 'http://www.flipkart.com/search-mobiles?query=$$&from=all&searchGroup=mobiles',
            'homeshop18': 'http://www.homeshop18.com/nokia%20n97/search:$$/categoryid:3024',
            'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
            'infibeam': 'http://www.infibeam.com/Mobiles/search?q=$$',
            'letsbuy': 'http://www.letsbuy.com/advanced_search_result.php?cPath=254&keywords=$$',
            'snapdeal': '$$',
            'sulekha': 'http://mobiles.sulekha.com/search.htm?cx=partner-pub-3470583419345383%3A8ggsimfcaaa&cof=FORID%3A10&ie=ISO-8859-1&q=$$&sa=Go',
            'tradus': 'http://www.tradus.com/search/tradus_search/?query=$$'
        }

        for key in search_urls.iterkeys():
            try:
                if map[key]['url'] == 'Not Found':
                    map[key]['url'] = search_urls[key].replace('$$', name)
            except KeyError:
                map[key] = {'price': 'Not Found', 'url': search_urls[key].replace('$$', name)}
        return map

    @tornado.web.authenticated
    def get(self):
        self.loader = tornado.template.Loader('HTMLTemplates')
        catalog_client = CatalogClient().get_client()
        items = catalog_client.getAllItemsByStatus(status.ACTIVE)
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED))
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED_BY_RISK))
        # synonyms = getSynonyms()
        # print synonyms
        retriever = Retriever()
        products = {}

        for item in items:
            if item.category in (10002, 10003, 10004, 10005, 10010, 11002, 11003): products[item.catalogItemId] = item

        comparative_prices = []

        for item in sorted(products.itervalues(), key = lambda item: item.brand):
            try:
                model_name = item.modelName.strip() if len(item.modelName.strip()) > 0 else None
                model_number = item.modelNumber.strip() if len(item.modelNumber.strip()) > 0 else None

                # synonyms_for_this_model = synonyms[item.catalogItemId] if item.catalogItemId in synonyms else None

                search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)

                clusters = getProductClusters(search_results)
                filtered_clusters = getFilteredClustersWithTopScores(clusters)

                product_name = "%s " % item.brand
                product_name += "%s " % model_name if model_name is not None else ''
                product_name += model_number if model_number is not None else ''

                display_info = getDisplayInfo(filtered_clusters, product_name)
                print 'Display Info: ', display_info

                display_info['entity_id'] = item.catalogItemId
                display_info['product_name'] = product_name
                display_info['saholic'] = {'price': item.sellingPrice}
                comparative_prices.append(display_info)
            except Exception as e:
                print 'Exception for %s:' % item.catalogItemId, e

        json.dump(comparative_prices, open('/usr/pcd_log', 'w'), indent = 4)
        self.write(self.loader.load('PriceChart.html').generate(data = comparative_prices))

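# WatchlistHandler exposes the persistent watchlist kept by DataStore.WatchListManager:
# GET returns the watched entity ids, POST adds ('save') or removes ('delete') an entity.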
class WatchlistHandler(BaseHandler):

    @tornado.web.authenticated
    def get(self):
        watchlistManager = WatchListManager()
        watchlist = watchlistManager.getWatchlist()
        print 'Getting watchlist: ', watchlist
        entityIds = []

        for id in watchlist:
            entityIds.append(int(id))

        self.write(str(entityIds))

    def post(self):
        watchlistManager = WatchListManager()

        requestType = self.get_argument('type').strip()
        entityId = self.get_argument('entity')

        print 'Request Type:', requestType, ', Entity Id: ', entityId

        if requestType == 'save':
            watchlistManager.save(entity = entityId)

        elif requestType == 'delete':
            watchlistManager.remove(entity = entityId)

        self.write("{}")

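# DownloadHandler streams a tab-separated report (served with an .xls filename): for each
# valid item it lists the three cheapest vendor transfer prices, the Saholic selling price,
# and the competitor price found by the retriever for each source.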
class DownloadHandler(BaseHandler):

    def post(self):
        catalog_client = CatalogClient().get_client()
        retriever = Retriever()
        vendors = {}

        for vendor in catalog_client.getAllVendors():
            vendors[vendor.id] = vendor.name

        self.set_header('Content-Type', 'text/csv')
        self.set_header("Content-disposition", "inline; filename=price-comparison.xls")

        newLine = '\n'
        tab = '\t'

        # Column headings; the per-row fields appended below follow the same order.
        header = 'Product' + tab
        header += 'Vendor' + tab + 'TP' + tab + 'Vendor' + tab + 'TP' + tab + 'Vendor' + tab + 'TP' + tab
        header += 'Saholic' + tab + 'Flipkart' + tab + 'Homeshop18' + tab + 'Infibeam' + tab + 'Snapdeal' + tab + 'Sulekha' + tab + 'Tradus' + newLine

        responseText = header

        for item in getValidItems():
            vendorItemPricings = catalog_client.getAllItemPricing(item.id)
            sortedPricings = sorted(vendorItemPricings, key = lambda vendorItemPricing: vendorItemPricing.transferPrice)
            productName = getProductName(item)

            row = productName + tab

            # Up to three cheapest vendors with their transfer prices.
            for i in range(3):
                if len(sortedPricings) > i:
                    row += vendors[sortedPricings[i].vendorId] + tab + str(sortedPricings[i].transferPrice) + tab
                else:
                    row += tab + tab

            row += str(item.sellingPrice) + tab

            model_name = item.modelName.strip() if len(item.modelName.strip()) > 0 else None
            model_number = item.modelNumber.strip() if len(item.modelNumber.strip()) > 0 else None

            search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)

            clusters = getProductClusters(search_results)
            filtered_clusters = getFilteredClustersWithTopScores(clusters)
            display_info = getDisplayInfo(filtered_clusters, productName)

            # Competitor prices, in the same order as the header columns.
            for sourceName in ('flipkart', 'homeshop18', 'infibeam', 'snapdeal', 'sulekha', 'tradus'):
                if 'price' in display_info[sourceName] and display_info[sourceName]['price'] is not None:
                    row += display_info[sourceName]['price'] + tab
                else:
                    row += tab

            responseText += row + newLine

        self.write(responseText)

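# Route table and settings: authenticated handlers redirect to /login, the 'userauth'
# cookie is signed with cookie_secret, and jquery-1.6.2.min.js is served from the static
# directory.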
settings = {
    'static_path': os.path.join(os.path.dirname(__file__), 'static'),
    'login_url': '/login',
    'cookie_secret': "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo="
}

application = tornado.web.Application([
    (r"/", MainHandler),
    (r"/login", LoginHandler),
    (r"/feedback", FeedbackHandler),
    (r"/feedback-url", URLFeedbackHandler),
    (r"/watchlist", WatchlistHandler),
    (r"/download", DownloadHandler),
    (r"/(jquery-1.6.2.min\.js)", tornado.web.StaticFileHandler, dict(path=settings['static_path']))
], **settings)

if __name__ == '__main__':
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(8889)
    tornado.ioloop.IOLoop.instance().start()
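# Usage sketch (the module file name below is hypothetical; port 8889 comes from the code above):
#   python price_comp_dashboard.py
#   curl -d 'type=wrong&entityId=1001&source=flipkart' http://localhost:8889/feedback
# The feedback type, entity id and source values are illustrative only.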