| 3232 |
varun.gupt |
1 |
'''
|
|
|
2 |
Created on 31-Aug-2011
|
|
|
3 |
|
|
|
4 |
@author: Varun Gupta
|
|
|
5 |
'''
|
| 4198 |
varun.gupt |
6 |
import tornado.httpserver, tornado.ioloop, tornado.web
|
| 3232 |
varun.gupt |
7 |
import json, os, ConfigParser, sys
|
|
|
8 |
from PyLucene.Retriever import Retriever
|
| 3313 |
varun.gupt |
9 |
from Utils import getItemsWithTopScore, isPriceSame, getProductClusters, getFilteredClustersWithTopScores, \
|
| 3440 |
varun.gupt |
10 |
getDisplayInfo, getSynonyms
|
| 4198 |
varun.gupt |
11 |
from ScraperLoader import getScraper
|
|
|
12 |
from PyLucene.IndexBuilder import IndexBuilder
|
| 3232 |
varun.gupt |
13 |
|
| 3235 |
rajveer |
14 |
# Make the shop2020 package root importable: the dirname() call strips the
# trailing "shop2020" component, leaving $HOME/code/trunk/PyProj/src on
# sys.path so "from shop2020.clients..." imports below resolve.
# expanduser('~') is used instead of os.environ["HOME"] so a missing HOME
# environment variable does not raise KeyError.
cmd_folder = os.path.dirname(os.path.abspath(
    os.path.join(os.path.expanduser('~'), 'code/trunk/PyProj/src/shop2020/')))
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)
|
|
17 |
|
|
|
18 |
from shop2020.clients.CatalogClient import CatalogClient
|
| 3350 |
varun.gupt |
19 |
from shop2020.thriftpy.model.v1.catalog.ttypes import status
|
| 3440 |
varun.gupt |
20 |
|
| 3232 |
varun.gupt |
21 |
class BaseHandler(tornado.web.RequestHandler):
    """Common base for all dashboard handlers: supplies the logged-in user.

    Tornado's @authenticated decorator treats a falsy return value from
    get_current_user() as "not logged in" and redirects to login_url.
    """

    def get_current_user(self):
        # The cookie is set by LoginHandler.post on successful login.
        auth_cookie = self.get_secure_cookie('userauth')
        return auth_cookie
|
|
|
24 |
|
|
|
25 |
class LoginHandler(BaseHandler):
|
|
|
26 |
def get(self):
|
|
|
27 |
self.loader = tornado.template.Loader('HTMLTemplates')
|
|
|
28 |
self.write(self.loader.load('LoginForm.html').generate())
|
|
|
29 |
|
|
|
30 |
def post(self):
|
|
|
31 |
config = ConfigParser.SafeConfigParser()
|
|
|
32 |
config.read('app.cfg')
|
|
|
33 |
|
|
|
34 |
username = self.get_argument('username')
|
|
|
35 |
password = self.get_argument('password')
|
|
|
36 |
|
|
|
37 |
if username == config.get('auth', 'username') and password == config.get('auth', 'password'):
|
|
|
38 |
print 'Password Matched'
|
|
|
39 |
self.set_secure_cookie("userauth", username + '_' + password)
|
|
|
40 |
self.redirect('/')
|
|
|
41 |
else:
|
|
|
42 |
self.redirect('/login')
|
|
|
43 |
|
| 4198 |
varun.gupt |
44 |
class URLFeedbackHandler(BaseHandler):
|
|
|
45 |
|
|
|
46 |
def post(self):
|
|
|
47 |
self.url_feedback_file = '/tmp/price-comp-dashboard/urls.json'
|
|
|
48 |
|
|
|
49 |
try:
|
|
|
50 |
fp_read = open(self.url_feedback_file, 'r')
|
|
|
51 |
urls = json.load(fp_read)
|
|
|
52 |
|
|
|
53 |
except ValueError as e:
|
|
|
54 |
print e
|
|
|
55 |
urls = {}
|
|
|
56 |
finally:
|
|
|
57 |
fp_read.close()
|
|
|
58 |
print urls
|
|
|
59 |
|
|
|
60 |
entity = self.get_argument('entity')
|
|
|
61 |
source = self.get_argument('source')
|
|
|
62 |
url = self.get_argument('url')
|
|
|
63 |
|
|
|
64 |
if entity in urls:
|
|
|
65 |
urls[entity][source] = url
|
|
|
66 |
else:
|
|
|
67 |
urls[entity] = {source: url}
|
|
|
68 |
|
|
|
69 |
fp_write = open(self.url_feedback_file, 'w')
|
|
|
70 |
json.dump(urls, fp_write, indent = 4)
|
|
|
71 |
fp_write.close()
|
|
|
72 |
|
|
|
73 |
#Scraping the page
|
|
|
74 |
scraper = getScraper(source)
|
|
|
75 |
data = scraper.getDataFromProductPage(url)
|
|
|
76 |
index_builder = IndexBuilder([data], new_index = False)
|
|
|
77 |
index_builder.build()
|
|
|
78 |
self.write(data)
|
|
|
79 |
|
|
|
80 |
def get(self):
|
|
|
81 |
self.url_feedback_file = '/tmp/price-comp-dashboard/urls.json'
|
|
|
82 |
|
| 3440 |
varun.gupt |
83 |
class FeedbackHandler(BaseHandler):
|
|
|
84 |
|
|
|
85 |
def save(self, entity, source, feedback_type, selected_item = None):
|
| 4198 |
varun.gupt |
86 |
self.feedback_file = '/tmp/price-comp-dashboard/feedback.json'
|
| 3440 |
varun.gupt |
87 |
file_to_read = open(self.feedback_file, 'r')
|
|
|
88 |
|
|
|
89 |
feedbacks_json = file_to_read.read()
|
|
|
90 |
file_to_read.close()
|
|
|
91 |
|
|
|
92 |
feedbacks = json.loads(feedbacks_json) if len(feedbacks_json) > 1 else {}
|
|
|
93 |
|
|
|
94 |
if entity not in feedbacks: feedbacks[entity] = {}
|
|
|
95 |
|
|
|
96 |
feedbacks[entity][source] = {'type': feedback_type}
|
|
|
97 |
|
|
|
98 |
if selected_item is not None: feedbacks[entity][source]['selected_item'] = selected_item
|
|
|
99 |
|
|
|
100 |
file_to_write = open(self.feedback_file, 'w')
|
|
|
101 |
file_to_write.write(json.dumps(feedbacks))
|
|
|
102 |
file_to_write.close()
|
|
|
103 |
|
|
|
104 |
def post(self):
|
|
|
105 |
feedback_type = self.get_argument('type')
|
|
|
106 |
entity_id = self.get_argument('entityId')
|
|
|
107 |
price_data_source = self.get_argument('source')
|
|
|
108 |
|
|
|
109 |
print feedback_type, entity_id, price_data_source
|
|
|
110 |
|
|
|
111 |
if feedback_type == 'select':
|
|
|
112 |
selected_item = self.get_argument('selected')
|
|
|
113 |
print selected_item
|
|
|
114 |
self.save(entity_id, price_data_source, feedback_type, selected_item)
|
|
|
115 |
else:
|
|
|
116 |
self.save(entity_id, price_data_source, feedback_type)
|
|
|
117 |
|
|
|
118 |
def get(self):
|
|
|
119 |
print 'GET: Feedback data'
|
| 4198 |
varun.gupt |
120 |
self.feedback_file = '/tmp/price-comp-dashboard/feedback.json'
|
| 3440 |
varun.gupt |
121 |
file_to_read = open(self.feedback_file, 'r')
|
|
|
122 |
|
|
|
123 |
feedbacks_json = file_to_read.read()
|
|
|
124 |
file_to_read.close()
|
|
|
125 |
|
|
|
126 |
self.write(feedbacks_json)
|
|
|
127 |
|
| 3232 |
varun.gupt |
128 |
class MainHandler(BaseHandler):
    """Builds and renders the price-comparison chart (PriceChart.html) for
    the active/paused catalog items, matching each against scraped
    competitor listings."""

    def mapSearchUrls(self, map, name):
        """For every known store, fill in a search-page URL for *name* when
        no direct product URL was found.

        map  -- per-store dict of {'price': ..., 'url': ...} entries
                (NOTE: parameter name shadows the builtin `map`)
        name -- product name substituted for the '$$' placeholder
        """
        # Store-specific search URL templates; '$$' marks the query slot.
        search_urls = {
            'flipkart': 'http://www.flipkart.com/search-mobiles?query=$$&from=all&searchGroup=mobiles',
            'homeshop18': 'http://www.homeshop18.com/nokia%20n97/search:$$/categoryid:3024',
            'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
            'infibeam': 'http://www.infibeam.com/Mobiles/search?q=$$',
            'letsbuy': 'http://www.letsbuy.com/advanced_search_result.php?cPath=254&keywords=$$'
        }

        for key in search_urls.iterkeys():
            try:
                # Replace only placeholder 'Not Found' URLs with a search link.
                if map[key]['url'] == 'Not Found':
                    map[key]['url'] = search_urls[key].replace('$$', name)
            except KeyError:
                # Store missing entirely from the result map: add a stub entry.
                map[key] = {'price': 'Not Found', 'url': search_urls[key].replace('$$', name)}
        return map


    @tornado.web.authenticated
    def get(self):
        """Render the comparison chart for all relevant catalog items."""
        self.loader = tornado.template.Loader('HTMLTemplates')
        catalog_client = CatalogClient().get_client()
        # Include active plus both flavours of paused items.
        items = catalog_client.getAllItemsByStatus(status.ACTIVE)
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED))
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED_BY_RISK))
        synonyms = getSynonyms()
        print synonyms
        retriever = Retriever()
        products = {}

        # Keep only items in the relevant categories (presumably the phone/
        # mobile categories -- TODO confirm the category-id meanings).
        for item in items:
            if item.category in (10002, 10003, 10004, 10005, 10010): products[item.catalogItemId] = item

        comparative_prices = []

        # One chart row per product, grouped by brand.
        for item in sorted(products.itervalues(), key = lambda item: item.brand):
            try:
                # Empty/whitespace-only model fields are normalized to None.
                model_name = item.modelName.strip() if len(item.modelName.strip()) > 0 else None
                model_number = item.modelNumber.strip() if len(item.modelNumber.strip()) > 0 else None

                synonyms_for_this_model = synonyms[item.catalogItemId] if item.catalogItemId in synonyms else None

                search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = synonyms_for_this_model)

                # Cluster the raw search hits, then keep top-scoring clusters.
                clusters = getProductClusters(search_results)
                filtered_clusters = getFilteredClustersWithTopScores(clusters)

                # Display name: "<brand> [<model name> ][<model number>]".
                product_name = "%s " % item.brand
                product_name += "%s " % model_name if model_name is not None else ''
                product_name += model_number if model_number is not None else ''

                display_info = getDisplayInfo(filtered_clusters, product_name)

                display_info['entity_id'] = item.catalogItemId
                display_info['product_name'] = product_name
                # Our own store's price for the comparison column.
                display_info['saholic'] = {'price': item.sellingPrice}
                comparative_prices.append(display_info)
            except Exception as e:
                # Best-effort per item: a failure on one product should not
                # break the whole chart. Logged to stdout only.
                print 'Exception:', e

        self.write(self.loader.load('PriceChart.html').generate(data = comparative_prices))
|
|
|
192 |
|
|
|
193 |
|
|
|
194 |
# Global Tornado application settings.
settings = {
    'login_url': '/login',
    'static_path': os.path.join(os.path.dirname(__file__), 'static'),
    # NOTE(review): cookie secret is hard-coded in source control --
    # consider loading it from configuration so it can be rotated.
    'cookie_secret': "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
}
|
|
|
199 |
|
|
|
200 |
# Route table -- all handler classes are defined earlier in this module.
application = tornado.web.Application(
    [
        (r"/", MainHandler),
        (r"/login", LoginHandler),
        (r"/feedback", FeedbackHandler),
        (r"/feedback-url", URLFeedbackHandler),
        # Serve the bundled jQuery straight from the static directory.
        (r"/(jquery-1.6.2.min\.js)", tornado.web.StaticFileHandler,
         dict(path=settings['static_path'])),
    ],
    **settings)
|
|
|
207 |
|
|
|
208 |
if __name__ == '__main__':
    # Run the dashboard as a standalone HTTP server on port 8889
    # until the process is interrupted.
    server = tornado.httpserver.HTTPServer(application)
    server.listen(8889)
    tornado.ioloop.IOLoop.instance().start()
|