Subversion Repositories SmartDukaan

Rev

Rev 6170 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 31-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
4198 varun.gupt 6
import tornado.httpserver, tornado.ioloop, tornado.web
3232 varun.gupt 7
import json, os, ConfigParser, sys
8
from PyLucene.Retriever import Retriever
3313 varun.gupt 9
from Utils import getItemsWithTopScore, isPriceSame, getProductClusters, getFilteredClustersWithTopScores, \
5401 varun.gupt 10
    getDisplayInfo, getValidItems, getProductName
4198 varun.gupt 11
from ScraperLoader import getScraper
12
from PyLucene.IndexBuilder import IndexBuilder
5291 varun.gupt 13
from DataStore.WatchListManager import WatchListManager
3232 varun.gupt 14
 
3235 rajveer 15
# Put the parent directory of the shop2020 package (under $HOME) at the front
# of sys.path so that the `shop2020.*` imports below can be resolved.
_shop2020_package_dir = os.environ["HOME"] + "/code/trunk/PyProj/src/shop2020/"
cmd_folder = os.path.dirname(os.path.abspath(_shop2020_package_dir))
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)
18
 
19
from shop2020.clients.CatalogClient import CatalogClient
3350 varun.gupt 20
from shop2020.thriftpy.model.v1.catalog.ttypes import status
3440 varun.gupt 21
 
3232 varun.gupt 22
class BaseHandler(tornado.web.RequestHandler):
    """Base class of the dashboard handlers; supplies the current-user lookup."""

    def get_current_user(self):
        """Return whatever the signed 'userauth' cookie holds for this request."""
        auth_cookie = self.get_secure_cookie('userauth')
        return auth_cookie
25
 
26
class LoginHandler(BaseHandler):
    """Serves the login form and validates the submitted credentials."""

    def get(self):
        """Render the 'LoginForm.html' template from the HTMLTemplates folder."""
        self.loader = tornado.template.Loader('HTMLTemplates')
        self.write(self.loader.load('LoginForm.html').generate())

    def post(self):
        """Compare the posted username/password with the [auth] section of app.cfg.

        On a match the 'userauth' cookie is set and the client is redirected
        to '/'; otherwise the client is sent back to '/login'.
        """
        config = ConfigParser.SafeConfigParser()
        config.read('app.cfg')

        username = self.get_argument('username')
        password = self.get_argument('password')

        credentials_match = (username == config.get('auth', 'username')
                             and password == config.get('auth', 'password'))

        if not credentials_match:
            self.redirect('/login')
            return

        print('Password Matched')
        # Signed cookies are tamper-proof but their content is readable by the
        # client, so only the user name is stored -- never the password.
        self.set_secure_cookie("userauth", username)
        self.redirect('/')
44
 
4198 varun.gupt 45
class URLFeedbackHandler(BaseHandler):
    """Stores user-supplied product URLs and the data scraped from them.

    Two JSON files are maintained, both keyed by entity and then by source:
    `url_feedback_file` holds the submitted URLs and `secondary_crawl_file`
    holds the data the scraper returned for those URLs.
    """
    url_feedback_file = '/usr/price-comp-dashboard/urls.json'
    secondary_crawl_file = '/usr/price-comp-dashboard/secondary-crawl.json'

    @staticmethod
    def _load_json(path):
        """Return the parsed JSON content of `path`, or {} if it cannot be used.

        A missing/unreadable file or invalid JSON is printed and treated as an
        empty store. The file handle is always closed by the `with` block, so
        a failing open() can no longer trigger a secondary error on close().
        """
        try:
            with open(path, 'r') as fp_read:
                return json.load(fp_read)
        except (IOError, ValueError) as e:
            print(e)
            return {}

    @staticmethod
    def _save_json(path, content):
        """Write `content` to `path` as JSON indented by 4 spaces."""
        with open(path, 'w') as fp_write:
            json.dump(content, fp_write, indent = 4)

    def post(self):
        """Record the URL for (entity, source), scrape it and store the result.

        Reads the 'entity', 'source' and 'url' request arguments and responds
        with the scraped product data extended by an 'entityId' key.
        """
        urls = self._load_json(URLFeedbackHandler.url_feedback_file)
        print('%s %s' % ('Existing URLs: ', urls))

        entity = self.get_argument('entity')
        source = self.get_argument('source')
        url = self.get_argument('url')

        urls.setdefault(entity, {})[source] = url

        print('%s %s' % ('New set of URLs: ', urls))
        self._save_json(URLFeedbackHandler.url_feedback_file, urls)

        #Scraping the page
        scraper = getScraper(source)
        productData = scraper.getDataFromProductPage(url)

        #Storing the data
        data = self._load_json(URLFeedbackHandler.secondary_crawl_file)
        data.setdefault(entity, {})[source] = productData

        print('%s %s' % ('Secondary crawled data:', data))
        self._save_json(URLFeedbackHandler.secondary_crawl_file, data)

        # The entity id is added only after the dump above, so it is part of
        # the response but not of the stored crawl data.
        productData['entityId'] = entity
        self.write(productData)

    def get(self):
        """Respond with the complete secondary-crawl store ({} when unavailable)."""
        self.write(self._load_json(URLFeedbackHandler.secondary_crawl_file))
4198 varun.gupt 116
 
3440 varun.gupt 117
class FeedbackHandler(BaseHandler):
    """Persists per-entity, per-source feedback in a JSON file."""

    # Declared once at class level (like the paths on URLFeedbackHandler)
    # instead of being re-assigned inside every method.
    feedback_file = '/usr/price-comp-dashboard/feedback.json'

    def save(self, entity, source, feedback_type, selected_item = None):
        """Store the feedback for (entity, source), replacing any previous entry.

        entity, source -- keys under which the feedback is filed
        feedback_type  -- stored under the 'type' key
        selected_item  -- stored under 'selected_item' when it is not None

        The feedback file must already exist; an empty or whitespace-only
        file is treated as an empty store.
        """
        with open(self.feedback_file, 'r') as file_to_read:
            feedbacks_json = file_to_read.read()

        # Anything shorter than two significant characters cannot be a JSON
        # object, so start from an empty store in that case.
        feedbacks = json.loads(feedbacks_json) if len(feedbacks_json.strip()) > 1 else {}

        entry = {'type': feedback_type}
        if selected_item is not None:
            entry['selected_item'] = selected_item

        feedbacks.setdefault(entity, {})[source] = entry

        with open(self.feedback_file, 'w') as file_to_write:
            json.dump(feedbacks, file_to_write, indent = 4)

    def post(self):
        """Save the feedback described by the 'type', 'entityId' and 'source' arguments.

        For feedback of type 'select' the 'selected' argument is required and
        stored as well.
        """
        feedback_type = self.get_argument('type')
        entity_id = self.get_argument('entityId')
        price_data_source = self.get_argument('source')

        print('%s %s %s' % (feedback_type, entity_id, price_data_source))

        if feedback_type == 'select':
            selected_item = self.get_argument('selected')
            print(selected_item)
            self.save(entity_id, price_data_source, feedback_type, selected_item)
        else:
            self.save(entity_id, price_data_source, feedback_type)

    def get(self):
        """Respond with the raw content of the feedback file."""
        print('GET: Feedback data')
        with open(self.feedback_file, 'r') as file_to_read:
            feedbacks_json = file_to_read.read()

        self.write(feedbacks_json)
161
 
3232 varun.gupt 162
class MainHandler(BaseHandler):
    """Renders the price-comparison chart for the catalog items."""

    def mapSearchUrls(self, map, name):
        """Fill in a search URL for every known source that lacks a product URL.

        map  -- dict keyed by source name; each value is expected to be a dict
                with a 'url' key
        name -- text substituted for the '$$' placeholder of the URL templates

        A source whose 'url' is 'Not Found' gets the search URL instead; a
        source that is absent (or has no 'url' key) gets a fresh entry with
        price 'Not Found'. The same `map` object is modified and returned.
        """
        search_urls = {
            'flipkart': 'http://www.flipkart.com/search/a/all?query=$$',
            'homeshop18': 'http://www.homeshop18.com/search:$$',
            'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
            'infibeam': 'http://www.infibeam.com/search?q=$$',
            'snapdeal': '$$',
            'sulekha': 'http://mobiles.sulekha.com/search.htm?cx=partner-pub-3470583419345383%3A8ggsimfcaaa&cof=FORID%3A10&ie=ISO-8859-1&q=$$&sa=Go',
            'tradus': 'http://www.tradus.com/search/tradus_search/?query=$$'
        }

        for source in search_urls:
            url_template = search_urls[source]
            try:
                if map[source]['url'] == 'Not Found':
                    map[source]['url'] = url_template.replace('$$', name)
            except KeyError:
                map[source] = {'price': 'Not Found', 'url': url_template.replace('$$', name)}
        return map

    @tornado.web.authenticated
    def get(self):
        """Build the comparative price list and render 'PriceChart.html' with it.

        Items with status ACTIVE, PAUSED and PAUSED_BY_RISK are fetched from
        the catalog, restricted to a fixed set of categories and processed in
        brand order. An item whose processing raises is reported on stdout and
        left out. The resulting list is also dumped to /usr/pcd_log.
        """
        self.loader = tornado.template.Loader('HTMLTemplates')
        catalog_client = CatalogClient().get_client()
        items = catalog_client.getAllItemsByStatus(status.ACTIVE)
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED))
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED_BY_RISK))
        retriever = Retriever()

        compared_categories = (10002, 10003, 10004, 10005, 10010, 11002, 11003)
        products = {}

        # Keyed by catalog item id, so a later item with the same id replaces
        # an earlier one.
        for item in items:
            if item.category in compared_categories:
                products[item.catalogItemId] = item

        comparative_prices = []

        for item in sorted(products.values(), key = lambda item: item.brand):
            try:
                # Blank model name/number are passed to the retriever as None.
                model_name = item.modelName.strip() or None
                model_number = item.modelNumber.strip() or None

                search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)

                clusters = getProductClusters(search_results)
                filtered_clusters = getFilteredClustersWithTopScores(clusters)

                product_name = "%s " % item.brand
                if model_name is not None:
                    product_name += "%s " % model_name
                if model_number is not None:
                    product_name += model_number

                display_info = getDisplayInfo(filtered_clusters, product_name)
                print('%s %s' % ('Display Info: ', display_info))

                display_info['entity_id'] = item.catalogItemId
                display_info['product_name'] = product_name
                display_info['saholic'] = {'price': item.sellingPrice}
                comparative_prices.append(display_info)
            except Exception as e:
                print('%s %s' % ('Exception for %s:' % item.catalogItemId, e))

        # Close the log file deterministically instead of leaking the handle.
        with open('/usr/pcd_log', 'w') as log_file:
            json.dump(comparative_prices, log_file, indent = 4)

        self.write(self.loader.load('PriceChart.html').generate(data = comparative_prices))
5291 varun.gupt 230
 
231
class WatchlistHandler(BaseHandler):
    """Exposes the watch list kept by WatchListManager over HTTP."""

    @tornado.web.authenticated
    def get(self):
        """Respond with the string form of the list of watched entity ids (as ints)."""
        manager = WatchListManager()
        watchlist = manager.getWatchlist()
        print('%s %s' % ('Getting watchlist: ', watchlist))

        entity_ids = [int(entry) for entry in watchlist]
        self.write(str(entity_ids))

    def post(self):
        """Save or delete the entity named by the 'entity' argument.

        The 'type' argument selects the operation ('save' or 'delete'); any
        other value changes nothing. The response body is always "{}".
        """
        manager = WatchListManager()

        requestType = self.get_argument('type').strip()
        entityId = self.get_argument('entity')

        print('%s %s %s %s' % ('Request Type:', requestType, ', Entity Id: ', entityId))

        operations = {'save': manager.save, 'delete': manager.remove}
        operation = operations.get(requestType)

        if operation is not None:
            operation(entity = entityId)

        self.write("{}")
3232 varun.gupt 260
 
5401 varun.gupt 261
class DownloadHandler(BaseHandler):
    """Produces the tab-separated price-comparison download."""

    # (key in the display info, column title) of every competitor column, in
    # output order. The header is generated from this list so that it always
    # has exactly one title per competitor cell of a row.
    _COMPETITOR_COLUMNS = (
        ('flipkart', 'Flipkart'),
        ('homeshop18', 'Homeshop18'),
        ('infibeam', 'Infibeam'),
        ('snapdeal', 'Snapdeal'),
        ('sulekha', 'Sulekha'),
        ('tradus', 'Tradus'),
    )

    # Number of (vendor, transfer price) column pairs per row.
    _VENDOR_SLOTS = 3

    def post(self):
        """Write one tab-separated row per valid item, preceded by a header row.

        Row layout: product name, the vendors with the lowest transfer prices
        (up to three vendor/price pairs, blank when fewer exist), the item's
        own selling price, then one price cell per competitor source (blank
        when the source has no price). Every cell is followed by a tab.
        """
        catalog_client = CatalogClient().get_client()
        retriever = Retriever()
        vendors = {}

        for vendor in catalog_client.getAllVendors():
            vendors[vendor.id] = vendor.name

        self.set_header('Content-Type', 'text/csv')
        self.set_header("Content-disposition", "inline; filename=price-comparison.xls")

        newLine = '\n'
        tab = '\t'

        header_cells = ['Product']
        header_cells.extend(['Vendor', 'TP'] * self._VENDOR_SLOTS)
        header_cells.append('Saholic')
        header_cells.extend(title for _source, title in self._COMPETITOR_COLUMNS)

        lines = [tab.join(header_cells) + newLine]

        for item in getValidItems():
            vendorItemPricings = catalog_client.getAllItemPricing(item.id)
            sortedPricings = sorted(vendorItemPricings, key = lambda vendorItemPricing: vendorItemPricing.transferPrice)
            productName = getProductName(item)

            cells = [productName]

            for slot in range(self._VENDOR_SLOTS):
                if slot < len(sortedPricings):
                    pricing = sortedPricings[slot]
                    cells.append(vendors[pricing.vendorId])
                    cells.append(str(pricing.transferPrice))
                else:
                    # Keep the columns aligned when fewer vendors are available.
                    cells.extend(['', ''])

            cells.append(str(item.sellingPrice))

            # Blank model name/number are passed to the retriever as None.
            model_name = item.modelName.strip() or None
            model_number = item.modelNumber.strip() or None

            search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)

            clusters = getProductClusters(search_results)
            filtered_clusters = getFilteredClustersWithTopScores(clusters)
            display_info = getDisplayInfo(filtered_clusters, productName)

            for source, _title in self._COMPETITOR_COLUMNS:
                source_info = display_info[source]
                price = source_info['price'] if 'price' in source_info else None
                cells.append(price if price is not None else '')

            # Every cell, including the last one, is terminated by a tab.
            lines.append(tab.join(cells) + tab + newLine)

        self.write(''.join(lines))
348
 
3232 varun.gupt 349
# Settings shared by the Tornado application.
settings  = {
        'static_path': os.path.join(os.path.dirname(__file__), 'static'),
        'login_url': '/login',
        # NOTE(review): the cookie-signing secret is hard-coded in source
        # control; consider loading it from configuration instead.
        'cookie_secret' :"61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo="
}

# URL routing table of the dashboard.
application = tornado.web.Application([
                (r"/", MainHandler),
                (r"/login", LoginHandler),
                (r"/feedback", FeedbackHandler),
                (r"/feedback-url", URLFeedbackHandler),
                (r"/watchlist", WatchlistHandler),
                (r"/download", DownloadHandler),
                # All dots are escaped so that only the literal file name matches.
                (r"/(jquery-1\.6\.2\.min\.js)", tornado.web.StaticFileHandler, dict(path=settings['static_path']))
            ], **settings)

if __name__ == '__main__':
    # Serve the application on port 8889 until the process is stopped.
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(8889)
    tornado.ioloop.IOLoop.instance().start()