Subversion Repositories SmartDukaan

Rev

Rev 5761 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 31-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
4198 varun.gupt 6
import tornado.httpserver, tornado.ioloop, tornado.web
3232 varun.gupt 7
import json, os, ConfigParser, sys
8
from PyLucene.Retriever import Retriever
3313 varun.gupt 9
from Utils import getItemsWithTopScore, isPriceSame, getProductClusters, getFilteredClustersWithTopScores, \
5401 varun.gupt 10
    getDisplayInfo, getValidItems, getProductName
4198 varun.gupt 11
from ScraperLoader import getScraper
12
from PyLucene.IndexBuilder import IndexBuilder
5291 varun.gupt 13
from DataStore.WatchListManager import WatchListManager
3232 varun.gupt 14
 
3235 rajveer 15
# Put the directory that holds the ``shop2020`` package on the import path.
# abspath() normalises away the trailing slash, so the dirname() that follows
# yields the parent of ``.../src/shop2020`` rather than the directory itself.
cmd_folder = os.path.abspath(os.environ["HOME"] + "/code/trunk/PyProj/src/shop2020/")
cmd_folder = os.path.dirname(cmd_folder)
if sys.path.count(cmd_folder) == 0:
    sys.path.insert(0, cmd_folder)
18
 
19
from shop2020.clients.CatalogClient import CatalogClient
3350 varun.gupt 20
from shop2020.thriftpy.model.v1.catalog.ttypes import status
3440 varun.gupt 21
 
3232 varun.gupt 22
class BaseHandler(tornado.web.RequestHandler):
    """Common base class for the dashboard's request handlers."""

    def get_current_user(self):
        """Return the value of the ``userauth`` secure cookie.

        Whatever ``get_secure_cookie`` yields is handed back unchanged.
        """
        auth_cookie = self.get_secure_cookie('userauth')
        return auth_cookie
25
 
26
class LoginHandler(BaseHandler):
    """Serves the login form and checks the submitted credentials."""

    def get(self):
        """Render ``LoginForm.html`` from the ``HTMLTemplates`` directory."""
        self.loader = tornado.template.Loader('HTMLTemplates')
        self.write(self.loader.load('LoginForm.html').generate())

    def post(self):
        """Check the posted ``username``/``password`` against ``app.cfg``.

        The expected values are read from the ``[auth]`` section of
        ``app.cfg``.  On a match the ``userauth`` secure cookie is set and the
        client is redirected to ``/``; otherwise it is redirected to
        ``/login``.
        """
        config = ConfigParser.SafeConfigParser()
        config.read('app.cfg')

        username = self.get_argument('username')
        password = self.get_argument('password')

        if username == config.get('auth', 'username') and password == config.get('auth', 'password'):
            print('Password Matched')
            # Tornado secure cookies are signed but not encrypted, so anything
            # stored in them can be read back from the cookie.  Only the
            # username is stored; the password never leaves the server.
            self.set_secure_cookie("userauth", username)
            self.redirect('/')
        else:
            self.redirect('/login')
44
 
4198 varun.gupt 45
class URLFeedbackHandler(BaseHandler):
    """Records user-supplied product URLs and the data scraped from them."""

    # JSON file mapping entity -> source -> URL.
    url_feedback_file = '/usr/price-comp-dashboard/urls.json'
    # JSON file mapping entity -> source -> scraped product data.
    secondary_crawl_file = '/usr/price-comp-dashboard/secondary-crawl.json'

    @staticmethod
    def _load_json(path):
        """Return the parsed JSON content of *path*, or ``{}`` if unusable.

        A file that does not exist, or whose content is not valid JSON,
        yields an empty dict.  Any other I/O error is re-raised.
        """
        import errno

        try:
            # ``with`` guarantees the handle is closed, and a failing open()
            # no longer reaches a ``finally`` that touches an unbound name.
            with open(path, 'r') as fp_read:
                return json.load(fp_read)
        except ValueError as e:
            print(e)
            return {}
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            print(e)
            return {}

    @staticmethod
    def _save_json(path, data):
        """Write *data* to *path* as JSON indented by four spaces."""
        with open(path, 'w') as fp_write:
            json.dump(data, fp_write, indent = 4)

    def post(self):
        """Store the posted URL, scrape it, store the result and return it.

        Reads the ``entity``, ``source`` and ``url`` request arguments.  The
        scraped data, with ``entityId`` added, is written as the response.
        """
        urls = self._load_json(URLFeedbackHandler.url_feedback_file)
        print('Existing URLs:  %s' % (urls,))

        entity = self.get_argument('entity')
        source = self.get_argument('source')
        url = self.get_argument('url')

        urls.setdefault(entity, {})[source] = url

        print('New set of URLs:  %s' % (urls,))
        self._save_json(URLFeedbackHandler.url_feedback_file, urls)

        # Scraping the page
        scraper = getScraper(source)
        productData = scraper.getDataFromProductPage(url)

        # Storing the data
        data = self._load_json(URLFeedbackHandler.secondary_crawl_file)
        data.setdefault(entity, {})[source] = productData

        print('Secondary crawled data: %s' % (data,))
        self._save_json(URLFeedbackHandler.secondary_crawl_file, data)

        # Added after the file is written, so ``entityId`` appears in the
        # response only and not in the stored data.
        productData['entityId'] = entity
        self.write(productData)

    def get(self):
        """Return the stored secondary-crawl data."""
        self.write(self._load_json(URLFeedbackHandler.secondary_crawl_file))
4198 varun.gupt 116
 
3440 varun.gupt 117
class FeedbackHandler(BaseHandler):
    """Stores and returns per-entity, per-source feedback kept in a JSON file."""

    # Declared once on the class instead of being re-assigned in each method.
    feedback_file = '/usr/price-comp-dashboard/feedback.json'

    def _read_feedback_text(self):
        """Return the raw text of the feedback file, or ``''`` if it is missing.

        I/O errors other than "file not found" are re-raised.
        """
        import errno

        try:
            with open(self.feedback_file, 'r') as file_to_read:
                return file_to_read.read()
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            return ''

    def save(self, entity, source, feedback_type, selected_item = None):
        """Record feedback of *feedback_type* for *entity* / *source*.

        Any previous entry for the same entity and source is replaced.
        *selected_item* is stored under ``selected_item`` only when it is not
        ``None``.
        """
        feedbacks_json = self._read_feedback_text()

        # Content of at most one non-whitespace character is treated as
        # "no feedback yet"; stripping first keeps a blank-but-padded file
        # from being handed to json.loads().
        feedbacks = json.loads(feedbacks_json) if len(feedbacks_json.strip()) > 1 else {}

        feedbacks.setdefault(entity, {})[source] = {'type': feedback_type}

        if selected_item is not None:
            feedbacks[entity][source]['selected_item'] = selected_item

        with open(self.feedback_file, 'w') as file_to_write:
            json.dump(feedbacks, file_to_write, indent = 4)

    def post(self):
        """Save the feedback described by the request arguments.

        Reads ``type``, ``entityId`` and ``source``; when ``type`` is
        ``select`` the ``selected`` argument is read and stored as well.
        """
        feedback_type = self.get_argument('type')
        entity_id = self.get_argument('entityId')
        price_data_source = self.get_argument('source')

        print('%s %s %s' % (feedback_type, entity_id, price_data_source))

        selected_item = None
        if feedback_type == 'select':
            selected_item = self.get_argument('selected')
            print(selected_item)

        self.save(entity_id, price_data_source, feedback_type, selected_item)

    def get(self):
        """Write the raw content of the feedback file as the response."""
        print('GET: Feedback data')
        self.write(self._read_feedback_text())
161
 
3232 varun.gupt 162
class MainHandler(BaseHandler):
    """Renders the price-comparison chart."""

    # Search URL template per source; ``$$`` is replaced by the product name.
    _search_urls = {
        'flipkart': 'http://www.flipkart.com/search-mobiles?query=$$&from=all&searchGroup=mobiles',
        'homeshop18': 'http://www.homeshop18.com/nokia%20n97/search:$$/categoryid:3024',
        'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
        'infibeam': 'http://www.infibeam.com/Mobiles/search?q=$$',
        'letsbuy': 'http://www.letsbuy.com/advanced_search_result.php?cPath=254&keywords=$$',
        'snapdeal': '$$',
        'sulekha': 'http://mobiles.sulekha.com/search.htm?cx=partner-pub-3470583419345383%3A8ggsimfcaaa&cof=FORID%3A10&ie=ISO-8859-1&q=$$&sa=Go',
        'tradus': 'http://www.tradus.com/search/tradus_search/?query=$$'
    }

    # Only items whose ``category`` is one of these are charted.
    _categories = (10002, 10003, 10004, 10005, 10010, 11002, 11003)

    def mapSearchUrls(self, map, name):
        """Fill in a search URL for every source that has no product URL.

        For each known source, an entry whose ``url`` is ``'Not Found'`` gets
        the source's search URL for *name*.  A source whose entry (or whose
        ``url`` key) is missing gets a fresh entry with price ``'Not Found'``
        and that search URL.  *map* is modified in place and returned.
        """
        for key in self._search_urls:
            try:
                if map[key]['url'] == 'Not Found':
                    map[key]['url'] = self._search_urls[key].replace('$$', name)
            except KeyError:
                map[key] = {'price': 'Not Found', 'url': self._search_urls[key].replace('$$', name)}
        return map

    @tornado.web.authenticated
    def get(self):
        """Build the comparative price list and render ``PriceChart.html``.

        Items with status ACTIVE, PAUSED and PAUSED_BY_RISK are fetched, the
        ones in the charted categories are kept (one per ``catalogItemId``)
        and processed in brand order.  An item whose processing raises is
        reported on stdout and skipped.  The resulting list is also dumped as
        JSON to ``/usr/pcd_log``.
        """
        self.loader = tornado.template.Loader('HTMLTemplates')
        catalog_client = CatalogClient().get_client()
        items = catalog_client.getAllItemsByStatus(status.ACTIVE)
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED))
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED_BY_RISK))
        retriever = Retriever()

        products = {}
        for item in items:
            if item.category in self._categories:
                products[item.catalogItemId] = item

        comparative_prices = []

        for item in sorted(products.values(), key = lambda item: item.brand):
            try:
                # Blank model name / number are passed on as None.
                model_name = item.modelName.strip() or None
                model_number = item.modelNumber.strip() or None

                search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)

                clusters = getProductClusters(search_results)
                filtered_clusters = getFilteredClustersWithTopScores(clusters)

                product_name = "%s " % item.brand
                if model_name is not None:
                    product_name += "%s " % model_name
                if model_number is not None:
                    product_name += model_number

                display_info = getDisplayInfo(filtered_clusters, product_name)
                print('Display Info:  %s' % (display_info,))

                display_info['entity_id'] = item.catalogItemId
                display_info['product_name'] = product_name
                display_info['saholic'] = {'price': item.sellingPrice}
                comparative_prices.append(display_info)
            except Exception as e:
                # Best effort: one failing item must not break the whole chart.
                print('Exception for %s: %s' % (item.catalogItemId, e))

        # ``with`` closes the log file deterministically; the handle used to
        # be created inline and never closed.
        with open('/usr/pcd_log', 'w') as log_file:
            json.dump(comparative_prices, log_file, indent = 4)
        self.write(self.loader.load('PriceChart.html').generate(data = comparative_prices))
5291 varun.gupt 231
 
232
class WatchlistHandler(BaseHandler):
    """Reads and updates the watch list through ``WatchListManager``."""

    @tornado.web.authenticated
    def get(self):
        """Write the watch-listed entity ids as a JSON array of integers."""
        watchlistManager = WatchListManager()
        watchlist = watchlistManager.getWatchlist()
        print('Getting watchlist:  %s' % (watchlist,))

        entityIds = [int(entity_id) for entity_id in watchlist]

        # json.dumps() gives the same text as str() did for ordinary ints
        # ("[1, 2, 3]") but stays valid JSON for values Python 2 would print
        # with an ``L`` suffix.
        self.write(json.dumps(entityIds))

    def post(self):
        """Save or delete a watch-list entry.

        Reads the ``type`` (``save`` or ``delete``) and ``entity`` request
        arguments.  Any other ``type`` is ignored.  The response body is
        always ``{}``.
        """
        watchlistManager = WatchListManager()

        requestType = self.get_argument('type').strip()
        entityId = self.get_argument('entity')

        print('Request Type: %s , Entity Id:  %s' % (requestType, entityId))

        if requestType == 'save':
            watchlistManager.save(entity = entityId)

        elif requestType == 'delete':
            watchlistManager.remove(entity = entityId)

        self.write("{}")
3232 varun.gupt 261
 
5401 varun.gupt 262
class DownloadHandler(BaseHandler):
    """Exports the price comparison as a tab-separated download."""

    # (key in the display info, column heading) for each competitor column,
    # in the order the columns are written.
    _price_sources = (
        ('flipkart', 'Flipkart'),
        ('homeshop18', 'Homeshop18'),
        ('infibeam', 'Infibeam'),
        ('snapdeal', 'Snapdeal'),
        ('sulekha', 'Sulekha'),
        ('tradus', 'Tradus'),
    )

    # Number of lowest-transfer-price vendor columns written per product.
    _vendor_slots = 3

    def post(self):
        """Write one header line and one line per valid item.

        Each line holds the product name, up to three vendor / transfer-price
        pairs ordered by ascending transfer price, the item's selling price
        and one price per competitor source.  Missing values are left blank.
        """
        catalog_client = CatalogClient().get_client()
        retriever = Retriever()
        vendors = dict((vendor.id, vendor.name) for vendor in catalog_client.getAllVendors())

        self.set_header('Content-Type', 'text/csv')
        self.set_header("Content-disposition", "inline; filename=price-comparison.xls")

        newLine = '\n'
        tab = '\t'

        # The header is derived from the same source list as the rows, so the
        # Sulekha and Tradus columns are now labelled too.
        header_cells = ['Product'] + ['Vendor', 'TP'] * self._vendor_slots + ['Saholic']
        header_cells.extend(label for _, label in self._price_sources)
        lines = [tab.join(header_cells)]

        for item in getValidItems():
            vendorItemPricings = catalog_client.getAllItemPricing(item.id)
            sortedPricings = sorted(vendorItemPricings, key = lambda vendorItemPricing: vendorItemPricing.transferPrice)
            productName = getProductName(item)

            cells = [productName]

            for rank in range(self._vendor_slots):
                if rank < len(sortedPricings):
                    pricing = sortedPricings[rank]
                    cells.append(vendors[pricing.vendorId])
                    cells.append(str(pricing.transferPrice))
                else:
                    cells.extend(['', ''])

            cells.append(str(item.sellingPrice))

            # Blank model name / number are passed on as None.
            model_name = item.modelName.strip() or None
            model_number = item.modelNumber.strip() or None

            search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)

            clusters = getProductClusters(search_results)
            filtered_clusters = getFilteredClustersWithTopScores(clusters)
            display_info = getDisplayInfo(filtered_clusters, productName)

            for source, _ in self._price_sources:
                # A source with no entry, or no price, becomes a blank cell.
                source_info = display_info[source] if source in display_info else {}
                price = source_info['price'] if 'price' in source_info else None
                # %-formatting also copes with a price that is not a string.
                cells.append('%s' % (price,) if price is not None else '')

            # Data rows have always ended with a trailing tab; keep it so
            # existing consumers see the same row layout.
            lines.append(tab.join(cells) + tab)

        self.write(newLine.join(lines) + newLine)
349
 
3232 varun.gupt 350
# Tornado application settings, passed to ``tornado.web.Application`` below.
# NOTE(review): the cookie secret is committed in source -- consider loading
# it from configuration instead.
settings = dict(
    static_path = os.path.join(os.path.dirname(__file__), 'static'),
    login_url = '/login',
    cookie_secret = "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
)
355
 
356
# URL routing table for the dashboard.
application = tornado.web.Application([
    (r"/", MainHandler),
    (r"/login", LoginHandler),
    (r"/feedback", FeedbackHandler),
    (r"/feedback-url", URLFeedbackHandler),
    (r"/watchlist", WatchlistHandler),
    (r"/download", DownloadHandler),
    # Every dot is escaped so the route matches only the literal file name;
    # unescaped, each "." matched any character.
    (r"/(jquery-1\.6\.2\.min\.js)", tornado.web.StaticFileHandler, dict(path=settings['static_path'])),
], **settings)
365
 
366
if __name__ == '__main__':
    # Serve the application over HTTP on port 8889, then hand control to the
    # Tornado I/O loop.
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(8889)
    io_loop = tornado.ioloop.IOLoop.instance()
    io_loop.start()