Subversion Repositories SmartDukaan

Rev

Rev 5401 | Rev 5639 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 31-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
4198 varun.gupt 6
import tornado.httpserver, tornado.ioloop, tornado.web
3232 varun.gupt 7
import json, os, ConfigParser, sys
8
from PyLucene.Retriever import Retriever
3313 varun.gupt 9
from Utils import getItemsWithTopScore, isPriceSame, getProductClusters, getFilteredClustersWithTopScores, \
5401 varun.gupt 10
    getDisplayInfo, getValidItems, getProductName
4198 varun.gupt 11
from ScraperLoader import getScraper
12
from PyLucene.IndexBuilder import IndexBuilder
5291 varun.gupt 13
from DataStore.WatchListManager import WatchListManager
3232 varun.gupt 14
 
3235 rajveer 15
# Make the shop2020 project source tree importable before the
# `from shop2020...` imports below.  abspath() strips the trailing slash,
# so dirname() yields .../PyProj/src — the parent of the shop2020 package.
cmd_folder = os.path.dirname(os.path.abspath(os.environ["HOME"] + "/code/trunk/PyProj/src/shop2020/"))
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)
18
 
19
from shop2020.clients.CatalogClient import CatalogClient
3350 varun.gupt 20
from shop2020.thriftpy.model.v1.catalog.ttypes import status
3440 varun.gupt 21
 
3232 varun.gupt 22
class BaseHandler(tornado.web.RequestHandler):
    """Common base for all dashboard handlers.

    Supplies the user lookup consumed by the @tornado.web.authenticated
    decorator on the protected handlers below.
    """

    def get_current_user(self):
        """Return the signed 'userauth' cookie value, or None when absent."""
        cookie_value = self.get_secure_cookie('userauth')
        return cookie_value
25
 
26
class LoginHandler(BaseHandler):
27
    def get(self):
28
        self.loader = tornado.template.Loader('HTMLTemplates')
29
        self.write(self.loader.load('LoginForm.html').generate())
30
 
31
    def post(self):
32
        config = ConfigParser.SafeConfigParser()
33
        config.read('app.cfg')
34
 
35
        username = self.get_argument('username')
36
        password = self.get_argument('password')
37
 
38
        if username == config.get('auth', 'username') and password == config.get('auth', 'password'):
39
            print 'Password Matched'
40
            self.set_secure_cookie("userauth", username + '_' + password)
41
            self.redirect('/')
42
        else:
43
            self.redirect('/login')
44
 
4198 varun.gupt 45
class URLFeedbackHandler(BaseHandler):
5291 varun.gupt 46
    url_feedback_file = '/tmp/price-comp-dashboard/urls.json'
47
    secondary_crawl_file = '/tmp/price-comp-dashboard/secondary-crawl.json'
4198 varun.gupt 48
 
49
    def post(self):
50
        try:
5291 varun.gupt 51
            fp_read = open(URLFeedbackHandler.url_feedback_file, 'r')
4198 varun.gupt 52
            urls = json.load(fp_read)
53
 
54
        except ValueError as e:
55
            print e
56
            urls = {}
57
        finally:
58
            fp_read.close()
59
 
5291 varun.gupt 60
        print 'Existing URLs: ', urls
61
 
4198 varun.gupt 62
        entity = self.get_argument('entity')
63
        source = self.get_argument('source')
64
        url = self.get_argument('url')
65
 
66
        if entity in urls:
67
            urls[entity][source] = url
68
        else:
69
            urls[entity] = {source: url}
70
 
5291 varun.gupt 71
        print 'New set of URLs: ', urls
72
        fp_write = open(URLFeedbackHandler.url_feedback_file, 'w')
4198 varun.gupt 73
        json.dump(urls, fp_write, indent = 4)
74
        fp_write.close()
75
 
76
        #Scraping the page
77
        scraper = getScraper(source)
5291 varun.gupt 78
        productData = scraper.getDataFromProductPage(url)
79
 
80
        #Storing the data
81
        try:
82
            fp_read = open(URLFeedbackHandler.secondary_crawl_file, 'r')
83
            data = json.load(fp_read)
84
 
85
        except ValueError as e:
86
            print e
87
            data = {}
88
        finally:
89
            fp_read.close()
90
 
91
        if entity in data:
92
            data[entity][source] = productData
93
        else:
94
            data[entity] = {source: productData}
95
 
96
        print 'Secondary crawled data:', data
97
 
98
        fp_write = open(URLFeedbackHandler.secondary_crawl_file, 'w')
99
        json.dump(data, fp_write, indent = 4)
100
        fp_write.close()
101
 
102
        productData['entityId'] = entity
103
        self.write(productData)
4198 varun.gupt 104
 
105
    def get(self):
5291 varun.gupt 106
        try:
107
            fp_read = open(URLFeedbackHandler.secondary_crawl_file, 'r')
108
            data = json.load(fp_read)
109
 
110
        except ValueError as e:
111
            print e
112
            data = {}
113
        finally:
114
            fp_read.close()
115
        self.write(data)
4198 varun.gupt 116
 
3440 varun.gupt 117
class FeedbackHandler(BaseHandler):
118
 
119
    def save(self, entity, source, feedback_type, selected_item = None):
4198 varun.gupt 120
        self.feedback_file = '/tmp/price-comp-dashboard/feedback.json'
3440 varun.gupt 121
        file_to_read = open(self.feedback_file, 'r')
122
 
123
        feedbacks_json = file_to_read.read()
124
        file_to_read.close()
125
 
126
        feedbacks = json.loads(feedbacks_json) if len(feedbacks_json) > 1 else {}
127
 
128
        if entity not in feedbacks: feedbacks[entity] = {}
129
 
130
        feedbacks[entity][source] = {'type': feedback_type}
131
 
132
        if selected_item is not None:   feedbacks[entity][source]['selected_item'] = selected_item
133
 
134
        file_to_write = open(self.feedback_file, 'w')
5291 varun.gupt 135
        json.dump(feedbacks, file_to_write, indent = 4)
3440 varun.gupt 136
        file_to_write.close()
137
 
138
    def post(self):
139
        feedback_type = self.get_argument('type')
140
        entity_id = self.get_argument('entityId')
141
        price_data_source = self.get_argument('source')
142
 
143
        print feedback_type, entity_id, price_data_source
144
 
145
        if feedback_type == 'select':
146
            selected_item = self.get_argument('selected')
147
            print selected_item
148
            self.save(entity_id, price_data_source, feedback_type, selected_item)
149
        else:
150
            self.save(entity_id, price_data_source, feedback_type)
151
 
152
    def get(self):
153
        print 'GET: Feedback data'
4198 varun.gupt 154
        self.feedback_file = '/tmp/price-comp-dashboard/feedback.json'
3440 varun.gupt 155
        file_to_read = open(self.feedback_file, 'r')
156
 
157
        feedbacks_json = file_to_read.read()
158
        file_to_read.close()
159
 
160
        self.write(feedbacks_json)
161
 
3232 varun.gupt 162
class MainHandler(BaseHandler):
    """Renders the main comparative-price chart for phone-category items."""

    def mapSearchUrls(self, map, name):
        """For every known source, fill in a search-page URL (with `name`
        substituted for '$$') wherever a product URL is missing.

        NOTE(review): the `map` parameter shadows the builtin, and this
        method is not called anywhere in this file — presumably retained
        for reuse; confirm before removing.
        """

        # Per-source search URL templates; '$$' is replaced by the name.
        search_urls = {
            'flipkart': 'http://www.flipkart.com/search-mobiles?query=$$&from=all&searchGroup=mobiles',
            'homeshop18': 'http://www.homeshop18.com/nokia%20n97/search:$$/categoryid:3024',
            'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
            'infibeam': 'http://www.infibeam.com/Mobiles/search?q=$$',
            'letsbuy': 'http://www.letsbuy.com/advanced_search_result.php?cPath=254&keywords=$$',
            'snapdeal': '$$'
        }

        for key in search_urls.iterkeys():
            try:
                # Only overwrite entries explicitly marked 'Not Found'.
                if map[key]['url'] == 'Not Found':
                    map[key]['url'] = search_urls[key].replace('$$', name)
            except KeyError:
                # Source missing entirely: create a stub entry.
                map[key] = {'price': 'Not Found', 'url': search_urls[key].replace('$$', name)}
        return map


    @tornado.web.authenticated
    def get(self):
        """Collect active/paused catalog items, look up competitor prices
        for each, and render PriceChart.html with the results."""
        self.loader = tornado.template.Loader('HTMLTemplates')
        catalog_client = CatalogClient().get_client()
        items = catalog_client.getAllItemsByStatus(status.ACTIVE)
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED))
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED_BY_RISK))
#        synonyms = getSynonyms()
#        print synonyms
        retriever = Retriever()
        products = {}

        # Keep only items in these categories.  NOTE(review): magic ids —
        # presumably the phone categories; confirm against the catalog.
        for item in items:
            if item.category in (10002, 10003, 10004, 10005, 10010):  products[item.catalogItemId] = item

        comparative_prices = []

        for item in sorted(products.itervalues(), key = lambda item: item.brand):
            try:
                # Blank model name/number strings are treated as absent.
                model_name = item.modelName.strip() if len(item.modelName.strip()) > 0 else None
                model_number = item.modelNumber.strip() if len(item.modelNumber.strip()) > 0 else None

                #synonyms_for_this_model = synonyms[item.catalogItemId] if item.catalogItemId in synonyms else None

                search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)

                clusters = getProductClusters(search_results)
                filtered_clusters = getFilteredClustersWithTopScores(clusters)

                # Display name: brand, then model name/number when present.
                product_name = "%s " % item.brand
                product_name += "%s " % model_name if model_name is not None else ''
                product_name += model_number if model_number is not None else ''

                display_info = getDisplayInfo(filtered_clusters, product_name)
                print 'Display Info: ', display_info

                display_info['entity_id'] = item.catalogItemId
                display_info['product_name'] = product_name
                display_info['saholic'] = {'price': item.sellingPrice}
                comparative_prices.append(display_info)
            except Exception as e:
                # Best effort: one bad item must not break the whole page.
                print 'Exception for %s:' % item.catalogItemId, e

        # Debug dump of the data handed to the template.
        json.dump(comparative_prices, open('/tmp/pcd_log', 'w'), indent = 4)
        self.write(self.loader.load('PriceChart.html').generate(data = comparative_prices))
5291 varun.gupt 229
 
230
class WatchlistHandler(BaseHandler):
231
 
232
    @tornado.web.authenticated
233
    def get(self):
234
        watchlistManager = WatchListManager()
235
        watchlist = watchlistManager.getWatchlist()
236
        print 'Getting watchlist: ', watchlist
237
        entityIds = []
238
 
239
        for id in watchlist:
240
            entityIds.append(int(id))
241
 
242
        self.write(str(entityIds))
243
 
244
    def post(self):
245
        watchlistManager = WatchListManager()
246
 
247
        requestType = self.get_argument('type').strip()
248
        entityId = self.get_argument('entity')
249
 
250
        print 'Request Type:', requestType, ', Entity Id: ', entityId
251
 
252
        if requestType == 'save':
253
            watchlistManager.save(entity = entityId)
254
 
255
        elif requestType == 'delete':
256
            watchlistManager.remove(entity = entityId)
257
 
258
        self.write("{}")
3232 varun.gupt 259
 
5401 varun.gupt 260
class DownloadHandler(BaseHandler):
    """Generates a tab-separated price-comparison sheet (served with an
    .xls filename) listing, per valid item: up to three cheapest vendor
    transfer prices, our selling price, and competitor-site prices."""

    # Competitor columns, in the order they appear in the header row.
    COMPETITOR_SOURCES = ('flipkart', 'homeshop18', 'infibeam', 'snapdeal')

    def post(self):
        """Build the spreadsheet text and write it as the response."""
        catalog_client = CatalogClient().get_client()
        retriever = Retriever()
        vendors = {}

        # vendor id -> display name, for the TP columns.
        for vendor in catalog_client.getAllVendors():
            vendors[vendor.id] = vendor.name

        self.set_header('Content-Type', 'text/csv')
        self.set_header("Content-disposition", "inline; filename=price-comparison.xls")

        newLine = '\n'
        tab = '\t'

        header = 'Product' + tab
        header += 'Vendor' + tab + 'TP' + tab + 'Vendor' + tab + 'TP' + tab +  'Vendor' + tab + 'TP' + tab
        header += 'Saholic' + tab + 'Flipkart' + tab + 'Homeshop18' + tab + 'Infibeam' + tab +  'Snapdeal' + newLine

        responseText = header

        for item in getValidItems():
            vendorItemPricings = catalog_client.getAllItemPricing(item.id)
            sortedPricings = sorted(vendorItemPricings, key = lambda vendorItemPricing: vendorItemPricing.transferPrice)
            productName = getProductName(item)

            row = productName + tab

            # Three cheapest vendor quotes; blank columns when fewer exist.
            # (Replaces three copy-pasted if/else blocks.)
            for slot in range(3):
                if slot < len(sortedPricings):
                    pricing = sortedPricings[slot]
                    row += vendors[pricing.vendorId] + tab + str(pricing.transferPrice) + tab
                else:
                    row += tab + tab

            row += str(item.sellingPrice) + tab

            # Blank model name/number strings are treated as absent.
            model_name = item.modelName.strip() if len(item.modelName.strip()) > 0 else None
            model_number = item.modelNumber.strip() if len(item.modelNumber.strip()) > 0 else None

            search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)

            clusters = getProductClusters(search_results)
            filtered_clusters = getFilteredClustersWithTopScores(clusters)
            display_info = getDisplayInfo(filtered_clusters, productName)

            # One column per competitor site; blank when no price was found.
            # (Replaces four copy-pasted if/else blocks; like the original,
            # a source missing from display_info raises KeyError.)
            for source in DownloadHandler.COMPETITOR_SOURCES:
                source_info = display_info[source]
                if 'price' in source_info and source_info['price'] is not None:
                    row += source_info['price'] + tab
                else:
                    row += tab

            responseText += row + newLine

        self.write(responseText)
337
 
3232 varun.gupt 338
# Application configuration.
settings  = {
        'static_path': os.path.join(os.path.dirname(__file__), 'static'),
        'login_url': '/login', 
        # NOTE(review): hard-coded cookie secret checked into source control —
        # should be loaded from configuration/environment instead.
        'cookie_secret' :"61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo="
}

# URL routing table for the dashboard.
application = tornado.web.Application([
                (r"/", MainHandler),
                (r"/login", LoginHandler),
                (r"/feedback", FeedbackHandler),
                (r"/feedback-url", URLFeedbackHandler),
                (r"/watchlist", WatchlistHandler),
                (r"/download", DownloadHandler),
                (r"/(jquery-1.6.2.min\.js)", tornado.web.StaticFileHandler, dict(path=settings['static_path']))
            ], **settings)

if __name__ == '__main__':
    # Single-process HTTP server on port 8889.
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(8889)
    tornado.ioloop.IOLoop.instance().start()