Subversion Repositories SmartDukaan

Rev

Rev 4198 | Rev 5377 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 31-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
4198 varun.gupt 6
import tornado.httpserver, tornado.ioloop, tornado.web
3232 varun.gupt 7
import json, os, ConfigParser, sys
8
from PyLucene.Retriever import Retriever
3313 varun.gupt 9
from Utils import getItemsWithTopScore, isPriceSame, getProductClusters, getFilteredClustersWithTopScores, \
3440 varun.gupt 10
    getDisplayInfo, getSynonyms
4198 varun.gupt 11
from ScraperLoader import getScraper
12
from PyLucene.IndexBuilder import IndexBuilder
5291 varun.gupt 13
from DataStore.WatchListManager import WatchListManager
3232 varun.gupt 14
 
3235 rajveer 15
# Make the shared ``shop2020`` package importable: its parent directory
# (<home>/code/trunk/PyProj/src) is pushed to the front of sys.path unless it
# is already listed. abspath() drops the trailing slash, so dirname() yields
# the ``src`` directory rather than ``src/shop2020``.
#
# expanduser("~") replaces os.environ["HOME"] so that start-up does not die
# with a KeyError when HOME is not exported; when HOME is set the resulting
# path is the same as before.
cmd_folder = os.path.dirname(
    os.path.abspath(os.path.join(os.path.expanduser("~"), "code/trunk/PyProj/src/shop2020/"))
)
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)
18
 
19
from shop2020.clients.CatalogClient import CatalogClient
3350 varun.gupt 20
from shop2020.thriftpy.model.v1.catalog.ttypes import status
3440 varun.gupt 21
 
3232 varun.gupt 22
class BaseHandler(tornado.web.RequestHandler):
    """Common base class for the dashboard's request handlers."""

    def get_current_user(self):
        """Return the value of the signed ``userauth`` cookie.

        This overrides Tornado's ``get_current_user`` hook; whatever
        ``get_secure_cookie`` yields is handed back unchanged.
        """
        auth_cookie = self.get_secure_cookie('userauth')
        return auth_cookie
25
 
26
class LoginHandler(BaseHandler):
    """Serves the login form and checks posted credentials against ``app.cfg``."""

    def get(self):
        """Render ``LoginForm.html`` from the ``HTMLTemplates`` directory."""
        self.loader = tornado.template.Loader('HTMLTemplates')
        self.write(self.loader.load('LoginForm.html').generate())

    def post(self):
        """Validate the submitted ``username`` / ``password`` pair.

        The expected values are read from the ``[auth]`` section of
        ``app.cfg``. On a match the signed ``userauth`` cookie is set and the
        client is redirected to ``/``; otherwise it is sent back to
        ``/login``.
        """
        config = ConfigParser.SafeConfigParser()
        config.read('app.cfg')

        username = self.get_argument('username')
        password = self.get_argument('password')

        if username == config.get('auth', 'username') and password == config.get('auth', 'password'):
            print('Password Matched')
            # Only the username is stored. Secure cookies are signed but not
            # encrypted, so the previous value (username + '_' + password)
            # exposed the plain-text password to anyone able to read the
            # cookie. get_current_user() only needs a non-empty value.
            self.set_secure_cookie("userauth", username)
            self.redirect('/')
        else:
            self.redirect('/login')
44
 
4198 varun.gupt 45
class URLFeedbackHandler(BaseHandler):
    """Stores user-supplied product URLs and the data scraped from them.

    Two JSON files are maintained, both shaped ``{entity: {source: value}}``:
    ``url_feedback_file`` holds the submitted URLs and
    ``secondary_crawl_file`` holds whatever the scraper returned for them.
    """
    # NOTE(review): neither method is wrapped in @tornado.web.authenticated,
    # yet post() fetches a caller-supplied URL -- confirm this is intended.

    url_feedback_file = '/tmp/price-comp-dashboard/urls.json'
    secondary_crawl_file = '/tmp/price-comp-dashboard/secondary-crawl.json'

    @staticmethod
    def _load_json(path):
        """Return the JSON document stored at ``path``.

        An unparsable file and a file that does not exist yet both yield
        ``{}``; any other I/O error is re-raised.
        """
        import errno  # local import: not part of the module's import header

        try:
            with open(path, 'r') as fp_read:
                return json.load(fp_read)
        except ValueError as e:
            # Empty or corrupt file: start from an empty mapping.
            print(e)
        except IOError as e:
            # Previously a missing file surfaced as an unbound ``fp_read`` in
            # the ``finally`` clause; treat it as an empty store instead.
            if e.errno != errno.ENOENT:
                raise
            print(e)
        return {}

    @staticmethod
    def _dump_json(path, payload):
        """Write ``payload`` to ``path`` as indented JSON, always closing the file."""
        with open(path, 'w') as fp_write:
            json.dump(payload, fp_write, indent = 4)

    def post(self):
        """Record a URL for an (entity, source) pair, scrape it and reply with the result.

        Request arguments: ``entity``, ``source`` and ``url``. The response
        body is the scraper's result with an extra ``entityId`` key.
        """
        urls = self._load_json(URLFeedbackHandler.url_feedback_file)
        print('Existing URLs:  %s' % (urls,))

        entity = self.get_argument('entity')
        source = self.get_argument('source')
        url = self.get_argument('url')

        urls.setdefault(entity, {})[source] = url

        print('New set of URLs:  %s' % (urls,))
        self._dump_json(URLFeedbackHandler.url_feedback_file, urls)

        #Scraping the page
        scraper = getScraper(source)
        productData = scraper.getDataFromProductPage(url)

        #Storing the data
        data = self._load_json(URLFeedbackHandler.secondary_crawl_file)
        data.setdefault(entity, {})[source] = productData

        print('Secondary crawled data: %s' % (data,))
        self._dump_json(URLFeedbackHandler.secondary_crawl_file, data)

        # Added only after the dump, so ``entityId`` reaches the response but
        # not the stored file.
        productData['entityId'] = entity
        self.write(productData)

    def get(self):
        """Reply with the full contents of the secondary-crawl store."""
        self.write(self._load_json(URLFeedbackHandler.secondary_crawl_file))
4198 varun.gupt 116
 
3440 varun.gupt 117
class FeedbackHandler(BaseHandler):
    """Persists per-(entity, source) feedback in a JSON file and serves it back."""

    # Single definition of the store location; it used to be re-assigned on
    # the instance inside both save() and get().
    feedback_file = '/tmp/price-comp-dashboard/feedback.json'

    def _read_feedback_text(self):
        """Return the raw text of the feedback file, or ``''`` if it does not exist yet."""
        import errno  # local import: not part of the module's import header

        try:
            with open(self.feedback_file, 'r') as file_to_read:
                return file_to_read.read()
        except IOError as e:
            # A store that was never written is equivalent to an empty one;
            # any other I/O failure is still reported.
            if e.errno != errno.ENOENT:
                raise
            return ''

    def save(self, entity, source, feedback_type, selected_item = None):
        """Store one feedback entry, replacing any previous entry for the pair.

        The entry is ``{'type': feedback_type}`` plus ``'selected_item'`` when
        ``selected_item`` is not None, filed under
        ``feedbacks[entity][source]``.
        """
        feedbacks_json = self._read_feedback_text()

        # Stripping first means a blank or whitespace-only file counts as
        # empty instead of failing inside json.loads.
        feedbacks = json.loads(feedbacks_json) if len(feedbacks_json.strip()) > 1 else {}

        entry = {'type': feedback_type}
        if selected_item is not None:
            entry['selected_item'] = selected_item

        feedbacks.setdefault(entity, {})[source] = entry

        with open(self.feedback_file, 'w') as file_to_write:
            json.dump(feedbacks, file_to_write, indent = 4)

    def post(self):
        """Save the feedback described by ``type``, ``entityId`` and ``source``.

        ``selected`` is read as well when ``type`` is ``'select'``.
        """
        feedback_type = self.get_argument('type')
        entity_id = self.get_argument('entityId')
        price_data_source = self.get_argument('source')

        print('%s %s %s' % (feedback_type, entity_id, price_data_source))

        if feedback_type == 'select':
            selected_item = self.get_argument('selected')
            print(selected_item)
            self.save(entity_id, price_data_source, feedback_type, selected_item)
        else:
            self.save(entity_id, price_data_source, feedback_type)

    def get(self):
        """Reply with the feedback file's contents exactly as stored."""
        print('GET: Feedback data')
        self.write(self._read_feedback_text())
161
 
3232 varun.gupt 162
class MainHandler(BaseHandler):
    """Builds and renders the comparative price chart."""

    def mapSearchUrls(self, map, name):
        """Fill in a search URL for every known store lacking a product URL.

        For each store in the table below: when ``map[store]['url']`` equals
        ``'Not Found'`` it is replaced by the store's search URL with ``$$``
        substituted by ``name``; when the lookup raises KeyError the whole
        entry becomes ``{'price': 'Not Found', 'url': <search URL>}``.
        ``map`` is modified in place and also returned.
        """
        search_urls = {
            'flipkart': 'http://www.flipkart.com/search-mobiles?query=$$&from=all&searchGroup=mobiles',
            'homeshop18': 'http://www.homeshop18.com/nokia%20n97/search:$$/categoryid:3024',
            'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
            'infibeam': 'http://www.infibeam.com/Mobiles/search?q=$$',
            'letsbuy': 'http://www.letsbuy.com/advanced_search_result.php?cPath=254&keywords=$$',
            'snapdeal': '$$',
        }
        # NOTE(review): ``name`` is substituted without URL-encoding -- confirm
        # whether consumers of these URLs encode it themselves.

        for key, template in search_urls.items():
            try:
                if map[key]['url'] == 'Not Found':
                    map[key]['url'] = template.replace('$$', name)
            except KeyError:
                map[key] = {'price': 'Not Found', 'url': template.replace('$$', name)}
        return map

    @tornado.web.authenticated
    def get(self):
        """Render ``PriceChart.html`` with one price-comparison row per catalog entry.

        Items with status ACTIVE, PAUSED and PAUSED_BY_RISK are fetched,
        restricted to a fixed set of categories, de-duplicated by
        ``catalogItemId`` and processed in brand order. The rows are also
        dumped to ``/tmp/pcd_log``.
        """
        self.loader = tornado.template.Loader('HTMLTemplates')
        catalog_client = CatalogClient().get_client()
        items = catalog_client.getAllItemsByStatus(status.ACTIVE)
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED))
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED_BY_RISK))
        retriever = Retriever()

        # Keyed by catalogItemId: when several items share an id the last one wins.
        # NOTE(review): presumably these are the handset-related category ids -- confirm.
        products = {}
        for item in items:
            if item.category in (10002, 10003, 10004, 10005, 10010):
                products[item.catalogItemId] = item

        comparative_prices = []

        for item in sorted(products.values(), key = lambda item: item.brand):
            try:
                # Blank model fields are passed to the retriever as None.
                model_name = item.modelName.strip() or None
                model_number = item.modelNumber.strip() or None

                # Synonym expansion is currently switched off (synonyms = None).
                search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)

                clusters = getProductClusters(search_results)
                filtered_clusters = getFilteredClustersWithTopScores(clusters)

                product_name = "%s " % item.brand
                if model_name is not None:
                    product_name += "%s " % model_name
                if model_number is not None:
                    product_name += model_number

                display_info = getDisplayInfo(filtered_clusters, product_name)
                print('Display Info:  %s' % (display_info,))

                display_info['entity_id'] = item.catalogItemId
                display_info['product_name'] = product_name
                display_info['saholic'] = {'price': item.sellingPrice}
                comparative_prices.append(display_info)
            except Exception as e:
                # Best effort: a failing item is reported and left out of the
                # chart rather than aborting the whole page.
                print('Exception: %s' % (e,))

        # The log file used to be opened inline and never closed explicitly.
        with open('/tmp/pcd_log', 'w') as log_file:
            json.dump(comparative_prices, log_file, indent = 4)
        self.write(self.loader.load('PriceChart.html').generate(data = comparative_prices))
5291 varun.gupt 229
 
230
class WatchlistHandler(BaseHandler):
    """Reads and updates the watchlist kept by ``WatchListManager``."""

    @tornado.web.authenticated
    def get(self):
        """Reply with the watchlist's entity ids as the ``str()`` of a list of ints."""
        manager = WatchListManager()
        watchlist = manager.getWatchlist()
        print('%s %s' % ('Getting watchlist: ', watchlist))

        entity_ids = [int(entry) for entry in watchlist]
        self.write(str(entity_ids))

    # NOTE(review): unlike get(), post() is not wrapped in
    # @tornado.web.authenticated -- confirm whether that is intentional.
    def post(self):
        """Save or delete a watchlist entry.

        ``type`` (stripped) selects the action: ``'save'`` or ``'delete'``;
        any other value is ignored. ``entity`` is passed on to the manager.
        The response body is always ``{}``.
        """
        manager = WatchListManager()

        request_type = self.get_argument('type').strip()
        entity_id = self.get_argument('entity')

        print('%s %s %s %s' % ('Request Type:', request_type, ', Entity Id: ', entity_id))

        # Request type -> name of the manager method that serves it; the
        # method is only looked up once the type is known to be supported.
        method_name = {'save': 'save', 'delete': 'remove'}.get(request_type)
        if method_name is not None:
            getattr(manager, method_name)(entity = entity_id)

        self.write("{}")
3232 varun.gupt 259
 
260
# Application-wide Tornado settings.
settings = {
    'static_path': os.path.join(os.path.dirname(__file__), 'static'),
    'login_url': '/login',
    # NOTE(review): the cookie-signing key is hard-coded in source control;
    # anyone who can read this file can forge a ``userauth`` cookie. Consider
    # loading it from deployment configuration instead.
    'cookie_secret': "61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
}

# URL routing table.
application = tornado.web.Application([
    (r"/", MainHandler),
    (r"/login", LoginHandler),
    (r"/feedback", FeedbackHandler),
    (r"/feedback-url", URLFeedbackHandler),
    (r"/watchlist", WatchlistHandler),
    # Every dot is escaped so that only the literal file name matches; the
    # unescaped dots used to accept any character in those positions.
    (r"/(jquery-1\.6\.2\.min\.js)", tornado.web.StaticFileHandler, dict(path=settings['static_path'])),
], **settings)

if __name__ == '__main__':
    # Serve the dashboard on port 8889 until the process is stopped.
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(8889)
    tornado.ioloop.IOLoop.instance().start()