# Rev 5761 (SVN viewer header; kept as a comment so the file stays valid Python)
'''Created on 31-Aug-2011@author: Varun Gupta'''import tornado.httpserver, tornado.ioloop, tornado.webimport json, os, ConfigParser, sysfrom PyLucene.Retriever import Retrieverfrom Utils import getItemsWithTopScore, isPriceSame, getProductClusters, getFilteredClustersWithTopScores, \getDisplayInfo, getValidItems, getProductNamefrom ScraperLoader import getScraperfrom PyLucene.IndexBuilder import IndexBuilderfrom DataStore.WatchListManager import WatchListManagercmd_folder = os.path.dirname(os.path.abspath(os.environ["HOME"] + "/code/trunk/PyProj/src/shop2020/"))if cmd_folder not in sys.path:sys.path.insert(0, cmd_folder)from shop2020.clients.CatalogClient import CatalogClientfrom shop2020.thriftpy.model.v1.catalog.ttypes import statusclass BaseHandler(tornado.web.RequestHandler):def get_current_user(self):return self.get_secure_cookie('userauth')class LoginHandler(BaseHandler):def get(self):self.loader = tornado.template.Loader('HTMLTemplates')self.write(self.loader.load('LoginForm.html').generate())def post(self):config = ConfigParser.SafeConfigParser()config.read('app.cfg')username = self.get_argument('username')password = self.get_argument('password')if username == config.get('auth', 'username') and password == config.get('auth', 'password'):print 'Password Matched'self.set_secure_cookie("userauth", username + '_' + password)self.redirect('/')else:self.redirect('/login')class URLFeedbackHandler(BaseHandler):url_feedback_file = '/usr/price-comp-dashboard/urls.json'secondary_crawl_file = '/usr/price-comp-dashboard/secondary-crawl.json'def post(self):try:fp_read = open(URLFeedbackHandler.url_feedback_file, 'r')urls = json.load(fp_read)except ValueError as e:print eurls = {}finally:fp_read.close()print 'Existing URLs: ', urlsentity = self.get_argument('entity')source = self.get_argument('source')url = self.get_argument('url')if entity in urls:urls[entity][source] = urlelse:urls[entity] = {source: url}print 'New set of URLs: ', urlsfp_write = 
open(URLFeedbackHandler.url_feedback_file, 'w')json.dump(urls, fp_write, indent = 4)fp_write.close()#Scraping the pagescraper = getScraper(source)productData = scraper.getDataFromProductPage(url)#Storing the datatry:fp_read = open(URLFeedbackHandler.secondary_crawl_file, 'r')data = json.load(fp_read)except ValueError as e:print edata = {}finally:fp_read.close()if entity in data:data[entity][source] = productDataelse:data[entity] = {source: productData}print 'Secondary crawled data:', datafp_write = open(URLFeedbackHandler.secondary_crawl_file, 'w')json.dump(data, fp_write, indent = 4)fp_write.close()productData['entityId'] = entityself.write(productData)def get(self):try:fp_read = open(URLFeedbackHandler.secondary_crawl_file, 'r')data = json.load(fp_read)except ValueError as e:print edata = {}finally:fp_read.close()self.write(data)class FeedbackHandler(BaseHandler):def save(self, entity, source, feedback_type, selected_item = None):self.feedback_file = '/usr/price-comp-dashboard/feedback.json'file_to_read = open(self.feedback_file, 'r')feedbacks_json = file_to_read.read()file_to_read.close()feedbacks = json.loads(feedbacks_json) if len(feedbacks_json) > 1 else {}if entity not in feedbacks: feedbacks[entity] = {}feedbacks[entity][source] = {'type': feedback_type}if selected_item is not None: feedbacks[entity][source]['selected_item'] = selected_itemfile_to_write = open(self.feedback_file, 'w')json.dump(feedbacks, file_to_write, indent = 4)file_to_write.close()def post(self):feedback_type = self.get_argument('type')entity_id = self.get_argument('entityId')price_data_source = self.get_argument('source')print feedback_type, entity_id, price_data_sourceif feedback_type == 'select':selected_item = self.get_argument('selected')print selected_itemself.save(entity_id, price_data_source, feedback_type, selected_item)else:self.save(entity_id, price_data_source, feedback_type)def get(self):print 'GET: Feedback data'self.feedback_file = 
'/usr/price-comp-dashboard/feedback.json'file_to_read = open(self.feedback_file, 'r')feedbacks_json = file_to_read.read()file_to_read.close()self.write(feedbacks_json)class MainHandler(BaseHandler):def mapSearchUrls(self, map, name):search_urls = {'flipkart': 'http://www.flipkart.com/search-mobiles?query=$$&from=all&searchGroup=mobiles','homeshop18': 'http://www.homeshop18.com/nokia%20n97/search:$$/categoryid:3024','adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$','infibeam': 'http://www.infibeam.com/Mobiles/search?q=$$','letsbuy': 'http://www.letsbuy.com/advanced_search_result.php?cPath=254&keywords=$$','snapdeal': '$$','sulekha': 'http://mobiles.sulekha.com/search.htm?cx=partner-pub-3470583419345383%3A8ggsimfcaaa&cof=FORID%3A10&ie=ISO-8859-1&q=$$&sa=Go','tradus': 'http://www.tradus.com/search/tradus_search/?query=$$'}for key in search_urls.iterkeys():try:if map[key]['url'] == 'Not Found':map[key]['url'] = search_urls[key].replace('$$', name)except KeyError:map[key] = {'price': 'Not Found', 'url': search_urls[key].replace('$$', name)}return map@tornado.web.authenticateddef get(self):self.loader = tornado.template.Loader('HTMLTemplates')catalog_client = CatalogClient().get_client()items = catalog_client.getAllItemsByStatus(status.ACTIVE)items.extend(catalog_client.getAllItemsByStatus(status.PAUSED))items.extend(catalog_client.getAllItemsByStatus(status.PAUSED_BY_RISK))# synonyms = getSynonyms()# print synonymsretriever = Retriever()products = {}for item in items:if item.category in (10002, 10003, 10004, 10005, 10010, 11002, 11003): products[item.catalogItemId] = itemcomparative_prices = []for item in sorted(products.itervalues(), key = lambda item: item.brand):try:model_name = item.modelName.strip() if len(item.modelName.strip()) > 0 else Nonemodel_number = item.modelNumber.strip() if len(item.modelNumber.strip()) > 0 else None#synonyms_for_this_model = synonyms[item.catalogItemId] if item.catalogItemId in synonyms else 
Nonesearch_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)clusters = getProductClusters(search_results)filtered_clusters = getFilteredClustersWithTopScores(clusters)product_name = "%s " % item.brandproduct_name += "%s " % model_name if model_name is not None else ''product_name += model_number if model_number is not None else ''display_info = getDisplayInfo(filtered_clusters, product_name)print 'Display Info: ', display_infodisplay_info['entity_id'] = item.catalogItemIddisplay_info['product_name'] = product_namedisplay_info['saholic'] = {'price': item.sellingPrice}comparative_prices.append(display_info)except Exception as e:print 'Exception for %s:' % item.catalogItemId, ejson.dump(comparative_prices, open('/usr/pcd_log', 'w'), indent = 4)self.write(self.loader.load('PriceChart.html').generate(data = comparative_prices))class WatchlistHandler(BaseHandler):@tornado.web.authenticateddef get(self):watchlistManager = WatchListManager()watchlist = watchlistManager.getWatchlist()print 'Getting watchlist: ', watchlistentityIds = []for id in watchlist:entityIds.append(int(id))self.write(str(entityIds))def post(self):watchlistManager = WatchListManager()requestType = self.get_argument('type').strip()entityId = self.get_argument('entity')print 'Request Type:', requestType, ', Entity Id: ', entityIdif requestType == 'save':watchlistManager.save(entity = entityId)elif requestType == 'delete':watchlistManager.remove(entity = entityId)self.write("{}")class DownloadHandler(BaseHandler):def post(self):catalog_client = CatalogClient().get_client()retriever = Retriever()vendors = {}for vendor in catalog_client.getAllVendors():vendors[vendor.id] = vendor.nameself.set_header('Content-Type', 'text/csv')self.set_header("Content-disposition", "inline; filename=price-comparison.xls")newLine = '\n'tab = '\t'header = 'Product' + tabheader += 'Vendor' + tab + 'TP' + tab + 'Vendor' + tab + 'TP' + tab + 'Vendor' + tab + 
'TP' + tabheader += 'Saholic' + tab + 'Flipkart' + tab + 'Homeshop18' + tab + 'Infibeam' + tab + 'Snapdeal' + newLineresponseText = headerfor item in getValidItems():vendorItemPricings = catalog_client.getAllItemPricing(item.id)sortedPricings = sorted(vendorItemPricings, key = lambda vendorItemPricing: vendorItemPricing.transferPrice)productName = getProductName(item)row = productName + tabif len(sortedPricings) > 0:row += vendors[sortedPricings[0].vendorId] + tab + str(sortedPricings[0].transferPrice) + tabelse:row += tab + tabif len(sortedPricings) > 1:row += vendors[sortedPricings[1].vendorId] + tab + str(sortedPricings[1].transferPrice) + tabelse:row += tab + tabif len(sortedPricings) > 2:row += vendors[sortedPricings[2].vendorId] + tab + str(sortedPricings[2].transferPrice) + tabelse:row += tab + tabrow += str(item.sellingPrice) + tabmodel_name = item.modelName.strip() if len(item.modelName.strip()) > 0 else Nonemodel_number = item.modelNumber.strip() if len(item.modelNumber.strip()) > 0 else Nonesearch_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)clusters = getProductClusters(search_results)filtered_clusters = getFilteredClustersWithTopScores(clusters)display_info = getDisplayInfo(filtered_clusters, productName)if 'price' in display_info['flipkart'] and display_info['flipkart']['price'] is not None:row += display_info['flipkart']['price'] + tabelse:row += tabif 'price' in display_info['homeshop18'] and display_info['homeshop18']['price'] is not None:row += display_info['homeshop18']['price'] + tabelse:row += tabif 'price' in display_info['infibeam'] and display_info['infibeam']['price'] is not None:row += display_info['infibeam']['price'] + tabelse:row += tabif 'price' in display_info['snapdeal'] and display_info['snapdeal']['price'] is not None:row += display_info['snapdeal']['price'] + tabelse:row += tabif 'price' in display_info['sulekha'] and display_info['sulekha']['price'] is not 
None:row += display_info['sulekha']['price'] + tabelse:row += tabif 'price' in display_info['tradus'] and display_info['tradus']['price'] is not None:row += display_info['tradus']['price'] + tabelse:row += tabresponseText += row + newLineself.write(responseText)settings = {'static_path': os.path.join(os.path.dirname(__file__), 'static'),'login_url': '/login','cookie_secret' :"61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo="}application = tornado.web.Application([(r"/", MainHandler),(r"/login", LoginHandler),(r"/feedback", FeedbackHandler),(r"/feedback-url", URLFeedbackHandler),(r"/watchlist", WatchlistHandler),(r"/download", DownloadHandler),(r"/(jquery-1.6.2.min\.js)", tornado.web.StaticFileHandler, dict(path=settings['static_path']))], **settings)if __name__ == '__main__':http_server = tornado.httpserver.HTTPServer(application)http_server.listen(8889)tornado.ioloop.IOLoop.instance().start()