# Subversion Repositories SmartDukaan -- SVN web-viewer header, kept as a
# comment so the module parses.
# Rev 5761

'''
Created on 31-Aug-2011

@author: Varun Gupta
'''
import tornado.httpserver, tornado.ioloop, tornado.web
import json, os, ConfigParser, sys
from PyLucene.Retriever import Retriever
from Utils import getItemsWithTopScore, isPriceSame, getProductClusters, getFilteredClustersWithTopScores, \
    getDisplayInfo, getValidItems, getProductName
from ScraperLoader import getScraper
from PyLucene.IndexBuilder import IndexBuilder
from DataStore.WatchListManager import WatchListManager

# Make the shop2020 project packages importable before the shop2020.* imports
# below; assumes sources are checked out under $HOME/code/trunk -- TODO
# confirm this layout holds on deployment hosts.
cmd_folder = os.path.dirname(os.path.abspath(os.environ["HOME"] + "/code/trunk/PyProj/src/shop2020/"))
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)

from shop2020.clients.CatalogClient import CatalogClient
from shop2020.thriftpy.model.v1.catalog.ttypes import status

class BaseHandler(tornado.web.RequestHandler):
    """Common base for all handlers: resolves the current user from the
    'userauth' secure cookie set by LoginHandler."""

    def get_current_user(self):
        # Tornado invokes this for @tornado.web.authenticated; a missing or
        # tampered cookie yields None, which triggers the login redirect.
        cookie_value = self.get_secure_cookie('userauth')
        return cookie_value
    
class LoginHandler(BaseHandler):
    def get(self):
        self.loader = tornado.template.Loader('HTMLTemplates')
        self.write(self.loader.load('LoginForm.html').generate())
    
    def post(self):
        config = ConfigParser.SafeConfigParser()
        config.read('app.cfg')
        
        username = self.get_argument('username')
        password = self.get_argument('password')
        
        if username == config.get('auth', 'username') and password == config.get('auth', 'password'):
            print 'Password Matched'
            self.set_secure_cookie("userauth", username + '_' + password)
            self.redirect('/')
        else:
            self.redirect('/login')

class URLFeedbackHandler(BaseHandler):
    url_feedback_file = '/usr/price-comp-dashboard/urls.json'
    secondary_crawl_file = '/usr/price-comp-dashboard/secondary-crawl.json'
    
    def post(self):
        try:
            fp_read = open(URLFeedbackHandler.url_feedback_file, 'r')
            urls = json.load(fp_read)
            
        except ValueError as e:
            print e
            urls = {}
        finally:
            fp_read.close()
        
        print 'Existing URLs: ', urls
        
        entity = self.get_argument('entity')
        source = self.get_argument('source')
        url = self.get_argument('url')
        
        if entity in urls:
            urls[entity][source] = url
        else:
            urls[entity] = {source: url}
        
        print 'New set of URLs: ', urls
        fp_write = open(URLFeedbackHandler.url_feedback_file, 'w')
        json.dump(urls, fp_write, indent = 4)
        fp_write.close()
        
        #Scraping the page
        scraper = getScraper(source)
        productData = scraper.getDataFromProductPage(url)
        
        #Storing the data
        try:
            fp_read = open(URLFeedbackHandler.secondary_crawl_file, 'r')
            data = json.load(fp_read)
            
        except ValueError as e:
            print e
            data = {}
        finally:
            fp_read.close()
        
        if entity in data:
            data[entity][source] = productData
        else:
            data[entity] = {source: productData}
        
        print 'Secondary crawled data:', data
        
        fp_write = open(URLFeedbackHandler.secondary_crawl_file, 'w')
        json.dump(data, fp_write, indent = 4)
        fp_write.close()
        
        productData['entityId'] = entity
        self.write(productData)
    
    def get(self):
        try:
            fp_read = open(URLFeedbackHandler.secondary_crawl_file, 'r')
            data = json.load(fp_read)
            
        except ValueError as e:
            print e
            data = {}
        finally:
            fp_read.close()
        self.write(data)

class FeedbackHandler(BaseHandler):
    
    def save(self, entity, source, feedback_type, selected_item = None):
        self.feedback_file = '/usr/price-comp-dashboard/feedback.json'
        file_to_read = open(self.feedback_file, 'r')
        
        feedbacks_json = file_to_read.read()
        file_to_read.close()
        
        feedbacks = json.loads(feedbacks_json) if len(feedbacks_json) > 1 else {}
        
        if entity not in feedbacks: feedbacks[entity] = {}
        
        feedbacks[entity][source] = {'type': feedback_type}
        
        if selected_item is not None:   feedbacks[entity][source]['selected_item'] = selected_item
        
        file_to_write = open(self.feedback_file, 'w')
        json.dump(feedbacks, file_to_write, indent = 4)
        file_to_write.close()
        
    def post(self):
        feedback_type = self.get_argument('type')
        entity_id = self.get_argument('entityId')
        price_data_source = self.get_argument('source')
        
        print feedback_type, entity_id, price_data_source
        
        if feedback_type == 'select':
            selected_item = self.get_argument('selected')
            print selected_item
            self.save(entity_id, price_data_source, feedback_type, selected_item)
        else:
            self.save(entity_id, price_data_source, feedback_type)
    
    def get(self):
        print 'GET: Feedback data'
        self.feedback_file = '/usr/price-comp-dashboard/feedback.json'
        file_to_read = open(self.feedback_file, 'r')
        
        feedbacks_json = file_to_read.read()
        file_to_read.close()
        
        self.write(feedbacks_json)
        
class MainHandler(BaseHandler):

    def mapSearchUrls(self, map, name):
        
        search_urls = {
            'flipkart': 'http://www.flipkart.com/search-mobiles?query=$$&from=all&searchGroup=mobiles',
            'homeshop18': 'http://www.homeshop18.com/nokia%20n97/search:$$/categoryid:3024',
            'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
            'infibeam': 'http://www.infibeam.com/Mobiles/search?q=$$',
            'letsbuy': 'http://www.letsbuy.com/advanced_search_result.php?cPath=254&keywords=$$',
            'snapdeal': '$$',
            'sulekha': 'http://mobiles.sulekha.com/search.htm?cx=partner-pub-3470583419345383%3A8ggsimfcaaa&cof=FORID%3A10&ie=ISO-8859-1&q=$$&sa=Go',
            'tradus': 'http://www.tradus.com/search/tradus_search/?query=$$'
        }
        
        for key in search_urls.iterkeys():
            try:
                if map[key]['url'] == 'Not Found':
                    map[key]['url'] = search_urls[key].replace('$$', name)
            except KeyError:
                map[key] = {'price': 'Not Found', 'url': search_urls[key].replace('$$', name)}
        return map
    
    
    @tornado.web.authenticated
    def get(self):
        self.loader = tornado.template.Loader('HTMLTemplates')
        catalog_client = CatalogClient().get_client()
        items = catalog_client.getAllItemsByStatus(status.ACTIVE)
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED))
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED_BY_RISK))
#        synonyms = getSynonyms()
#        print synonyms
        retriever = Retriever()
        products = {}
        
        for item in items:
            if item.category in (10002, 10003, 10004, 10005, 10010, 11002, 11003):  products[item.catalogItemId] = item
        
        comparative_prices = []
        
        for item in sorted(products.itervalues(), key = lambda item: item.brand):
            try:
                model_name = item.modelName.strip() if len(item.modelName.strip()) > 0 else None
                model_number = item.modelNumber.strip() if len(item.modelNumber.strip()) > 0 else None
                
                #synonyms_for_this_model = synonyms[item.catalogItemId] if item.catalogItemId in synonyms else None
                
                search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)
                
                clusters = getProductClusters(search_results)
                filtered_clusters = getFilteredClustersWithTopScores(clusters)
                
                product_name = "%s " % item.brand
                product_name += "%s " % model_name if model_name is not None else ''
                product_name += model_number if model_number is not None else ''
                
                display_info = getDisplayInfo(filtered_clusters, product_name)
                print 'Display Info: ', display_info
                
                display_info['entity_id'] = item.catalogItemId
                display_info['product_name'] = product_name
                display_info['saholic'] = {'price': item.sellingPrice}
                comparative_prices.append(display_info)
            except Exception as e:
                print 'Exception for %s:' % item.catalogItemId, e
        
        json.dump(comparative_prices, open('/usr/pcd_log', 'w'), indent = 4)
        self.write(self.loader.load('PriceChart.html').generate(data = comparative_prices))
        
class WatchlistHandler(BaseHandler):
    
    @tornado.web.authenticated
    def get(self):
        watchlistManager = WatchListManager()
        watchlist = watchlistManager.getWatchlist()
        print 'Getting watchlist: ', watchlist
        entityIds = []
        
        for id in watchlist:
            entityIds.append(int(id))
        
        self.write(str(entityIds))
    
    def post(self):
        watchlistManager = WatchListManager()
        
        requestType = self.get_argument('type').strip()
        entityId = self.get_argument('entity')
        
        print 'Request Type:', requestType, ', Entity Id: ', entityId
        
        if requestType == 'save':
            watchlistManager.save(entity = entityId)
            
        elif requestType == 'delete':
            watchlistManager.remove(entity = entityId)
        
        self.write("{}")

class DownloadHandler(BaseHandler):
    """Streams a tab-separated price-comparison sheet, served as an .xls
    attachment: product, three cheapest vendor transfer prices, Saholic's
    selling price, then one column per competitor site."""

    # Competitor columns in the exact order they appear in each row.
    SOURCES = ('flipkart', 'homeshop18', 'infibeam', 'snapdeal', 'sulekha', 'tradus')

    def post(self):
        catalog_client = CatalogClient().get_client()
        retriever = Retriever()

        # vendor id -> display name, for the transfer-price columns.
        vendors = {}
        for vendor in catalog_client.getAllVendors():
            vendors[vendor.id] = vendor.name

        self.set_header('Content-Type', 'text/csv')
        self.set_header("Content-disposition", "inline; filename=price-comparison.xls")

        newLine = '\n'
        tab = '\t'

        # BUG FIX: the header previously listed only five price columns
        # (Saholic..Snapdeal) while each data row emits seven (Saholic plus
        # six competitor sites), misaligning every row in the exported sheet.
        # Sulekha and Tradus columns are now included.
        header = 'Product' + tab
        header += ('Vendor' + tab + 'TP' + tab) * 3
        header += ('Saholic' + tab + 'Flipkart' + tab + 'Homeshop18' + tab +
                   'Infibeam' + tab + 'Snapdeal' + tab + 'Sulekha' + tab +
                   'Tradus' + newLine)

        responseText = header

        for item in getValidItems():
            vendorItemPricings = catalog_client.getAllItemPricing(item.id)
            sortedPricings = sorted(vendorItemPricings, key = lambda vendorItemPricing: vendorItemPricing.transferPrice)
            productName = getProductName(item)

            row = productName + tab

            # Up to three cheapest vendor/transfer-price pairs; shorter lists
            # are padded with empty cells to keep columns aligned.
            for i in range(3):
                if len(sortedPricings) > i:
                    row += vendors[sortedPricings[i].vendorId] + tab + str(sortedPricings[i].transferPrice) + tab
                else:
                    row += tab + tab

            row += str(item.sellingPrice) + tab

            # Normalise empty model fields to None for the retriever.
            model_name = item.modelName.strip() if len(item.modelName.strip()) > 0 else None
            model_number = item.modelNumber.strip() if len(item.modelNumber.strip()) > 0 else None

            search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)

            clusters = getProductClusters(search_results)
            filtered_clusters = getFilteredClustersWithTopScores(clusters)
            display_info = getDisplayInfo(filtered_clusters, productName)

            # One price column per competitor; missing prices become blanks.
            for source in DownloadHandler.SOURCES:
                info = display_info[source]
                if 'price' in info and info['price'] is not None:
                    row += info['price'] + tab
                else:
                    row += tab

            responseText += row + newLine

        self.write(responseText)
    
# Application settings: 'static_path' serves the bundled jQuery file;
# 'login_url' is where @tornado.web.authenticated redirects anonymous users.
# NOTE(review): the cookie secret is hard-coded in source -- consider loading
# it from app.cfg instead of committing it to version control.
settings  = {
        'static_path': os.path.join(os.path.dirname(__file__), 'static'),
        'login_url': '/login', 
        'cookie_secret' :"61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo="
}

# URL routing table: one route per handler defined above, plus a static route
# for the jQuery asset.
application = tornado.web.Application([
                (r"/", MainHandler),
                (r"/login", LoginHandler),
                (r"/feedback", FeedbackHandler),
                (r"/feedback-url", URLFeedbackHandler),
                (r"/watchlist", WatchlistHandler),
                (r"/download", DownloadHandler),
                (r"/(jquery-1.6.2.min\.js)", tornado.web.StaticFileHandler, dict(path=settings['static_path']))
            ], **settings)

# Entry point: serve the dashboard on port 8889.
if __name__ == '__main__':
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(8889)
    tornado.ioloop.IOLoop.instance().start()