# --- Subversion web-viewer navigation residue (not source code) ---
# Subversion Repositories SmartDukaan
# Rev 4198 | Rev 5377 | Go to most recent revision | Blame | Compare with
# Previous | Last modification | View Log | RSS feed

'''
Created on 31-Aug-2011

@author: Varun Gupta
'''
import tornado.httpserver, tornado.ioloop, tornado.web
import json, os, ConfigParser, sys
from PyLucene.Retriever import Retriever
from Utils import getItemsWithTopScore, isPriceSame, getProductClusters, getFilteredClustersWithTopScores, \
    getDisplayInfo, getSynonyms
from ScraperLoader import getScraper
from PyLucene.IndexBuilder import IndexBuilder
from DataStore.WatchListManager import WatchListManager

# Make the shop2020 package (thrift client/model modules imported below)
# importable; assumes the checkout lives under $HOME/code/trunk/PyProj/src.
cmd_folder = os.path.dirname(os.path.abspath(os.environ["HOME"] + "/code/trunk/PyProj/src/shop2020/"))
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)

from shop2020.clients.CatalogClient import CatalogClient
from shop2020.thriftpy.model.v1.catalog.ttypes import status

class BaseHandler(tornado.web.RequestHandler):
    # Base for every handler: a user counts as logged in when the signed
    # 'userauth' cookie (set by LoginHandler.post) verifies. A falsy return
    # makes @tornado.web.authenticated redirect to settings['login_url'].
    def get_current_user(self):
        return self.get_secure_cookie('userauth')
    
class LoginHandler(BaseHandler):
    def get(self):
        self.loader = tornado.template.Loader('HTMLTemplates')
        self.write(self.loader.load('LoginForm.html').generate())
    
    def post(self):
        config = ConfigParser.SafeConfigParser()
        config.read('app.cfg')
        
        username = self.get_argument('username')
        password = self.get_argument('password')
        
        if username == config.get('auth', 'username') and password == config.get('auth', 'password'):
            print 'Password Matched'
            self.set_secure_cookie("userauth", username + '_' + password)
            self.redirect('/')
        else:
            self.redirect('/login')

class URLFeedbackHandler(BaseHandler):
    url_feedback_file = '/tmp/price-comp-dashboard/urls.json'
    secondary_crawl_file = '/tmp/price-comp-dashboard/secondary-crawl.json'
    
    def post(self):
        try:
            fp_read = open(URLFeedbackHandler.url_feedback_file, 'r')
            urls = json.load(fp_read)
            
        except ValueError as e:
            print e
            urls = {}
        finally:
            fp_read.close()
        
        print 'Existing URLs: ', urls
        
        entity = self.get_argument('entity')
        source = self.get_argument('source')
        url = self.get_argument('url')
        
        if entity in urls:
            urls[entity][source] = url
        else:
            urls[entity] = {source: url}
        
        print 'New set of URLs: ', urls
        fp_write = open(URLFeedbackHandler.url_feedback_file, 'w')
        json.dump(urls, fp_write, indent = 4)
        fp_write.close()
        
        #Scraping the page
        scraper = getScraper(source)
        productData = scraper.getDataFromProductPage(url)
        
        #Storing the data
        try:
            fp_read = open(URLFeedbackHandler.secondary_crawl_file, 'r')
            data = json.load(fp_read)
            
        except ValueError as e:
            print e
            data = {}
        finally:
            fp_read.close()
        
        if entity in data:
            data[entity][source] = productData
        else:
            data[entity] = {source: productData}
        
        print 'Secondary crawled data:', data
        
        fp_write = open(URLFeedbackHandler.secondary_crawl_file, 'w')
        json.dump(data, fp_write, indent = 4)
        fp_write.close()
        
        productData['entityId'] = entity
        self.write(productData)
    
    def get(self):
        try:
            fp_read = open(URLFeedbackHandler.secondary_crawl_file, 'r')
            data = json.load(fp_read)
            
        except ValueError as e:
            print e
            data = {}
        finally:
            fp_read.close()
        self.write(data)

class FeedbackHandler(BaseHandler):
    
    def save(self, entity, source, feedback_type, selected_item = None):
        self.feedback_file = '/tmp/price-comp-dashboard/feedback.json'
        file_to_read = open(self.feedback_file, 'r')
        
        feedbacks_json = file_to_read.read()
        file_to_read.close()
        
        feedbacks = json.loads(feedbacks_json) if len(feedbacks_json) > 1 else {}
        
        if entity not in feedbacks: feedbacks[entity] = {}
        
        feedbacks[entity][source] = {'type': feedback_type}
        
        if selected_item is not None:   feedbacks[entity][source]['selected_item'] = selected_item
        
        file_to_write = open(self.feedback_file, 'w')
        json.dump(feedbacks, file_to_write, indent = 4)
        file_to_write.close()
        
    def post(self):
        feedback_type = self.get_argument('type')
        entity_id = self.get_argument('entityId')
        price_data_source = self.get_argument('source')
        
        print feedback_type, entity_id, price_data_source
        
        if feedback_type == 'select':
            selected_item = self.get_argument('selected')
            print selected_item
            self.save(entity_id, price_data_source, feedback_type, selected_item)
        else:
            self.save(entity_id, price_data_source, feedback_type)
    
    def get(self):
        print 'GET: Feedback data'
        self.feedback_file = '/tmp/price-comp-dashboard/feedback.json'
        file_to_read = open(self.feedback_file, 'r')
        
        feedbacks_json = file_to_read.read()
        file_to_read.close()
        
        self.write(feedbacks_json)
        
class MainHandler(BaseHandler):

    def mapSearchUrls(self, map, name):
        
        search_urls = {
            'flipkart': 'http://www.flipkart.com/search-mobiles?query=$$&from=all&searchGroup=mobiles',
            'homeshop18': 'http://www.homeshop18.com/nokia%20n97/search:$$/categoryid:3024',
            'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
            'infibeam': 'http://www.infibeam.com/Mobiles/search?q=$$',
            'letsbuy': 'http://www.letsbuy.com/advanced_search_result.php?cPath=254&keywords=$$',
            'snapdeal': '$$'
        }
        
        for key in search_urls.iterkeys():
            try:
                if map[key]['url'] == 'Not Found':
                    map[key]['url'] = search_urls[key].replace('$$', name)
            except KeyError:
                map[key] = {'price': 'Not Found', 'url': search_urls[key].replace('$$', name)}
        return map
    
    
    @tornado.web.authenticated
    def get(self):
        self.loader = tornado.template.Loader('HTMLTemplates')
        catalog_client = CatalogClient().get_client()
        items = catalog_client.getAllItemsByStatus(status.ACTIVE)
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED))
        items.extend(catalog_client.getAllItemsByStatus(status.PAUSED_BY_RISK))
#        synonyms = getSynonyms()
#        print synonyms
        retriever = Retriever()
        products = {}
        
        for item in items:
            if item.category in (10002, 10003, 10004, 10005, 10010):  products[item.catalogItemId] = item
        
        comparative_prices = []
        
        for item in sorted(products.itervalues(), key = lambda item: item.brand):
            try:
                model_name = item.modelName.strip() if len(item.modelName.strip()) > 0 else None
                model_number = item.modelNumber.strip() if len(item.modelNumber.strip()) > 0 else None
                
                #synonyms_for_this_model = synonyms[item.catalogItemId] if item.catalogItemId in synonyms else None
                
                search_results = retriever.retrieve(model_number = model_number, model_name = model_name, brand = item.brand, synonyms = None)
                
                clusters = getProductClusters(search_results)
                filtered_clusters = getFilteredClustersWithTopScores(clusters)
                
                product_name = "%s " % item.brand
                product_name += "%s " % model_name if model_name is not None else ''
                product_name += model_number if model_number is not None else ''
                
                display_info = getDisplayInfo(filtered_clusters, product_name)
                print 'Display Info: ', display_info
                
                display_info['entity_id'] = item.catalogItemId
                display_info['product_name'] = product_name
                display_info['saholic'] = {'price': item.sellingPrice}
                comparative_prices.append(display_info)
            except Exception as e:
                print 'Exception:', e
        
        json.dump(comparative_prices, open('/tmp/pcd_log', 'w'), indent = 4)
        self.write(self.loader.load('PriceChart.html').generate(data = comparative_prices))
        
class WatchlistHandler(BaseHandler):
    
    @tornado.web.authenticated
    def get(self):
        watchlistManager = WatchListManager()
        watchlist = watchlistManager.getWatchlist()
        print 'Getting watchlist: ', watchlist
        entityIds = []
        
        for id in watchlist:
            entityIds.append(int(id))
        
        self.write(str(entityIds))
    
    def post(self):
        watchlistManager = WatchListManager()
        
        requestType = self.get_argument('type').strip()
        entityId = self.get_argument('entity')
        
        print 'Request Type:', requestType, ', Entity Id: ', entityId
        
        if requestType == 'save':
            watchlistManager.save(entity = entityId)
            
        elif requestType == 'delete':
            watchlistManager.remove(entity = entityId)
        
        self.write("{}")

# Application-wide Tornado settings.
settings  = {
        'static_path': os.path.join(os.path.dirname(__file__), 'static'),
        # Unauthenticated requests to @authenticated handlers redirect here.
        'login_url': '/login', 
        # NOTE(review): secret is hard-coded in source control — consider
        # moving it into app.cfg alongside the auth credentials.
        'cookie_secret' :"61oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo="
}

# URL routing table; the static handler serves only the bundled jQuery file.
application = tornado.web.Application([
                (r"/", MainHandler),
                (r"/login", LoginHandler),
                (r"/feedback", FeedbackHandler),
                (r"/feedback-url", URLFeedbackHandler),
                (r"/watchlist", WatchlistHandler),
                (r"/(jquery-1.6.2.min\.js)", tornado.web.StaticFileHandler, dict(path=settings['static_path']))
            ], **settings)

if __name__ == '__main__':
    # Single-process HTTP server on port 8889; blocks in the IOLoop forever.
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(8889)
    tornado.ioloop.IOLoop.instance().start()