Subversion Repositories SmartDukaan

Rev

Rev 6183 | Blame | Compare with Previous | Last modification | View Log | RSS feed

'''
Created on 26-Aug-2011

@author: Varun Gupta
'''
import json, sys, os

# Make the project's source root importable before the shop2020 imports below.
# abspath() drops the trailing slash, so dirname() yields
# $HOME/code/trunk/PyProj/src, i.e. the parent of the shop2020 directory.
# Raises KeyError at import time if the HOME environment variable is unset.
cmd_folder = os.path.dirname(os.path.abspath(os.environ["HOME"] + "/code/trunk/PyProj/src/shop2020/"))
if cmd_folder not in sys.path:
    # Prepend (rather than append) so this checkout is searched first.
    sys.path.insert(0, cmd_folder)

from shop2020.clients.CatalogClient import CatalogClient
from shop2020.thriftpy.model.v1.catalog.ttypes import status

# Name of a character encoding. Nothing in the code shown here reads this
# constant; presumably it is used by modules importing this one -- verify.
CHARACTER_ENCODING = 'ISO-8859-1'

class BrandAndModelExtracter:
    '''
    Splits a full product name into a (brand, model) pair by matching the
    start of the name against a list of known brand names.
    '''

    def __init__(self):
        '''
        Loads the brand list from the catalog service (category ids 10001 and
        11001). If that fails for any reason, a built-in list is used instead.
        '''
        try:
            client = CatalogClient().get_client()
            self.brands = client.getAllBrandsByCategory(10001)
            self.brands.extend(client.getAllBrandsByCategory(11001))
        except Exception:
            # Deliberate best-effort: the extracter must stay usable even when
            # the catalog service cannot be reached.
            self.brands = ['Micromax', 'BlackBerry', 'Blackberry', 'Motorola', 'Alcatel', 'Sony Ericsson', 'Apple', \
                      'Spice', 'Nokia', 'HTC', 'Samsung', 'LG', 'Dell', 'Karbonn', 'Lava', 'Canon', 'Nikon', 'Sony']

        self.brands.append('Blackberry') #To resolve issue of 'BlackBerry' and 'Blackberry'

    def extract(self, full_name):
        '''
        Returns a tuple (brand, remainder) for the given product name.

        The name is stripped of surrounding whitespace first. If it starts
        with one or more known brands, the longest one is chosen, so that
        'Sony Ericsson X10' is not split as ('Sony', 'Ericsson X10') whatever
        the order of the brand list. Only the leading brand is removed; later
        occurrences of the brand inside the name are kept.

        Returns ("", full_name) when no known brand prefixes the name.
        '''
        full_name = full_name.strip()

        # Empty/None brand entries are skipped: they would match every name.
        matching_brands = [brand for brand in self.brands
                           if brand and full_name.startswith(brand)]

        if not matching_brands:
            return ("", full_name)

        brand = max(matching_brands, key=len)
        # Slice off the prefix instead of str.replace(), which would also
        # delete the brand wherever else it appears in the name.
        return (brand, full_name[len(brand):].strip())

class DuplicateMappingTracker:
    '''
    Records which entity ids have been mapped to each URL, so that URLs
    mapped to more than one entity can be reported.
    '''

    def __init__(self):
        # url -> list of entity ids, in the order they were tracked
        self.mapping = {}

    def track(self, url, entity_id):
        '''
        Registers that the given entity id is mapped to the given URL.
        '''
        self.mapping.setdefault(url, []).append(entity_id)

    def getDuplicateMappings(self):
        '''
        Returns a dictionary {url: [entity ids]} containing only the URLs
        that were tracked for more than one entity id. The lists are the
        tracker's own lists, not copies.
        '''
        duplicate_mappings = {}

        # Scan the tracked mappings (the earlier code scanned the empty
        # result dictionary and therefore never reported anything).
        for url, entities in self.mapping.items():
            if len(entities) > 1:
                duplicate_mappings[url] = entities

        return duplicate_mappings
    
def getURLSource(url):
    '''
    Returns the source (site) name contained in a URL, or None when it
    cannot be determined.

    Only the host part of the URL is examined. For hosts with a sub-domain
    the second label is returned ('www.flipkart.com' -> 'flipkart',
    'mobiles.sulekha.com' -> 'sulekha'); for bare domains the first label is
    returned ('adexmart.com' -> 'adexmart'). Hosts without any dot, and
    values that are not strings, yield None.
    '''
    try:
        # Drop the scheme, then everything after the host, so that dots in
        # the path or query string cannot be mistaken for host separators.
        host = url.split('://', 1)[-1]
        for separator in ('/', '?', '#'):
            host = host.split(separator, 1)[0]

        labels = host.split('.')
        if len(labels) < 2:
            return None

        # With a sub-domain the site name is the second label; a bare
        # 'name.tld' host carries it in the first label.
        if len(labels) > 2:
            label = labels[1]
        else:
            label = labels[0]

        return str(label.strip())
    except (AttributeError, TypeError, UnicodeError):
        # Best-effort contract: unusable input gives None instead of raising.
        return None

def isValidRule(rule):
    '''
    Tells whether a rule is usable: it must not be None and must carry
    non-None values under both the 'url' and the 'source' keys. A missing
    key makes the rule invalid.
    '''
    if rule is None:
        return False

    try:
        # 'source' is only looked up when 'url' is present and not None.
        return rule['url'] is not None and rule['source'] is not None
    except KeyError:
        return False

def getItemsWithTopScore(items):
    '''
    Returns the items whose 'score' equals the highest score found in the
    given items, in their original order.

    The input does not have to be sorted. An empty input gives an empty
    list. For input already sorted by descending score the result is the
    leading group of equally scored items.
    '''
    items = list(items)
    if not items:
        return []

    top_score = max(item['score'] for item in items)
    return [item for item in items if item['score'] == top_score]

def isPriceSame(items):
    '''
    Returns True when every item has the same 'price' as the item that
    follows it, which is trivially the case for zero or one item.
    '''
    neighbours = zip(items, items[1:])
    return not any(current['price'] != following['price']
                   for current, following in neighbours)

def getProductClusters(products):
    '''
    Groups products (as returned from search results) by their 'source'.

    The result always has one list per known source, empty when no product
    came from it. A product whose source is not one of the known ones
    raises KeyError.
    '''
    clusters_by_source = {
        'adexmart': [],
        'flipkart': [],
        'homeshop18': [],
        'infibeam': [],
        'snapdeal': [],
        'tradus': [],
        'sulekha': []
    }

    for entry in products:
        bucket = clusters_by_source[entry['source']]
        bucket.append(entry)

    return clusters_by_source

def getFilteredClustersWithTopScores(product_clusters):
    '''
    Returns a new dictionary with the same sources as product_clusters,
    where each product list has been passed through getItemsWithTopScore.
    '''
    return dict(
        (source_name, getItemsWithTopScore(source_products))
        for source_name, source_products in product_clusters.iteritems()
    )

def removePriceFormatting(price_string):
    '''
    Reduces a displayed price such as ' Rs. 12,345.00 ' to its whole-number
    digits ('12345'): the currency marker, thousands separators and spaces
    are removed and everything from the first '.' onwards is dropped.
    '''
    cleaned = price_string.strip()

    # Order matters: 'Rs.' has to go before 'Rs', otherwise the leftover '.'
    # would make the final split return an empty string.
    # NOTE(review): the space appears twice, exactly as in the earlier
    # one-liner; the second may have been a non-breaking space -- confirm.
    for token in ('Rs.', 'Rs', ',', ' ', ' '):
        cleaned = cleaned.replace(token, '')

    return cleaned.partition('.')[0]
    
def getSearchURL(source, name):
    '''
    Builds the search-page URL of the given source for the given product
    name. The name is inserted as-is at the '$$' placeholder of the
    source's URL template. An unknown source raises KeyError.
    '''
    placeholder = '$$'
    url_templates = {
        'flipkart': 'http://www.flipkart.com/search/a/all?query=$$',
        'homeshop18': 'http://www.homeshop18.com/search:$$',
        'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
        'infibeam': 'http://www.infibeam.com/search?q=$$',
        'snapdeal': 'http://www.snapdeal.com/search?catId=&categoryId=12&locUsed=false&vertical=p&keyword=$$',
        'sulekha': 'http://mobiles.sulekha.com/search.htm?cx=partner-pub-3470583419345383%3A8ggsimfcaaa&cof=FORID%3A10&ie=ISO-8859-1&q=$$&sa=Go',
        'tradus': 'http://www.tradus.com/search/tradus_search/?query=$$'
    }

    template = url_templates[source]
    return template.replace(placeholder, name)

def getDisplayInfo(filtered_cluster, product_name):
    '''
    Builds, per source, the dictionary used to display that source's result
    for a product.

    For every source present in filtered_cluster:
      - no products: 'text' is 'Not Found' and 'url' is the source's search
        URL for product_name;
      - products that all share one price: 'price', 'url' and 'title' come
        from the first product and 'text' is the unformatted price;
      - products with differing prices: 'text' is 'Conflict' and 'data'
        holds the products serialised as JSON (no 'title' is set).
    Sources absent from filtered_cluster keep an empty dictionary.
    '''
    display_info = {'adexmart': {}, 'flipkart': {}, 'homeshop18': {}, 'infibeam': {}, 'snapdeal': {}, 'tradus':{}, 'sulekha':{}}

    for source, products in filtered_cluster.iteritems():

        if len(products) == 0:
            entry = display_info[source]
            entry['price'] = None
            entry['data'] = None
            entry['url'] = getSearchURL(source, product_name)
            entry['text'] = 'Not Found'
            continue

        if not isPriceSame(products):
            entry = display_info[source]
            entry['price'] = None
            entry['data'] = json.dumps(products)
            entry['url'] = None
            entry['text'] = 'Conflict'
            continue

        # All candidates agree on the price, so the first one represents them.
        first = products[0]
        entry = display_info[source]
        entry['price'] = first['price']
        entry['data'] = None
        entry['url'] = first['url']
        entry['text'] = removePriceFormatting(first['price'])
        entry['title'] = first['name']

    return display_info

def getSynonyms(file_path='/usr/price-comp-dashboard/synonyms.json'):
    '''
    Loads the synonyms JSON file and returns a dictionary mapping each
    top-level key, converted to int, to a flat list of synonyms.

    For every entry the list is the values under 'MODEL_NAME' followed by
    the values under 'MODEL_NUMBER'; either key may be missing.

    file_path defaults to the dashboard's synonyms file; another path can be
    given, e.g. for testing.

    Raises IOError/OSError if the file cannot be opened and ValueError if it
    is not valid JSON or a top-level key is not an integer.
    '''
    # The context manager closes the file even when parsing fails.
    with open(file_path, 'r') as synonyms_file:
        raw_synonyms = json.load(synonyms_file)

    synonyms = {}
    for key, value in raw_synonyms.items():
        list_synonyms = []

        for field in ('MODEL_NAME', 'MODEL_NUMBER'):
            if field in value:
                list_synonyms.extend(value[field])

        synonyms[int(key)] = list_synonyms

    return synonyms

def getValidItems():
    '''
    Fetches the catalog items with status ACTIVE, PAUSED or PAUSED_BY_RISK,
    keeps those in the accepted categories, retains one item per
    catalogItemId (the last one fetched wins) and returns them sorted by
    brand.
    '''
    client = CatalogClient().get_client()

    fetched_items = client.getAllItemsByStatus(status.ACTIVE)
    for further_status in (status.PAUSED, status.PAUSED_BY_RISK):
        fetched_items.extend(client.getAllItemsByStatus(further_status))

    accepted_categories = (10002, 10003, 10004, 10005, 10010, 11002, 11003)
    item_by_catalog_id = {}

    for entry in fetched_items:
        if entry.category not in accepted_categories:
            continue
        item_by_catalog_id[entry.catalogItemId] = entry

    def brand_of(entry):
        return entry.brand

    return sorted(item_by_catalog_id.itervalues(), key = brand_of)

def getProductName(item):
    '''
    Builds the display name "<brand> <model name> <model number>" of an item.

    Model name and model number are stripped of surrounding whitespace and
    left out when empty or None. The brand is always followed by a space
    and so is the model name, which means the result ends with a space when
    the item has no model number (unchanged from the earlier behaviour).
    '''
    # 'or' guards against attributes that are None instead of a string.
    model_name = (item.modelName or '').strip()
    model_number = (item.modelNumber or '').strip()

    product_name = "%s " % item.brand
    if model_name:
        product_name += "%s " % model_name
    product_name += model_number

    return product_name

if __name__ == '__main__':
    # Manual smoke run: build an extracter, then dump the synonyms table.
    extracter = BrandAndModelExtracter()
    # Example call kept for reference:
    #    print extracter.extract('Nokia X5-01 (Pink)')
    loaded_synonyms = getSynonyms()
    print(loaded_synonyms)