# Rev 6183 | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 26-Aug-2011

@author: Varun Gupta

Helpers for comparing catalog items with products returned from other
stores' search results: brand extraction, duplicate-mapping tracking,
per-source clustering and display formatting.
'''
import json
import os
import sys

# Put the directory that contains the shop2020 package on sys.path before the
# shop2020 imports below are attempted.
cmd_folder = os.path.dirname(os.path.abspath(os.environ["HOME"] + "/code/trunk/PyProj/src/shop2020/"))
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)

from shop2020.clients.CatalogClient import CatalogClient
from shop2020.thriftpy.model.v1.catalog.ttypes import status

CHARACTER_ENCODING = 'ISO-8859-1'

# Every source (store) the clustering and display helpers know about.
_SOURCES = ('adexmart', 'flipkart', 'homeshop18', 'infibeam', 'snapdeal', 'tradus', 'sulekha')


class BrandAndModelExtracter:
    '''Splits a full product name into a (brand, remainder) pair.'''

    def __init__(self):
        '''
        Loads the brand list from the catalog service (categories 10001 and
        11001); if that fails for any reason, a hard-coded list is used.
        '''
        try:
            client = CatalogClient().get_client()
            self.brands = client.getAllBrandsByCategory(10001)
            self.brands.extend(client.getAllBrandsByCategory(11001))
        except Exception:
            # Best-effort fallback when the catalog cannot be queried.
            self.brands = ['Micromax', 'BlackBerry', 'Blackberry', 'Motorola', 'Alcatel', 'Sony Ericsson', 'Apple',
                           'Spice', 'Nokia', 'HTC', 'Samsung', 'LG', 'Dell', 'Karbonn', 'Lava', 'Canon', 'Nikon', 'Sony']
        # NOTE(review): placed after the try/except because the fallback list
        # already contains this spelling - confirm against the original layout.
        self.brands.append('Blackberry')  # To resolve issue of 'BlackBerry' and 'Blackberry'

    def extract(self, full_name):
        '''
        Returns (brand, rest_of_name) for the first brand in self.brands that
        full_name starts with, or ("", full_name) when no brand matches.

        Only the leading brand is removed; later occurrences of the brand
        text inside the model name are left untouched.
        '''
        full_name = full_name.strip()
        for brand in self.brands:
            if full_name.startswith(brand):
                return (brand, full_name[len(brand):].strip())
        return ("", full_name)


class DuplicateMappingTracker:
    '''Records which entity ids were mapped to each URL.'''

    def __init__(self):
        # url -> list of entity ids tracked for that url
        self.mapping = {}

    def track(self, url, entity_id):
        '''Remembers that entity_id was mapped to url.'''
        self.mapping.setdefault(url, []).append(entity_id)

    def getDuplicateMappings(self):
        '''Returns {url: [entity ids]} for every url tracked more than once.'''
        duplicate_mappings = {}
        for url, entities in self.mapping.items():
            if len(entities) > 1:
                duplicate_mappings[url] = entities
        return duplicate_mappings


def getURLSource(url):
    '''
    Returns the second dot-separated component of url as a str, e.g.
    'flipkart' for 'http://www.flipkart.com/...'. Returns None when that
    component cannot be obtained.

    NOTE(review): for a host without a subdomain ('http://adexmart.com/x.php')
    this yields 'com/x', not the store name - confirm callers expect that.
    '''
    try:
        return str(url.split('.')[1].strip())
    except Exception:
        return None


def isValidRule(rule):
    '''
    Returns True when rule is not None and has non-None 'url' and 'source'
    entries; returns False otherwise, including when either key is missing.
    '''
    try:
        return rule is not None and rule['url'] is not None and rule['source'] is not None
    except KeyError:
        return False


def getItemsWithTopScore(items):
    '''
    Returns the leading run of items whose 'score' never drops below the
    previous item's score (starting from a floor of -1.0). Iteration stops at
    the first item that scores lower.

    Presumably items arrive sorted by descending score, in which case this is
    the set of items tied for the best score - verify against callers.
    '''
    top_items = []
    top_score = -1.0
    for item in items:
        if item['score'] >= top_score:
            top_items.append(item)
            top_score = item['score']
        else:
            break
    return top_items


def isPriceSame(items):
    '''
    Returns True when every item has the same 'price' as the one after it.
    An empty or single-item list counts as "same".
    '''
    for current, following in zip(items, items[1:]):
        if current['price'] != following['price']:
            return False
    return True


def getProductClusters(products):
    '''
    Receives a list of products (returned from search results) and returns
    a dictionary in which the products are grouped by their 'source'.

    Every known source is present in the result, possibly with an empty list.
    Raises KeyError for a product whose 'source' is not a known source.
    '''
    clustered_results = dict((source, []) for source in _SOURCES)
    for product in products:
        clustered_results[product['source']].append(product)
    return clustered_results


def getFilteredClustersWithTopScores(product_clusters):
    '''Applies getItemsWithTopScore to each source's product list.'''
    filtered_cluster = {}
    for source, products in product_clusters.items():
        filtered_cluster[source] = getItemsWithTopScore(products)
    return filtered_cluster


def removePriceFormatting(price_string):
    '''
    Strips 'Rs.'/'Rs', commas and spaces from price_string and drops
    everything from the first remaining '.' onwards, e.g.
    'Rs. 12,499.00' -> '12499'.
    '''
    # NOTE(review): the two space replacements are identical in this copy of
    # the file; one of them may originally have targeted a non-breaking
    # space or '&nbsp;' - confirm against the repository.
    return price_string.strip().replace('Rs.', '').replace('Rs', '').replace(',', '').replace(' ', '').replace(' ', '').split('.')[0]


def getSearchURL(source, name):
    '''
    Returns the search-page URL of the given source with name substituted
    for the query placeholder. name is inserted as-is (no URL encoding).
    Raises KeyError for an unknown source.
    '''
    search_urls = {
        'flipkart': 'http://www.flipkart.com/search/a/all?query=$$',
        'homeshop18': 'http://www.homeshop18.com/search:$$',
        'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
        'infibeam': 'http://www.infibeam.com/search?q=$$',
        'snapdeal': 'http://www.snapdeal.com/search?catId=&categoryId=12&locUsed=false&vertical=p&keyword=$$',
        'sulekha': 'http://mobiles.sulekha.com/search.htm?cx=partner-pub-3470583419345383%3A8ggsimfcaaa&cof=FORID%3A10&ie=ISO-8859-1&q=$$&sa=Go',
        'tradus': 'http://www.tradus.com/search/tradus_search/?query=$$'
    }
    return search_urls[source].replace('$$', name)


def getDisplayInfo(filtered_cluster, product_name):
    '''
    Builds the per-source display dictionary for one product.

    For each source in filtered_cluster:
      * no products       -> text 'Not Found', url is the source's search URL;
      * one agreed price  -> price, url, title of the first product and the
                             unformatted price as text;
      * differing prices  -> text 'Conflict', data is the JSON dump of the
                             candidate products.
    Known sources missing from filtered_cluster keep an empty dictionary.
    '''
    display_info = dict((source, {}) for source in _SOURCES)

    for source, products in filtered_cluster.items():
        info = display_info[source]

        if len(products) == 0:
            info['price'] = None
            info['data'] = None
            info['url'] = getSearchURL(source, product_name)
            info['text'] = 'Not Found'
        elif isPriceSame(products):
            best = products[0]
            info['price'] = best['price']
            info['data'] = None
            info['url'] = best['url']
            info['text'] = removePriceFormatting(best['price'])
            info['title'] = best['name']
        else:
            info['price'] = None
            info['data'] = json.dumps(products)
            info['url'] = None
            info['text'] = 'Conflict'
    return display_info


def getSynonyms(file_path='/usr/price-comp-dashboard/synonyms.json'):
    '''
    Reads the synonyms JSON file and returns {int(key): [synonyms]}, where
    the list is the entry's 'MODEL_NAME' values followed by its
    'MODEL_NUMBER' values (either may be absent).
    '''
    # The context manager closes the file even if reading or parsing fails.
    with open(file_path, 'r') as synonyms_file:
        raw_synonyms = json.loads(synonyms_file.read())

    synonyms = {}
    for key, value in raw_synonyms.items():
        list_synonyms = []
        for field in ('MODEL_NAME', 'MODEL_NUMBER'):
            if field in value:
                list_synonyms.extend(value[field])
        synonyms[int(key)] = list_synonyms
    return synonyms


def getValidItems():
    '''
    Fetches ACTIVE, PAUSED and PAUSED_BY_RISK items from the catalog, keeps
    those in the listed categories (one item per catalogItemId, the last one
    seen wins) and returns them sorted by brand.
    '''
    catalog_client = CatalogClient().get_client()
    items = catalog_client.getAllItemsByStatus(status.ACTIVE)
    items.extend(catalog_client.getAllItemsByStatus(status.PAUSED))
    items.extend(catalog_client.getAllItemsByStatus(status.PAUSED_BY_RISK))

    products = {}
    for item in items:
        if item.category in (10002, 10003, 10004, 10005, 10010, 11002, 11003):
            products[item.catalogItemId] = item
    return sorted(products.values(), key=lambda item: item.brand)


def getProductName(item):
    '''
    Returns "<brand> <model name> <model number>" for item, skipping the
    model name and/or number when they are None or blank. The separating
    space after the brand (and after the model name) is always emitted, so
    the result can end with a space.
    '''
    # "or ''" lets a None attribute be treated the same as a blank one.
    model_name = (item.modelName or '').strip()
    model_number = (item.modelNumber or '').strip()

    product_name = "%s " % item.brand
    if model_name:
        product_name += "%s " % model_name
    if model_number:
        product_name += model_number
    return product_name


if __name__ == '__main__':
    extracter = BrandAndModelExtracter()
    # print(extracter.extract('Nokia X5-01 (Pink)'))
    print(getSynonyms())