| 3232 |
varun.gupt |
1 |
'''
|
|
|
2 |
Created on 26-Aug-2011
|
|
|
3 |
|
|
|
4 |
@author: Varun Gupta
|
|
|
5 |
'''
|
| 3350 |
varun.gupt |
6 |
import json, sys, os
|
| 3232 |
varun.gupt |
7 |
|
| 3350 |
varun.gupt |
8 |
# Put the parent directory of the local checkout's ``shop2020`` package
# directory on sys.path so that the ``shop2020`` package can be imported.
# HOME is still preferred; os.path.expanduser("~") is only the fallback for
# environments where HOME is not exported, which used to raise KeyError at
# import time.
cmd_folder = os.path.dirname(
    os.path.abspath(
        os.environ.get("HOME", os.path.expanduser("~")) + "/code/trunk/PyProj/src/shop2020/"
    )
)
if cmd_folder not in sys.path:
    # Insert at the front so this checkout is searched before other entries.
    sys.path.insert(0, cmd_folder)
|
|
|
11 |
|
|
|
12 |
from shop2020.clients.CatalogClient import CatalogClient
|
|
|
13 |
|
| 3232 |
varun.gupt |
14 |
def isValidRule(rule):
    '''
    Tell whether a rule carries both a 'url' and a 'source' value.

    Returns False when the rule is None, when either key is missing,
    or when either value is None; returns True otherwise.
    '''
    if rule is None:
        return False

    try:
        # 'url' is looked up first; 'source' is only consulted when 'url' is set.
        return rule['url'] is not None and rule['source'] is not None
    except KeyError:
        # A missing key is treated the same way as a None value.
        return False
|
|
|
30 |
|
|
|
31 |
def extractBrandAndName(full_name):
    '''
    Split a product's full name into a (brand, name) tuple.

    The known brands are fetched from the catalog client for category
    10001 on every call. The first brand that full_name starts with is
    returned together with the rest of the name, stripped of surrounding
    whitespace.

    Returns ("", full_name) when no brand matches or when the brand lookup
    fails. A failure is printed rather than raised, so the function stays
    best-effort.
    '''
    try:
        client = CatalogClient().get_client()
        # Copy so the extra entry below is never added to the list object
        # handed back by the client.
        brands = list(client.getAllBrandsByCategory(10001))
        brands.append('Blackberry')  # To resolve issue of 'BlackBerry' and 'Blackberry'

        print(brands)

        for brand in brands:
            if full_name.startswith(brand):
                # Drop only the leading brand. str.replace() would also delete
                # any later occurrence of the brand inside the product name.
                return (brand, full_name[len(brand):].strip())
    except Exception as e:
        print(e)

    return ("", full_name)
|
|
|
47 |
|
|
|
48 |
def getItemsWithTopScore(items):
    '''
    Return every item whose 'score' equals the highest score in items.

    items is an iterable of dicts that each carry a 'score' key. The
    returned items keep the relative order they had in items. An empty
    input yields an empty list.

    The highest score is computed explicitly instead of being inferred
    from the position of the items, so the result is also correct when
    items is not sorted by descending score or when the scores are
    below -1.0.
    '''
    items = list(items)  # allows a second pass, even over a one-shot iterable
    if not items:
        return []

    top_score = max(item['score'] for item in items)
    return [item for item in items if item['score'] == top_score]
|
|
|
60 |
|
|
|
61 |
def isPriceSame(items):
    '''
    Tell whether every item in the sequence carries the same 'price'.

    Neighbouring items are compared pairwise. A sequence with fewer than
    two items has nothing to compare and is reported as having the same
    price.
    '''
    neighbours = zip(items, items[1:])
    return not any(left['price'] != right['price'] for left, right in neighbours)
|
|
|
66 |
|
|
|
67 |
def getProductClusters(products):
    '''
    Group a list of products (as returned from search results) by their
    'source'.

    The result always contains the keys 'flipkart', 'homeshop18',
    'infibeam' and 'letsbuy', each mapped to the list of products from
    that source in their original order. A product whose 'source' is not
    one of these keys raises KeyError.
    '''
    buckets = dict((name, []) for name in ('flipkart', 'homeshop18', 'infibeam', 'letsbuy'))

    for entry in products:
        origin = entry['source']
        buckets[origin].append(entry)

    return buckets
|
|
|
79 |
|
|
|
80 |
def getFilteredClustersWithTopScores(product_clusters):
    '''
    Reduce every cluster to its top-scoring products.

    product_clusters maps a source name to a list of products. The result
    is a new dictionary with the same keys, each mapped to whatever
    getItemsWithTopScore() returns for that source's products.
    '''
    return dict(
        (source, getItemsWithTopScore(product_clusters[source]))
        for source in product_clusters
    )
|
|
|
87 |
|
|
|
88 |
def removePriceFormatting(price_string):
    '''
    Strip the display formatting from a price string.

    Every 'Rs.' marker and every comma is removed, then surrounding
    whitespace is trimmed. The result is still a string.
    '''
    cleaned = price_string
    for token in ('Rs.', ','):
        cleaned = cleaned.replace(token, '')
    return cleaned.strip()
|
|
|
90 |
|
|
|
91 |
def getDisplayInfo(filtered_cluster):
    '''
    Build the per-source display record for a filtered cluster.

    filtered_cluster maps a source name to its list of products. The
    result has the keys 'flipkart', 'homeshop18', 'infibeam' and
    'letsbuy'. For every source present in filtered_cluster the record
    holds 'price', 'data', 'url' and 'text':

    * no products: all None except text, which is 'Not Found';
    * products that all share one price: price and url of the first
      product, with text set to that price without its formatting;
    * products with differing prices: data holds the products serialised
      as JSON and text is 'Conflict'.

    Sources missing from filtered_cluster keep an empty record.
    '''
    display_info = dict((name, {}) for name in ('flipkart', 'homeshop18', 'infibeam', 'letsbuy'))

    for source in filtered_cluster:
        products = filtered_cluster[source]

        if len(products) == 0:
            fields = {'price': None, 'data': None, 'url': None, 'text': 'Not Found'}
        elif isPriceSame(products):
            # All candidates agree on the price, so the first one represents them.
            first = products[0]
            fields = {
                'price': first['price'],
                'data': None,
                'url': first['url'],
                'text': removePriceFormatting(first['price']),
            }
        else:
            # Prices disagree: hand over the candidates themselves as JSON.
            fields = {
                'price': None,
                'data': json.dumps(products),
                'url': None,
                'text': 'Conflict',
            }

        display_info[source].update(fields)

    return display_info
|