Subversion Repositories SmartDukaan

Rev

Rev 3350 | Rev 4039 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
Created on 26-Aug-2011

@author: Varun Gupta
'''
3350 varun.gupt 6
import json, sys, os
3232 varun.gupt 7
 
3350 varun.gupt 8
# Put the directory that contains the shop2020 package on sys.path so the
# project import that follows can be resolved. expanduser() is used instead of
# os.environ["HOME"] so a missing HOME variable does not raise KeyError.
cmd_folder = os.path.dirname(
    os.path.abspath(
        os.path.join(os.path.expanduser("~"), "code/trunk/PyProj/src/shop2020/")
    )
)
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)
11
 
12
from shop2020.clients.CatalogClient import CatalogClient
13
 
3453 varun.gupt 14
CHARACTER_ENCODING = 'ISO-8859-1'
15
 
16
class BrandAndModelExtracter:
    '''
    Splits a product's full name into a (brand, remaining text) pair.

    The known brands are requested from the catalog client for category
    10001. If that request fails for any reason, a built-in list of brands
    is used instead.
    '''

    def __init__(self):
        try:
            client = CatalogClient().get_client()
            # Copy the result: the list is extended below and must not alias
            # an object owned by the client. A non-iterable result (e.g. None)
            # raises here and falls through to the built-in list.
            self.brands = list(client.getAllBrandsByCategory(10001))
        except Exception:
            # Deliberate best-effort fallback when the catalog is unavailable.
            self.brands = ['Micromax', 'BlackBerry', 'Blackberry', 'Motorola',
                           'Alcatel', 'Sony Ericsson', 'Apple', 'Spice',
                           'Nokia', 'HTC', 'Samsung', 'LG', 'Dell', 'Karbonn',
                           'Lava']

        # To resolve issue of 'BlackBerry' and 'Blackberry'
        if 'Blackberry' not in self.brands:
            self.brands.append('Blackberry')

    def extract(self, full_name):
        '''
        Returns (brand, rest) for the first known brand that full_name starts
        with, where rest is full_name without that leading brand and without
        surrounding whitespace. Returns ("", full_name) when no brand matches.
        '''
        for brand in self.brands:
            if full_name.startswith(brand):
                # Cut off only the leading brand. str.replace() would also
                # delete later occurrences of the brand inside the model text.
                return (brand, full_name[len(brand):].strip())

        return ("", full_name)
35
 
3232 varun.gupt 36
def isValidRule(rule):
    '''
    Returns True only when rule is not None and both rule['url'] and
    rule['source'] are present and not None.

    A rule that lacks one of the keys, or that cannot be indexed by key at
    all, is reported as invalid rather than raising.
    '''
    try:
        return (rule is not None
                and rule['url'] is not None
                and rule['source'] is not None)
    except (KeyError, TypeError):
        # KeyError: key missing; TypeError: rule does not support key lookup.
        return False
52
 
53
def getItemsWithTopScore(items):
    '''
    Returns the items whose 'score' equals the highest score found in items,
    in their original order. Returns an empty list for empty input.

    The maximum is computed explicitly, so the result does not depend on
    items being ordered by score, nor on scores being greater than -1.0.
    '''
    candidates = list(items)
    if not candidates:
        return []

    top_score = max(item['score'] for item in candidates)
    return [item for item in candidates if item['score'] == top_score]
65
 
66
def isPriceSame(items):
    '''
    Returns True when every item in items has the same 'price' value.
    An empty or single-item sequence counts as having the same price.
    '''
    # Compare each item with its successor; no pairs means nothing differs.
    return all(current['price'] == following['price']
               for current, following in zip(items, items[1:]))
71
 
72
def getProductClusters(products):
    '''
    Receives a list of products (returned from search results) &
    returns a clustered dictionary, where products are grouped by
    the 'source'

    The keys 'flipkart', 'homeshop18', 'infibeam' and 'letsbuy' are always
    present, even when no product belongs to them. A product whose source is
    not one of these gets a cluster of its own instead of raising KeyError.
    '''
    clustered_results = {'flipkart': [], 'homeshop18': [], 'infibeam': [], 'letsbuy': []}

    for product in products:
        clustered_results.setdefault(product['source'], []).append(product)

    return clustered_results
84
 
85
def getFilteredClustersWithTopScores(product_clusters):
    '''
    Receives a dictionary mapping each source to its list of products and
    returns a new dictionary with the same keys, where every list has been
    reduced by getItemsWithTopScore().
    '''
    filtered_cluster = {}

    # items() rather than iteritems(): works on both Python 2 and Python 3.
    for source, products in product_clusters.items():
        filtered_cluster[source] = getItemsWithTopScore(products)

    return filtered_cluster
92
 
93
def removePriceFormatting(price_string):
    '''
    Returns price_string with every 'Rs.' and every comma removed, and with
    leading and trailing whitespace stripped.
    '''
    return price_string.replace('Rs.', '').replace(',', '').strip()
95
 
96
def getDisplayInfo(filtered_cluster):
    '''
    Builds, for every source in filtered_cluster, a dictionary with the keys
    'price', 'data', 'url', 'text' and 'title':

    - no products: all values None, 'text' is 'Not Found'
    - all products share one price: values are taken from the first product
      and 'text' is its price with the formatting removed
    - prices differ: 'data' holds the products serialised as JSON and 'text'
      is 'Conflict'; the remaining values are None

    The sources 'flipkart', 'homeshop18', 'infibeam' and 'letsbuy' are always
    present in the result; one that is absent from filtered_cluster maps to
    an empty dictionary.
    '''
    display_info = {'flipkart': {}, 'homeshop18': {}, 'infibeam': {}, 'letsbuy': {}}

    # items() rather than iteritems(): works on both Python 2 and Python 3.
    for source, products in filtered_cluster.items():
        # setdefault so a source outside the four defaults does not raise.
        info = display_info.setdefault(source, {})

        if not products:
            info['price'] = None
            info['data'] = None
            info['url'] = None
            info['text'] = 'Not Found'
            # 'title' is set in every branch so all records share one shape.
            info['title'] = None
        elif isPriceSame(products):
            first = products[0]
            info['price'] = first['price']
            info['data'] = None
            info['url'] = first['url']
            info['text'] = removePriceFormatting(first['price'])
            info['title'] = first['name']
        else:
            info['price'] = None
            info['data'] = json.dumps(products)
            info['url'] = None
            info['text'] = 'Conflict'
            info['title'] = None

    return display_info
120
 
121
def getSynonyms(file_path='/tmp/synonyms.json'):
    '''
    Reads the JSON object stored at file_path and returns a dictionary that
    maps int(key) to a list made of the entry's 'MODEL_NAME' values followed
    by its 'MODEL_NUMBER' values. An entry with neither field maps to an
    empty list.

    file_path defaults to the location this function has always read from.
    '''
    # The with-block closes the file even if reading or parsing fails.
    with open(file_path, 'r') as synonyms_file:
        entries = json.load(synonyms_file)

    synonyms = {}
    # items() rather than iteritems(): works on both Python 2 and Python 3.
    for key, value in entries.items():
        list_synonyms = []

        for field in ('MODEL_NAME', 'MODEL_NUMBER'):
            if field in value:
                list_synonyms.extend(value[field])

        synonyms[int(key)] = list_synonyms

    return synonyms
139
 
140
if __name__ == '__main__':
    # Manual check when the module is run as a script.
    extracter = BrandAndModelExtracter()
    # Example: extracter.extract('Nokia X5-01 (Pink)')
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(getSynonyms())