Subversion Repositories SmartDukaan

Rev

Rev 5377 | Rev 5639 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
2
Created on 26-Aug-2011
3
 
4
@author: Varun Gupta
5
'''
3350 varun.gupt 6
import json, sys, os
3232 varun.gupt 7
 
3350 varun.gupt 8
cmd_folder = os.path.dirname(os.path.abspath(os.environ["HOME"] + "/code/trunk/PyProj/src/shop2020/"))
9
if cmd_folder not in sys.path:
10
    sys.path.insert(0, cmd_folder)
11
 
12
from shop2020.clients.CatalogClient import CatalogClient
5401 varun.gupt 13
from shop2020.thriftpy.model.v1.catalog.ttypes import status
3350 varun.gupt 14
 
3453 varun.gupt 15
# Name of a character encoding (Latin-1). It is not referenced anywhere in the
# visible part of this module -- presumably used by importers; TODO confirm.
CHARACTER_ENCODING = 'ISO-8859-1'
16
 
17
class BrandAndModelExtracter:
    '''
    Splits a product's full name into a (brand, model) pair by matching
    the start of the name against a list of known brand names.
    '''

    def __init__(self):
        '''
        Loads the brand list from the catalog client (category 10001).
        When that lookup fails for any reason a built-in list is used instead.
        '''
        try:
            client = CatalogClient().get_client()
            # list() gives us our own mutable copy; a non-iterable reply
            # raises here and is handled by the fallback below instead of
            # crashing later on append().
            self.brands = list(client.getAllBrandsByCategory(10001))
        except Exception:
            # Deliberate best-effort: any failure falls back to this list.
            self.brands = ['Micromax', 'BlackBerry', 'Blackberry', 'Motorola', 'Alcatel', 'Sony Ericsson', 'Apple', \
                      'Spice', 'Nokia', 'HTC', 'Samsung', 'LG', 'Dell', 'Karbonn', 'Lava']

        # To resolve issue of 'BlackBerry' and 'Blackberry'; skipped when the
        # spelling is already present so the list carries no duplicate.
        if 'Blackberry' not in self.brands:
            self.brands.append('Blackberry')

    def extract(self, full_name):
        '''
        Returns (brand, remainder) when the stripped full_name starts with
        a known brand, otherwise ("", stripped full_name).
        Brands are tried in list order; the first match wins.
        '''
        full_name = full_name.strip()

        for brand in self.brands:
            if full_name.startswith(brand):
                # Cut off only the leading brand. str.replace() would also
                # delete later occurrences inside the model name
                # (e.g. 'LG Optimus LG-P500' -> 'Optimus -P500').
                return (brand, full_name[len(brand):].strip())

        return ("", full_name)
37
 
5291 varun.gupt 38
class DuplicateMappingTracker:
    '''
    Remembers which entity ids were tracked against each URL so that URLs
    carrying more than one entity id can be reported.
    '''

    def __init__(self):
        # url -> list of entity ids, in the order track() received them
        self.mapping = {}

    def track(self, url, entity_id):
        '''Records entity_id against url.'''
        self.mapping.setdefault(url, []).append(entity_id)

    def getDuplicateMappings(self):
        '''
        Returns a dictionary {url: [entity ids]} restricted to the URLs
        that have more than one tracked entity id.
        '''
        duplicate_mappings = {}

        # Scan the tracked data. Iterating over the (still empty) result
        # dictionary, as the code used to, always produced {}.
        for url, entities in self.mapping.items():
            if len(entities) > 1:
                duplicate_mappings[url] = entities

        return duplicate_mappings
58
 
4198 varun.gupt 59
def getURLSource(url):
    '''
    Returns the site name taken from the host part of a URL, e.g.
    'flipkart' for 'http://www.flipkart.com/...', or None when it cannot
    be determined.

    A host with three or more dot-separated labels yields its second
    label; a two-label host such as 'adexmart.com' yields its first label
    (splitting the whole URL on '.' would give 'com/search' there).
    '''
    try:
        # Isolate the host so dots in the path, query or fragment are ignored.
        host = url.split('://', 1)[-1]
        for separator in ('/', '?', '#'):
            host = host.split(separator, 1)[0]

        labels = host.split('.')

        if len(labels) >= 3:
            return str(labels[1].strip())
        if len(labels) == 2:
            return str(labels[0].strip())
        return None
    except Exception:
        # Best-effort contract: this helper never raises, it returns None.
        return None
64
 
3232 varun.gupt 65
def isValidRule(rule):
    '''
    Tells whether a rule is usable: it must not be None and both its
    'url' and 'source' entries must exist and not be None.
    '''
    if rule is None:
        return False

    try:
        # 'source' is only looked up once 'url' turned out to be set.
        return rule['url'] is not None and rule['source'] is not None
    except KeyError:
        # A missing key makes the rule invalid.
        return False
81
 
82
def getItemsWithTopScore(items):
    '''
    Returns the leading run of items whose 'score' is never lower than
    the score of the item before it (the first item is compared against
    -1.0). Scanning stops at the first item that scores lower.
    '''
    leading_items = []
    previous_score = -1.0

    for candidate in items:
        if not candidate['score'] >= previous_score:
            break

        leading_items.append(candidate)
        previous_score = candidate['score']

    return leading_items
94
 
95
def isPriceSame(items):
    '''
    Returns True when every item carries the same 'price' as the item
    that follows it (trivially True for zero or one item), else False.
    '''
    last_index = len(items) - 1

    # Neighbouring pairs are compared lazily, left to right.
    return not any(items[i]['price'] != items[i + 1]['price'] for i in range(last_index))
100
 
101
def getProductClusters(products):
    '''
    Receives a list of products (returned from search results) &
    returns a clustered dictionary, where products are grouped by
    the 'source'.

    The five pre-seeded sources are always present in the result, even
    when empty. Any other source found on a product gets its own cluster.
    '''
    clustered_results = {'adexmart': [], 'flipkart': [], 'homeshop18': [], 'infibeam': [], 'snapdeal': []}

    for product in products:
        # setdefault() creates the cluster on demand, so a source that is
        # not pre-seeded above (e.g. 'letsbuy') does not raise KeyError.
        clustered_results.setdefault(product['source'], []).append(product)

    return clustered_results
113
 
114
def getFilteredClustersWithTopScores(product_clusters):
    '''
    Returns a new dictionary with the same source keys as product_clusters,
    each mapped to getItemsWithTopScore() of that source's products.
    '''
    return dict(
        (source, getItemsWithTopScore(products))
        for source, products in product_clusters.iteritems()
    )
121
 
122
def removePriceFormatting(price_string):
    '''
    Strips the 'Rs.'/'Rs' marker, thousands commas and spaces from a price
    string and returns the text in front of the first '.'.
    '''
    cleaned = price_string.strip()

    # NOTE(review): the space removal is applied twice, exactly as in the
    # original; the second pattern may once have been a different
    # whitespace character -- verify against the repository copy.
    for unwanted in ('Rs.', 'Rs', ',', ' ', ' '):
        cleaned = cleaned.replace(unwanted, '')

    whole_part = cleaned.partition('.')[0]
    return whole_part
3313 varun.gupt 124
 
4198 varun.gupt 125
def getSearchURL(source, name):
    '''
    Returns the search URL for the given source, with every '$$'
    placeholder of that source's template replaced by name.
    A source without a template raises KeyError.
    '''
    url_templates = {
        'flipkart': 'http://www.flipkart.com/search-mobiles?query=$$&from=all&searchGroup=mobiles',
        'homeshop18': 'http://www.homeshop18.com/$$/search:$$/categoryid:3024',
        'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
        'infibeam': 'http://www.infibeam.com/Mobiles/search?q=$$',
        'letsbuy': 'http://www.letsbuy.com/advanced_search_result.php?cPath=254&keywords=$$',
        'snapdeal': 'http://www.snapdeal.com/search?catId=&categoryId=12&locUsed=false&vertical=p&keyword=$$',
    }

    template = url_templates[source]
    return template.replace('$$', name)
136
 
137
def getDisplayInfo(filtered_cluster, product_name):
    '''
    Builds, for every source, the dictionary describing what to display.

    For each (source, products) pair in filtered_cluster:
      * no products      -> text 'Not Found', url is the source's search
                            URL for product_name, price and data are None;
      * one common price -> price/url/title come from the first product,
                            text is the price without formatting, data is None;
      * differing prices -> text 'Conflict', data is the JSON dump of the
                            products, price and url are None.

    The five pre-seeded sources are always present in the result (as an
    empty dictionary when filtered_cluster has no entry for them).
    '''
    display_info = {'adexmart': {}, 'flipkart': {}, 'homeshop18': {}, 'infibeam': {}, 'snapdeal': {}}

    for source, products in filtered_cluster.items():
        # setdefault() so that a source missing from the pre-seeded
        # dictionary above (e.g. 'letsbuy') does not raise KeyError.
        info = display_info.setdefault(source, {})

        if not products:
            info['price'] = None
            info['data'] = None
            info['url'] = getSearchURL(source, product_name)
            info['text'] = 'Not Found'

        elif isPriceSame(products):
            first_product = products[0]
            info['price'] = first_product['price']
            info['data'] = None
            info['url'] = first_product['url']
            info['text'] = removePriceFormatting(first_product['price'])
            info['title'] = first_product['name']

        else:
            info['price'] = None
            info['data'] = json.dumps(products)
            info['url'] = None
            info['text'] = 'Conflict'

    return display_info
161
 
162
def getSynonyms(file_path = '/tmp/price-comp-dashboard/synonyms.json'):
    '''
    Reads the synonyms JSON file and returns a dictionary that maps each
    top-level key, converted to int, to one flat list holding the entry's
    'MODEL_NAME' values followed by its 'MODEL_NUMBER' values (either
    field may be absent).

    file_path defaults to the location the dashboard has always used.
    Raises IOError/OSError when the file cannot be opened and ValueError
    when it is not valid JSON or a key is not an integer.
    '''
    # The context manager closes the file even when parsing fails.
    with open(file_path, 'r') as synonyms_file:
        raw_synonyms = json.load(synonyms_file)

    synonyms = {}
    for key, value in raw_synonyms.items():
        list_synonyms = []

        for field in ('MODEL_NAME', 'MODEL_NUMBER'):
            if field in value:
                list_synonyms.extend(value[field])

        synonyms[int(key)] = list_synonyms

    return synonyms
180
 
5401 varun.gupt 181
def getValidItems():
    '''
    Fetches the catalog items with status ACTIVE, PAUSED and PAUSED_BY_RISK
    (in that order), keeps those whose category is one of 10002, 10003,
    10004, 10005 or 10010, retains one item per catalogItemId (a later
    item replaces an earlier one) and returns them sorted by brand.
    '''
    catalog_client = CatalogClient().get_client()

    fetched = catalog_client.getAllItemsByStatus(status.ACTIVE)
    for extra_status in (status.PAUSED, status.PAUSED_BY_RISK):
        fetched.extend(catalog_client.getAllItemsByStatus(extra_status))

    wanted_categories = (10002, 10003, 10004, 10005, 10010)
    by_catalog_id = {}

    for item in fetched:
        if item.category in wanted_categories:
            by_catalog_id[item.catalogItemId] = item

    return sorted(by_catalog_id.itervalues(), key = lambda item: item.brand)
192
 
193
def getProductName(item):
    '''
    Builds the name "<brand> <model name> <model number>" from the item's
    brand, modelName and modelNumber attributes, leaving out every part
    that is None or blank. Parts are separated by exactly one space.
    '''
    name_parts = []

    for part in (item.brand, item.modelName, item.modelNumber):
        # An unset field (None) is skipped instead of failing on .strip().
        text = part.strip() if part is not None else ''
        if text:
            name_parts.append(text)

    # Joining avoids the trailing space that "%s " concatenation leaves
    # behind when the model number (or both model fields) is blank.
    return ' '.join(name_parts)
202
 
3453 varun.gupt 203
if __name__ == '__main__':
    # Manual smoke test: print the parsed synonyms file.
    # The extracter is still constructed as before, although nothing below uses it.
    extracter = BrandAndModelExtracter()
    # Parenthesised so the line is valid on Python 2 and Python 3 alike;
    # with a single argument the printed output is the same.
    print(getSynonyms())