Subversion Repositories SmartDukaan

Rev

Rev 5291 | Rev 5401 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
Created on 26-Aug-2011

@author: Varun Gupta
'''
3350 varun.gupt 6
import json, sys, os
3232 varun.gupt 7
 
3350 varun.gupt 8
# Make the directory that contains the ``shop2020`` package importable before
# the ``from shop2020...`` import below runs.
# HOME is used verbatim when it is set (original behaviour); when it is missing
# from the environment we fall back to os.path.expanduser instead of failing
# with a KeyError at import time.
if "HOME" in os.environ:
    _home_dir = os.environ["HOME"]
else:
    _home_dir = os.path.expanduser("~")

# abspath() drops the trailing slash, so dirname() yields the ".../PyProj/src"
# directory, i.e. the parent of the shop2020 package.
cmd_folder = os.path.dirname(os.path.abspath(_home_dir + "/code/trunk/PyProj/src/shop2020/"))
if cmd_folder not in sys.path:
    sys.path.insert(0, cmd_folder)
11
 
12
from shop2020.clients.CatalogClient import CatalogClient
13
 
3453 varun.gupt 14
# Name of the ISO-8859-1 (Latin-1) character encoding. Nothing in the visible
# part of this module reads it; presumably it is used by code that imports this
# module -- TODO confirm.
CHARACTER_ENCODING = 'ISO-8859-1'
15
 
16
class BrandAndModelExtracter:
    '''
    Splits a product's full name into a (brand, model) pair.

    The list of known brands is requested from the catalog service for
    category 10001; if that request fails for any reason a built-in list
    of brands is used instead.
    '''

    def __init__(self):
        '''
        Load the known brands into ``self.brands``.
        '''
        try:
            client = CatalogClient().get_client()
            # Copy into a plain list so that the alias added below never
            # mutates the object handed back by the client. A non-iterable
            # result (e.g. None) raises here and triggers the fallback.
            self.brands = list(client.getAllBrandsByCategory(10001))
        except Exception:
            # Deliberate best effort: any failure while talking to the
            # catalog falls back to this static list.
            self.brands = ['Micromax', 'BlackBerry', 'Blackberry', 'Motorola', 'Alcatel', 'Sony Ericsson', 'Apple',
                           'Spice', 'Nokia', 'HTC', 'Samsung', 'LG', 'Dell', 'Karbonn', 'Lava']

        # To resolve the issue of 'BlackBerry' and 'Blackberry': make sure the
        # second spelling is always recognised, without adding it twice.
        if 'Blackberry' not in self.brands:
            self.brands.append('Blackberry')

    def extract(self, full_name):
        '''
        Return ``(brand, model)`` for ``full_name``.

        The first entry of ``self.brands`` that ``full_name`` starts with
        (after stripping surrounding whitespace) is the brand; the rest of
        the name, stripped, is the model. When no brand matches the result
        is ``("", full_name)`` with the stripped name.
        '''
        full_name = full_name.strip()

        for brand in self.brands:
            if full_name.startswith(brand):
                # Cut off only the leading brand. str.replace() would also
                # erase any later occurrence of the brand inside the model.
                return (brand, full_name[len(brand):].strip())

        return ("", full_name)
36
 
5291 varun.gupt 37
class DuplicateMappingTracker:
    '''
    Records which entity ids were mapped to each URL so that URLs mapped
    to more than one entity can be reported.
    '''

    def __init__(self):
        # url -> list of entity ids, in the order they were tracked
        self.mapping = {}

    def track(self, url, entity_id):
        '''
        Remember that ``entity_id`` was mapped to ``url``.
        '''
        self.mapping.setdefault(url, []).append(entity_id)

    def getDuplicateMappings(self):
        '''
        Return a dict ``{url: [entity_id, ...]}`` holding only the URLs
        that were tracked for more than one entity id.
        '''
        duplicate_mappings = {}

        # Walk the tracked mappings. The previous code iterated the
        # still-empty result dict, so it could never report anything.
        for url, entities in self.mapping.items():
            if len(entities) > 1:
                duplicate_mappings[url] = entities

        return duplicate_mappings
57
 
4198 varun.gupt 58
def getURLSource(url):
    '''
    Return the source (site) name embedded in ``url``, or None when it
    cannot be determined.

    Only the host part of the URL is inspected:
      - hosts with three or more dot-separated labels yield the second
        label ('http://www.flipkart.com/x' -> 'flipkart');
      - hosts with exactly two labels yield the first one
        ('http://adexmart.com/search.php' -> 'adexmart').
    Anything else, including non-string input, yields None.
    '''
    try:
        # Isolate the host: drop the scheme, then path/query/fragment,
        # then any credentials and port. Splitting the whole URL on '.'
        # returned path residue such as 'com/search' for hosts without
        # a 'www.' prefix.
        host = url.strip().split('://', 1)[-1]
        for separator in ('/', '?', '#'):
            host = host.split(separator, 1)[0]
        host = host.rsplit('@', 1)[-1].split(':', 1)[0]

        labels = host.split('.')
        if len(labels) >= 3:
            source = labels[1].strip()
        elif len(labels) == 2:
            source = labels[0].strip()
        else:
            return None

        if not source:
            return None
        return str(source)
    except Exception:
        # Same contract as before: any failure means "no source".
        return None
63
 
3232 varun.gupt 64
def isValidRule(rule):
    '''
    Tell whether ``rule`` is usable: it must not be None and must carry
    non-None values under both the 'url' and the 'source' key.

    A missing key counts as invalid. 'source' is only looked up when
    'url' is present and not None.
    '''
    if rule is None:
        return False

    try:
        has_url = rule['url'] is not None
        return has_url and rule['source'] is not None
    except KeyError:
        return False
80
 
81
def getItemsWithTopScore(items):
    '''
    Return the items whose 'score' equals the highest score found in
    ``items``, keeping their original relative order.

    For input sorted by descending score (with scores of at least -1.0)
    this is the same leading run of top-scored items the previous
    implementation returned. Unlike before, the result no longer depends
    on the input order: ascending input used to return every item.
    An empty input yields an empty list.
    '''
    items = list(items)
    if not items:
        return []

    top_score = max(item['score'] for item in items)
    return [item for item in items if item['score'] == top_score]
93
 
94
def isPriceSame(items):
    '''
    Return True when every item carries the same 'price' as its
    successor, i.e. no two neighbouring items differ in price.
    Empty and single-item sequences count as "same".
    '''
    neighbour_count = len(items) - 1
    mismatch_found = any(
        items[pos]['price'] != items[pos + 1]['price']
        for pos in range(neighbour_count)
    )
    return not mismatch_found
99
 
100
def getProductClusters(products):
    '''
    Receives a list of products (returned from search results) &
    returns a clustered dictionary, where products are grouped by
    the 'source'.

    The result always contains the keys 'adexmart', 'flipkart',
    'homeshop18', 'infibeam' and 'snapdeal' (with an empty list when the
    source has no products). A product from any other source gets its
    own cluster instead of raising a KeyError.
    '''
    clustered_results = {'adexmart': [], 'flipkart': [], 'homeshop18': [], 'infibeam': [], 'snapdeal': []}

    for product in products:
        # setdefault() creates the cluster on first sight of an unlisted source.
        clustered_results.setdefault(product['source'], []).append(product)

    return clustered_results
112
 
113
def getFilteredClustersWithTopScores(product_clusters):
    '''
    Apply getItemsWithTopScore() to the product list of every source.

    ``product_clusters`` maps a source name to its list of products;
    the returned dict has the same keys, each mapped to the filtered list.
    '''
    filtered_cluster = {}

    # items() instead of iteritems(): works on both Python 2 and Python 3.
    for source, products in product_clusters.items():
        filtered_cluster[source] = getItemsWithTopScore(products)

    return filtered_cluster
120
 
121
def removePriceFormatting(price_string):
    '''
    Strip currency markers, thousands separators and spaces from a price
    string and return the part before the first '.'.
    '''
    cleaned = price_string.strip()

    # Removed in this exact order: 'Rs.' must go before the bare 'Rs'.
    for token in ('Rs.', 'Rs', ',', ' ', ' '):
        cleaned = cleaned.replace(token, '')

    whole_part = cleaned.split('.')[0]
    return whole_part
3313 varun.gupt 123
 
4198 varun.gupt 124
def getSearchURL(source, name):
    '''
    Build the search URL of ``source`` for the product ``name``.

    Every '$$' placeholder in the source's URL template is replaced by
    ``name`` exactly as given. Raises KeyError when ``source`` has no
    template.
    '''
    url_templates = {
        'flipkart': 'http://www.flipkart.com/search-mobiles?query=$$&from=all&searchGroup=mobiles',
        'homeshop18': 'http://www.homeshop18.com/$$/search:$$/categoryid:3024',
        'adexmart': 'http://adexmart.com/search.php?orderby=position&orderway=desc&search_query=$$',
        'infibeam': 'http://www.infibeam.com/Mobiles/search?q=$$',
        'letsbuy': 'http://www.letsbuy.com/advanced_search_result.php?cPath=254&keywords=$$',
        'snapdeal': 'http://www.snapdeal.com/search?catId=&categoryId=12&locUsed=false&vertical=p&keyword=$$',
    }
    template = url_templates[source]
    return template.replace('$$', name)
135
 
136
def getDisplayInfo(filtered_cluster, product_name):
    '''
    Build the per-source display information for one product.

    ``filtered_cluster`` maps a source name to its list of products.
    For every source in it the result holds a dict with the keys
    'price', 'data', 'url' and 'text':
      - all products share one price: the first product's price and url,
        the unformatted price as 'text', plus a 'title' key with the
        first product's name ('title' is set in this case only);
      - prices differ: 'text' is 'Conflict' and 'data' is the JSON dump
        of the products;
      - no products: 'text' is 'Not Found' and 'url' is the source's
        search URL for ``product_name`` (None when the source has no
        search URL template).
    The sources 'adexmart', 'flipkart', 'homeshop18', 'infibeam' and
    'snapdeal' are always present and stay ``{}`` when they are missing
    from ``filtered_cluster``.
    '''
    display_info = {'adexmart': {}, 'flipkart': {}, 'homeshop18': {}, 'infibeam': {}, 'snapdeal': {}}

    # items() instead of iteritems(): works on both Python 2 and Python 3.
    for source, products in filtered_cluster.items():
        # A source outside the predefined ones gets its own entry instead
        # of raising a KeyError.
        info = display_info.setdefault(source, {})

        if len(products) > 0:
            if isPriceSame(products):
                info['price'] = products[0]['price']
                info['data'] = None
                info['url'] = products[0]['url']
                info['text'] = removePriceFormatting(products[0]['price'])
                info['title'] = products[0]['name']
            else:
                info['price'] = None
                info['data'] = json.dumps(products)
                info['url'] = None
                info['text'] = 'Conflict'
        else:
            info['price'] = None
            info['data'] = None
            try:
                info['url'] = getSearchURL(source, product_name)
            except KeyError:
                # No search URL template is known for this source.
                info['url'] = None
            info['text'] = 'Not Found'

    return display_info
160
 
161
def getSynonyms(file_path='/tmp/price-comp-dashboard/synonyms.json'):
    '''
    Load the synonyms JSON file and return ``{int(key): [synonym, ...]}``.

    The file holds a JSON object. For every entry the values listed
    under 'MODEL_NAME' and then 'MODEL_NUMBER' (each optional) are
    concatenated into one list, and the entry's key is converted to int.

    ``file_path`` defaults to the location that was previously
    hard-coded, so existing ``getSynonyms()`` calls behave as before.
    Errors from open(), JSON parsing and int() propagate to the caller.
    '''
    # The context manager closes the file even when parsing fails.
    with open(file_path, 'r') as synonyms_file:
        raw_synonyms = json.load(synonyms_file)

    synonyms = {}
    for key, value in raw_synonyms.items():
        list_synonyms = []

        for field in ('MODEL_NAME', 'MODEL_NUMBER'):
            if field in value:
                list_synonyms.extend(value[field])

        synonyms[int(key)] = list_synonyms

    return synonyms
179
 
180
if __name__ == '__main__':
    # Manual smoke test: build the extracter and dump the synonyms that
    # getSynonyms() loads from its default file.
    extracter = BrandAndModelExtracter()
    # print(extracter.extract('Nokia X5-01 (Pink)'))
    # print() with a single argument behaves the same on Python 2 and 3.
    print(getSynonyms())