Subversion Repositories SmartDukaan

Rev

Rev 4221 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3139 chandransh 1
#!/usr/bin/python
2
 
3
'''
4090 chandransh 4
Generates a TSV file to be uploaded to Amazon's seller central.
5
It has four input sources:
6
 1. The catalog database which it reads from the local database.
7
 2. FeatureValues.xls which is generated using the FeatureValueExtractor
8
  script in the ContentStore project.
9
 3. ItemNumbers.xls which is fetched from Nilesh to get UPC/EAN
10
 of all handsets since Amazon is adamant that they'll not accept data w/o it.
4229 rajveer 11
 4. Imagenames.xls which is generated using the FeatureValueExtractor
12
 script in the ContentStore project. This contains item id and image url mapping.
4090 chandransh 13
 
14
Once the CSV file is generated, a header should be added to it.
15
 
16
@attention: The columns and their order in the featurevalues.xls can
17
change depending on the object model. As such the output should be 
18
checked manually. 
19
 
3139 chandransh 20
Created on 01-Sep-2011
21
 
4090 chandransh 22
@author: Chandranshu
3139 chandransh 23
'''
24
import sys
3140 chandransh 25
import csv
3486 chandransh 26
import xlrd
3139 chandransh 27
 
28
# When this file is executed directly (not imported as part of a package),
# put the current working directory at the front of the import path.
# Presumably this lets the shop2020 package import below resolve when the
# script is launched from the project root -- confirm.
if __package__ is None and __name__ == '__main__':
    import os
    sys.path.insert(0, os.getcwd())
31
 
32
from shop2020.clients.CatalogClient import CatalogClient
33
 
34
def get_title(item):
    '''
    Builds the listing title for an item in the layout Amazon asks for:
    <Brand> <Model Name> <Model Number> | <Color>

    The brand is always used as the starting text. Model name, model number
    and color are each appended only when they are non-empty.
    '''
    title = item.brand
    for extra in (item.modelName, item.modelNumber):
        if extra:
            title += ' ' + extra
    if item.color:
        title += " | " + item.color
    return title
47
 
48
def get_hyphenated_name(item):
    '''
    Builds the lower-cased, hyphen-separated name used in the URL path of a
    mobile phone.

    Brand, model name and model number (the latter two only when non-empty)
    are joined with "-". Slashes and spaces then become hyphens, and a single
    left-to-right pass turns each "--" pair into "-".
    '''
    slug = item.brand
    for part in (item.modelName, item.modelNumber):
        if part:
            slug += "-" + part
    # Order matters: "--" is collapsed only after "/" and " " were converted.
    for unwanted in ("/", " ", "--"):
        slug = slug.replace(unwanted, "-")
    return slug.lower()
62
 
63
def get_url(item):
    '''
    Returns the complete saholic.com URL of a phone: the mobile-phones
    prefix, the hyphenated item name and the catalog item id, with each "--"
    pair in the result reduced to "-" in one pass.
    '''
    prefix = "http://www.saholic.com/mobile-phones/"
    path = get_hyphenated_name(item) + "-" + str(item.catalogItemId)
    return (prefix + path).replace("--", "-")
73
 
74
def get_image_url(item):
    '''
    Returns the complete URL of the default image of an item, built from its
    catalog item id and hyphenated name.

    @deprecated: The name of image is now available in the FeatureValues file.
    '''
    return "".join([
        "http://static0.saholic.com/images/",
        str(item.catalogItemId),
        "/",
        get_hyphenated_name(item),
        "-default-0.jpg",
    ])
84
 
3486 chandransh 85
def is_active(item):
    '''
    Tells whether the item's status code is one of those included in the
    feed (2, 3 or 6).
    '''
    listed_statuses = (2, 3, 6)
    return item.itemStatus in listed_statuses
87
 
3502 chandransh 88
def get_key(brand, model_number, color):
    '''
    Builds the lookup key for a handset in the form
    handsets|<brand>|<model number>|<color>

    Brand, model number and color are stripped of surrounding whitespace and
    lower-cased. Anything from the first "(" onwards in the model number is
    discarded.

    @param brand: brand name as text.
    @param model_number: model number; may be text or a number. Values that
        convert to an integer are rendered without a fractional part
        (1100.0 becomes "1100").
    @param color: color name as text.
    @return: the key as a str.
    '''
    try:
        model_number_str = str(int(model_number))
    except (TypeError, ValueError, OverflowError):
        # Not an integral value (e.g. "S5620", None, inf): use it verbatim.
        # Only the failures int() can raise are handled, so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        model_number_str = str(model_number)
    # split() returns the whole string when there is no "(" to cut at.
    model_number_str = model_number_str.split('(')[0]
    parts = [
        'handsets',
        brand.strip().lower(),
        model_number_str.strip().lower(),
        color.strip().lower(),
    ]
    return str('|'.join(parts))
3502 chandransh 97
 
98
def load_item_numbers(filename="/home/rajveer/Desktop/itemNumbers.xls"):
    '''
    Reads the item number (UPC/EAN) spreadsheet and indexes it by handset key.

    Only the first sheet is read, starting at the third row. From each row
    the first column is taken as the item number and columns six to eight as
    brand, model number and color, which are turned into a key with get_key.
    Rows whose item number is not made up purely of digits are skipped.

    @param filename: path of the .xls file; defaults to the location the
        script has always used.
    @return: a tuple (itemNumberMap, itemNumberTypeMap). The first maps
        key -> item number string. The second maps key -> 'EAN' for 13-digit
        numbers and 'UPC' for 12-digit numbers; numbers of any other length
        have no entry in it.
    '''
    workbook = xlrd.open_workbook(filename)
    sheet = workbook.sheet_by_index(0)
    itemNumberMap = {}
    itemNumberTypeMap = {}
    for rownum in range(2, sheet.nrows):
        (itemNumber, unused_description, unused_pc, unused_pg, unused_tech,
         brand, model_number, color) = sheet.row_values(rownum)[0:8]
        key = get_key(brand, model_number, color)
        # xlrd returns numeric cells as floats. str() of such a value is
        # "123.0" or exponent notation, which fails the isdigit() test below
        # and silently dropped otherwise valid numbers. Whole-number floats
        # are therefore converted to int first; text cells are unaffected.
        if isinstance(itemNumber, float) and itemNumber.is_integer():
            itemNumber = int(itemNumber)
        itemNumberStr = str(itemNumber)
        if not itemNumberStr.isdigit():
            continue
        itemNumberMap[key] = itemNumberStr
        if len(itemNumberStr) == 13:
            itemNumberTypeMap[key] = 'EAN'
        elif len(itemNumberStr) == 12:
            itemNumberTypeMap[key] = 'UPC'
    # Diagnostic dump of everything that was loaded. The parenthesised form
    # prints the same text under the print statement and the print function.
    print(itemNumberMap)
    return itemNumberMap, itemNumberTypeMap
118
 
3584 chandransh 119
 
120
def normalize_form_factor(formFactor):
    '''
    Translates the form factor found in the feature values into the value
    written to the feed. Anything other than the four known names yields ''.
    '''
    feed_values = {
        'Candybar': 'candy-bar',
        'Slider': 'slide',
        'Flip': 'flip',
        'Clamshell': 'flip',
    }
    return feed_values.get(formFactor, '')
132
 
133
 
134
def normalize_operating_system(opsys):
    '''
    Reduces a free-form operating system description to the family name
    written to the feed, or '' when no known family is mentioned.

    Markers are matched case-sensitively as substrings and tried in the
    order listed, so the first match wins.
    '''
    families = (
        ('Android', 'Android'),
        ('Symbian', 'Symbian'),
        ('BlackBerry', 'Blackberry'),
        ('Windows', 'Windows Phone'),
        ('bada', 'Bada'),
        ('Bada', 'Bada'),
        ('iOS', 'iOS'),
    )
    for marker, family in families:
        if marker in opsys:
            return family
    return ''
150
 
151
 
152
def normalize_battery_type(batteryType):
    '''
    Maps a battery description to the value written to the feed:
    'lithium_ion' when it mentions Li-Ion / Li-ion / Lithium-ion,
    'lithium_metal' when it mentions Li-Po, and '' otherwise.
    '''
    ion_markers = ('Li-Ion', 'Li-ion', 'Lithium-ion')
    if any(marker in batteryType for marker in ion_markers):
        return 'lithium_ion'
    if 'Li-Po' in batteryType:
        return 'lithium_metal'
    return ''
160
 
161
 
162
def get_cellular_technology(multipleSIM, network3G):
    '''
    Picks the cellular technology label for the feed.

    'Dual SIM' takes precedence when multipleSIM is exactly 'Dual-SIM'.
    Otherwise the result is '3G' if network3G is anything but the empty
    string, and 'GSM' if it is empty.
    '''
    if multipleSIM == 'Dual-SIM':
        return 'Dual SIM'
    return '3G' if network3G != '' else 'GSM'
172
 
173
 
174
def normalize_screen_type(screenType):
    '''
    Maps a screen description to the display technology written to the feed:
    'LCD' when it mentions LCD, Nova or Retina; otherwise 'LED' when it
    contains "LED"; otherwise ''. The LCD check is made first.
    '''
    for lcd_marker in ('LCD', 'Nova', 'Retina'):
        if lcd_marker in screenType:
            return 'LCD'
    return 'LED' if 'LED' in screenType else ''
182
 
3992 chandransh 183
def get_hotspot_mapping(mappings):
    '''
    Returns the itemKey of the first mapping whose vendorId is 1, or None
    when there is no such mapping. (Judging by the function name, vendor 1
    is Hotspot -- confirm.)
    '''
    return next((entry.itemKey for entry in mappings if entry.vendorId == 1), None)
188
 
4229 rajveer 189
def load_item_id_image_url_map(filename="/home/rajveer/Desktop/imagenames.xls"):
    '''
    Reads the image names spreadsheet into a dictionary.

    Every row of the first sheet is read (no row is skipped); the first
    column is used as the item id and the second as the image URL. When an
    item id occurs more than once, the last row wins.

    @param filename: path of the .xls file; defaults to the location the
        script has always used.
    @return: dict mapping item id -> image URL, with both stored exactly as
        xlrd returns the cell values.
    '''
    workbook = xlrd.open_workbook(filename)
    sheet = workbook.sheet_by_index(0)
    itemIdMap = {}
    for rownum in range(sheet.nrows):
        itemId, image_url = sheet.row_values(rownum)[0:2]
        itemIdMap[itemId] = image_url
    return itemIdMap
199
 
200
 
3139 chandransh 201
def _parse_weight(weight):
    '''
    Returns the first token of a multi-word weight string when it is a
    number, '' when it is not, and the input unchanged when it has at most
    one token.
    '''
    parts = weight.split()
    if len(parts) <= 1:
        return weight
    value = parts[0]
    try:
        float(value)
    except ValueError:
        return ''
    return value


def _parse_dimensions(size):
    '''
    Splits a size string into (length, width, height), each suffixed with
    " MM"; returns three empty strings when the size is blank or
    "Not available".
    '''
    if size == "Not available" or size == '':
        return "", "", ""
    parts = size.split()
    # Tokens 1 and 3 are skipped; presumably they are the separators between
    # the three numbers -- confirm against the feature values file.
    return parts[0] + " MM", parts[2] + " MM", parts[4] + " MM"


def _split_keywords(metaKeywords):
    '''
    Splits a comma separated keyword string into stripped keywords, padded
    with '' to at least five entries.
    '''
    keywords = [keyword.strip() for keyword in metaKeywords.split(",")]
    keywords.extend([''] * (5 - len(keywords)))
    return keywords


def _build_rows(sheet, catalog_client, itemNumberMap, itemNumberTypeMap, itemIdImageUrlMap):
    '''
    Produces one feed row for every active item of every entity listed in
    the feature values sheet.
    '''
    item_details = []
    #2 is used as the starting index because first row is a test product with 12 years of warranty.
    for rownum in range(2, sheet.nrows):
        # The first 88 columns, in sheet order. Names starting with unused_
        # are not consumed below. image_url is overwritten per item further
        # down, and talktime2G / standy2G are not written yet (see the TODO
        # in the row). The sheet has two screen size columns; only the later
        # one is used, exactly as before when both shared one name.
        unused_categoryName, unused_entityName, entityID, image_url, unused_accessories, unused_softwareApplications, unused_pageTitle,\
        unused_metaDescription, metaKeywords, unused_snippets, unused_shortSnippet, tagline,\
        unused_skinSize, unused_screenSize, unused_screenLeftUpperCornerDimension, unused_modelNameSynonyms, unused_modelNumberSynonyms,\
        warranty, unused_warranty_type, unused_warranty_coverage, \
        weight, size, formFactor, unused_color, screenType, screenSize, screenResolution, unused_numberOfColors, unused_keyboardType,\
        unused_navigation, touchscreenType, unused_sideControls, unused_multimediaKeys, multipleSIM, unused_voip, unused_network2G,\
        network3G, unused_gprs, unused_edge, unused_g3, unused_wifi, unused_bluetooth, unused_usb, unused_musicFormats, unused_earphone, unused_speakerPhone,\
        unused_fmRadio, unused_internetRadio, unused_ringtoneTypes, unused_fileFormats, unused_streaming, unused_liveTV, unused_hdVideoPlayback,\
        unused_resolution, unused_flash, unused_imageFormats, unused_numberOfCameras, unused_secondaryCamera, unused_additionalCameraFeatures,\
        builtIn, ram, unused_expansionType, unused_expansionCapacity, batteryType, unused_powerAdaptor, unused_musicPlayback,\
        unused_videoPlayback, unused_tvPlayback, talktime2G, unused_talktime3G, standy2G, unused_standby3G, unused_types, unused_markupLanguages,\
        unused_http_protocols, unused_browser, unused_mail_protocols, opsys, unused_java, unused_flashPlayer, unused_drm, unused_securityFeatures, unused_gpsType, unused_mms, unused_sms, unused_ems,\
        unused_instantMessaging, unused_email = sheet.row_values(rownum)[0:88]

        items = catalog_client.getItemsByCatalogId(entityID)
        # A list (not a lazy filter object) so the emptiness test is reliable.
        active_items = [item for item in items if is_active(item)]
        if not active_items:
            continue

        if screenSize:
            screenSize = screenSize.split()[0]

        if screenResolution:
            # Drop the last space separated token of the resolution text.
            screenResolution = screenResolution.rsplit(' ', 1)[0]

        screenType = normalize_screen_type(screenType)
        cellularTechnology = get_cellular_technology(multipleSIM, network3G)
        opsys = normalize_operating_system(opsys)

        userInput = "keypad"
        if touchscreenType != "":
            userInput = "touch_screen"

        formFactor = normalize_form_factor(formFactor)

        if warranty:
            warranty = warranty + " manufacturer warranty"

        batteryType = normalize_battery_type(batteryType)
        weight = _parse_weight(weight)
        length, width, height = _parse_dimensions(size)

        keywords = _split_keywords(metaKeywords)
        # Report keywords longer than 50 characters so they can be reviewed.
        for keyword in keywords:
            if len(keyword) > 50:
                print(keyword)

        for item in active_items:
            stdProductId = ''
            stdProductIdType = ''
            if not item.color:
                item.color = ''
            # Items without an image mapping get an empty Image cell; the
            # old code wrote the literal text "None" for them.
            image_url = itemIdImageUrlMap.get(item.id, '')
            mappings = catalog_client.getVendorItemMappings(item.id)
            key = get_hotspot_mapping(mappings)
            if key and key in itemNumberTypeMap:
                stdProductId = itemNumberMap[key]
                stdProductIdType = itemNumberTypeMap[key]
            # NOTE(review): the header row orders the dimension columns as
            # Length, Height, Width, but the values below are emitted as
            # length, width, height. Left unchanged; confirm which is meant.
            item_details.append(
                        [item.id, get_title(item), get_url(item), item.sellingPrice, "1", "803073031", stdProductId,\
                         stdProductIdType, "Wireless", tagline, '0', str(image_url), item.mrp, "TRUE",\
                         item.brand, "", "", item.modelNumber, "", builtIn,\
                         ram, "", "", "", screenSize, screenResolution,\
                         screenType, "", "", "", "", "",\
                         cellularTechnology, opsys,\
                         #talktime2G, standy2G, - TODO: use these values after converting them to minutes and hours respectively
                         '', '', userInput, "",\
                         formFactor, item.color, "", "1", "", warranty, "FALSE",\
                         batteryType, "TRUE", "TRUE", "battery-powered", "TRUE",\
                         "", weight, length, width, height, keywords[0], keywords[1], keywords[2], keywords[3],\
                         keywords[4], "", "", "", "", "",\
                         "", "", "", "", "",\
                         "", "", "", ""])
    return item_details


def main():
    '''
    Generates junglee.csv, the tab separated product feed.

    Loads the item number map, opens the feature values workbook, writes the
    two header rows, loads the item id -> image URL map and then writes one
    row per active catalog item. The output file is created in the current
    working directory and is closed even if generation fails part way.
    '''
    itemNumberMap, itemNumberTypeMap = load_item_numbers()
    catalog_client = CatalogClient().get_client()
    filename = "/home/rajveer/Desktop/featurevalues.xls"
    workbook = xlrd.open_workbook(filename)
    sheet = workbook.sheet_by_index(0)

    column_names = [
        "SKU", "Title", "Link", "Price", "Delivery Time", "Recommended Browse Node", "Standard Product ID",
        "Product ID Type", "Category", "Description", "Shipping Cost", "Image", "List Price", "Availability",
        "Brand", "Manufacturer", "Mfr part number", "Model Number", "Computer CPU speed", "Hard disk size",
        "Included RAM size", "Optical zoom", "Digital zoom", "Megapixels", "Display size", "Screen Resolution",
        "Display Technology", "Flash drive Size", "Memory Card Type", "Camera type", "Viewfinder type", "Flash type",
        "Cellular Technology", "Phone Operating System", "Talk Time", "Standby Time", "User Input", "Device Type",
        "Form Factor", "Colour Name", "Colour Map", "Item package quantity", "Age", "Warranty", "Assembly required",
        "Battery Type", "Batteries Included", "Batteries Required", "Power Source", "Power Adapter Included",
        "Shipping Weight", "Weight", "Length", "Height", "Width", "Keywords1", "Keywords2", "Keywords3", "Keywords4",
        "Keywords5", "Bullet point1", "Bullet point2", "Bullet point3", "Bullet point4", "Bullet point5",
        "Other image-url1", "Other image-url2", "Other image-url3", "Other image-url4", "Other image-url5",
        "Offer note", "Is Gift Wrap Available", "Registered Parameter", "Update Delete",
    ]
    # First line of the feed: two populated cells padded with blanks to the
    # same width as the column name row.
    feed_header = ["Amazon.com Product Ads Header", "Purge-Replace=false"]
    feed_header.extend([""] * (len(column_names) - len(feed_header)))

    # "wb" is the mode the Python 2 csv module expects for its output file.
    with open("junglee.csv", "wb") as output:
        writer = csv.writer(output, delimiter='\t', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(feed_header)
        writer.writerow(column_names)

        itemIdImageUrlMap = load_item_id_image_url_map()

        writer.writerows(_build_rows(sheet, catalog_client, itemNumberMap,
                                     itemNumberTypeMap, itemIdImageUrlMap))
3139 chandransh 337
 
338
# Script entry point: build the feed only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()