| 3139 |
chandransh |
1 |
#!/usr/bin/python
|
|
|
2 |
|
|
|
3 |
'''
|
| 4090 |
chandransh |
4 |
Generates a TSV file to be uploaded to Amazon's seller central.
|
|
|
5 |
It has three input sources:
|
|
|
6 |
1. The catalog database which it reads from the local database.
|
|
|
7 |
2. FeatureValues.xls which is generated using the FeatureValueExtractor
|
|
|
8 |
script in the ContentStore project.
|
|
|
9 |
3. ItemNumbers.xls which is fetched from Nilesh to get UPC/EAN
|
|
|
10 |
of all handsets since Amazon is adamant that they'll not accept data w/o it.
|
|
|
11 |
|
|
|
12 |
Once the CSV file is generated, a header should be added to it.
|
|
|
13 |
|
|
|
14 |
@attention: The columns and their order in the featurevalues.xls can
|
|
|
15 |
change depending on the object model. As such the output should be
|
|
|
16 |
checked manually.
|
|
|
17 |
|
| 3139 |
chandransh |
18 |
Created on 01-Sep-2011
|
|
|
19 |
|
| 4090 |
chandransh |
20 |
@author: Chandranshu
|
| 3139 |
chandransh |
21 |
'''
|
|
|
22 |
import sys
|
| 3140 |
chandransh |
23 |
import csv
|
| 3486 |
chandransh |
24 |
import xlrd
|
| 3139 |
chandransh |
25 |
|
|
|
26 |
if __name__ == '__main__' and __package__ is None:
|
|
|
27 |
import os
|
|
|
28 |
sys.path.insert(0, os.getcwd())
|
|
|
29 |
|
|
|
30 |
from shop2020.clients.CatalogClient import CatalogClient
|
|
|
31 |
|
|
|
32 |
def get_title(item):
    '''
    Builds the listing title of an item in the format required by Amazon:
    <Brand> <Model Name> <Model Number> | <Color>

    The brand always starts the title. The model name and model number are
    appended after a single space, and the colour after " | ", but only
    when the corresponding attribute is non-empty.
    '''
    optional_parts = (
        (' ', item.modelName),
        (' ', item.modelNumber),
        (" | ", item.color),
    )
    title = item.brand
    for separator, value in optional_parts:
        if value:
            title = title + separator + value
    return title
|
|
|
45 |
|
|
|
46 |
def get_hyphenated_name(item):
    '''
    Returns the hyphenated, lower-case name used in the URL path of a
    mobile phone.

    The brand, model name and model number (the latter two only when
    non-empty) are joined with "-". Slashes and spaces are then turned into
    hyphens, and a single pass collapses "--" into "-" before lower-casing.
    '''
    slug = item.brand
    for part in (item.modelName, item.modelNumber):
        if part:
            slug = slug + "-" + part
    # Order matters: "/" and " " must become hyphens before "--" is collapsed.
    for needle in ("/", " ", "--"):
        slug = slug.replace(needle, "-")
    return slug.lower()
|
|
|
60 |
|
|
|
61 |
def get_url(item):
    '''
    Returns the complete URL of a phone.

    The URL is the saholic.com mobile-phones prefix followed by the
    hyphenated item name and the item's catalogItemId, with any "--"
    collapsed into a single "-".
    '''
    slug = get_hyphenated_name(item) + "-" + str(item.catalogItemId)
    full_url = "http://www.saholic.com/mobile-phones/" + slug
    return full_url.replace("--", "-")
|
|
|
71 |
|
|
|
72 |
def get_image_url(item):
    '''
    Returns the complete URL of the default image, built from the item's
    catalogItemId and its hyphenated name.

    @deprecated: The name of image is now available in the FeatureValues file.
    '''
    image_dir = "http://static0.saholic.com/images/" + str(item.catalogItemId) + "/"
    image_name = get_hyphenated_name(item) + "-default-0.jpg"
    return image_dir + image_name
|
|
|
82 |
|
| 3486 |
chandransh |
83 |
def is_active(item):
    '''
    Returns True when the item's itemStatus is one of 2, 3 or 6.
    '''
    status = item.itemStatus
    return status == 2 or status == 3 or status == 6
|
|
|
85 |
|
| 3502 |
chandransh |
86 |
def get_key(brand, model_number, color):
    '''
    Builds the lookup key 'handsets|<brand>|<model number>|<color>'.

    brand and color are stripped and lower-cased. model_number is first
    rendered as an integer when that is possible (so a numeric cell such as
    5800.0 becomes '5800'); otherwise its plain string form is used. Anything
    from the first '(' onwards is dropped from the model number before it is
    stripped and lower-cased.

    @param brand: brand name (string)
    @param model_number: model number, either a number or a string
    @param color: colour name (string)
    @return: the key as a str
    '''
    try:
        model_number_str = str(int(model_number))
    except (TypeError, ValueError, OverflowError):
        # Not convertible to an integer (e.g. 'N8', None, inf): keep it as text.
        model_number_str = str(model_number)
    if '(' in model_number_str:
        model_number_str = model_number_str.split('(')[0]
    key_parts = [
        'handsets',
        brand.strip().lower(),
        model_number_str.strip().lower(),
        color.strip().lower(),
    ]
    return str('|'.join(key_parts))
|
| 3502 |
chandransh |
95 |
|
|
|
96 |
def load_item_numbers(filename="/home/ashish/itemNumbers.xls"):
    '''
    Reads the item number (UPC/EAN) spreadsheet and indexes it by item key.

    Only the first sheet is read and rows 0 and 1 are skipped (presumably
    header rows -- confirm against the actual file). For each remaining row,
    column 0 holds the item number and columns 5, 6 and 7 hold the brand,
    model number and colour that are turned into a key with get_key().

    Rows whose item number is not made up purely of digits are ignored.
    A 13 digit number is recorded as 'EAN' and a 12 digit number as 'UPC';
    numbers of any other length get no entry in the type map.

    @param filename: path of the .xls file to read; defaults to the
                     location the script has always used.
    @return: (itemNumberMap, itemNumberTypeMap), both keyed by item key.
    '''
    sheet = xlrd.open_workbook(filename).sheet_by_index(0)
    itemNumberMap = {}
    itemNumberTypeMap = {}
    number_type_by_length = {13: 'EAN', 12: 'UPC'}
    for rownum in range(2, sheet.nrows):
        (itemNumber, unused_description, unused_pc, unused_pg, unused_tech,
         brand, model_number, color) = sheet.row_values(rownum)[0:8]
        key = get_key(brand, model_number, color)
        # xlrd hands numeric cells back as floats; str() of such a value is
        # never all digits, so integral floats are converted to int first
        # (the same treatment get_key gives to numeric model numbers).
        if isinstance(itemNumber, float) and itemNumber.is_integer():
            itemNumber = int(itemNumber)
        itemNumberStr = str(itemNumber).strip()
        if not itemNumberStr.isdigit():
            continue
        itemNumberMap[key] = itemNumberStr
        number_type = number_type_by_length.get(len(itemNumberStr))
        if number_type:
            itemNumberTypeMap[key] = number_type
    # Debug dump of everything that was loaded.
    print(itemNumberMap)
    return itemNumberMap, itemNumberTypeMap
|
|
|
116 |
|
| 3584 |
chandransh |
117 |
|
|
|
118 |
def normalize_form_factor(formFactor):
    '''
    Maps a catalog form factor onto the value written to the feed.

    'Candybar' -> 'candy-bar', 'Slider' -> 'slide', 'Flip' and 'Clamshell'
    -> 'flip'. Every other value (matching is exact and case-sensitive)
    yields the empty string.
    '''
    translations = (
        ('Candybar', 'candy-bar'),
        ('Slider', 'slide'),
        ('Flip', 'flip'),
        ('Clamshell', 'flip'),
    )
    for catalog_value, feed_value in translations:
        if formFactor == catalog_value:
            return feed_value
    return ''
|
|
|
130 |
|
|
|
131 |
|
|
|
132 |
def normalize_operating_system(opsys):
    '''
    Reduces a free-form operating system description to a family name.

    The description is searched (case-sensitively) for known markers in a
    fixed order and the family of the first marker found is returned:
    Android, Symbian, Blackberry, Windows Phone, Bada or iOS. When no marker
    is present the empty string is returned.
    '''
    families = (
        (('Android',), 'Android'),
        (('Symbian',), 'Symbian'),
        (('BlackBerry',), 'Blackberry'),
        (('Windows',), 'Windows Phone'),
        (('bada', 'Bada'), 'Bada'),
        (('iOS',), 'iOS'),
    )
    for markers, family in families:
        for marker in markers:
            if marker in opsys:
                return family
    return ''
|
|
|
148 |
|
|
|
149 |
|
|
|
150 |
def normalize_battery_type(batteryType):
    '''
    Maps a free-form battery description onto the value written to the feed.

    Descriptions containing 'Li-Ion', 'Li-ion' or 'Lithium-ion' become
    'lithium_ion'; otherwise those containing 'Li-Po' become
    'lithium_metal'; anything else becomes the empty string.
    '''
    for marker in ('Li-Ion', 'Li-ion', 'Lithium-ion'):
        if marker in batteryType:
            return 'lithium_ion'
    if 'Li-Po' in batteryType:
        return 'lithium_metal'
    return ''
|
|
|
158 |
|
|
|
159 |
|
|
|
160 |
def get_cellular_technology(multipleSIM, network3G):
    '''
    Picks the cellular technology label for a phone.

    'Dual SIM' wins whenever multipleSIM is exactly 'Dual-SIM'. Otherwise
    the phone is labelled '3G' when network3G is anything other than the
    empty string, and 'GSM' when it is empty.
    '''
    if multipleSIM == 'Dual-SIM':
        return 'Dual SIM'
    if network3G != '':
        return '3G'
    return 'GSM'
|
|
|
170 |
|
|
|
171 |
|
|
|
172 |
def normalize_screen_type(screenType):
    '''
    Maps a free-form screen description onto 'LCD', 'LED' or ''.

    Descriptions containing 'LCD', 'Nova' or 'Retina' become 'LCD';
    otherwise those containing 'LED' become 'LED'; anything else becomes
    the empty string. Matching is case-sensitive.
    '''
    for marker in ('LCD', 'Nova', 'Retina'):
        if marker in screenType:
            return 'LCD'
    if 'LED' in screenType:
        return 'LED'
    return ''
|
|
|
180 |
|
| 3992 |
chandransh |
181 |
def get_hotspot_mapping(mappings):
    '''
    Returns the itemKey of the first mapping whose vendorId is 1, or None
    when there is no such mapping.

    NOTE(review): going by the function name, vendor 1 is presumably
    "Hotspot" -- confirm against the vendor table.
    '''
    vendor_one_keys = (mapping.itemKey for mapping in mappings if mapping.vendorId == 1)
    return next(vendor_one_keys, None)
|
|
|
186 |
|
| 3139 |
chandransh |
187 |
def _feed_header_rows():
    '''Returns the two header rows (74 columns each) that start the feed.'''
    banner = ["Amazon.com Product Ads Header", "Purge-Replace=false"] + [""] * 72
    columns = [
        "SKU", "Title", "Link", "Price", "Delivery Time", "Recommended Browse Node", "Standard Product ID",
        "Product ID Type", "Category", "Description", "Shipping Cost", "Image", "List Price", "Availability",
        "Brand", "Manufacturer", "Mfr part number", "Model Number", "Computer CPU speed", "Hard disk size",
        "Included RAM size", "Optical zoom", "Digital zoom", "Megapixels", "Display size", "Screen Resolution",
        "Display Technology", "Flash drive Size", "Memory Card Type", "Camera type", "Viewfinder type", "Flash type",
        "Cellular Technology", "Phone Operating System", "Talk Time", "Standby Time", "User Input", "Device Type",
        "Form Factor", "Colour Name", "Colour Map", "Item package quantity", "Age", "Warranty", "Assembly required",
        "Battery Type", "Batteries Included", "Batteries Required", "Power Source", "Power Adapter Included",
        "Shipping Weight", "Weight", "Length", "Height", "Width", "Keywords1", "Keywords2", "Keywords3", "Keywords4",
        "Keywords5", "Bullet point1", "Bullet point2", "Bullet point3", "Bullet point4", "Bullet point5",
        "Other image-url1", "Other image-url2", "Other image-url3", "Other image-url4", "Other image-url5",
        "Offer note", "Is Gift Wrap Available", "Registered Parameter", "Update Delete",
    ]
    return banner, columns


def _numeric_weight(weight):
    '''Returns the leading token of a weight cell if it parses as a number, else "".'''
    weight_parts = weight.split()
    if len(weight_parts) > 1:
        weight = weight_parts[0]
    try:
        float(weight)
    except (TypeError, ValueError):
        return ''
    return weight


def _split_dimensions(size):
    '''
    Returns (length, width, height) taken from tokens 0, 2 and 4 of a size
    cell, or three empty strings when the size is missing or too short.
    '''
    if size == "Not available" or size == '':
        return "", "", ""
    tokens = size.split()
    if len(tokens) < 5:
        # Malformed size: leave the dimensions blank instead of crashing.
        return "", "", ""
    return tokens[0], tokens[2], tokens[4]


def _five_keywords(metaKeywords):
    '''Returns the stripped comma-separated keywords, padded with '' to at least five.'''
    keywords = [keyword.strip() for keyword in metaKeywords.split(",")]
    # A negative repeat count yields an empty list, so longer lists are untouched.
    keywords.extend([''] * (5 - len(keywords)))
    return keywords


def main():
    '''
    Generates junglee.csv, the tab-separated product feed.

    Item numbers are loaded with load_item_numbers(), and the feature values
    are read from the first sheet of /home/ashish/featurevalues3.xls. For
    every feature row the catalog service is asked for the items of that
    entity, and one feed row is produced per active item (see is_active).
    The standard product id and its type are filled in only when the item
    has a vendor 1 mapping whose key is present in the item number type map.

    All rows are collected first; the file is then written in one go (two
    header rows followed by the item rows) and closed.
    '''
    itemNumberMap, itemNumberTypeMap = load_item_numbers()
    catalog_client = CatalogClient().get_client()
    item_details = []
    filename = "/home/ashish/featurevalues3.xls"
    workbook = xlrd.open_workbook(filename)
    sheet = workbook.sheet_by_index(0)
    num_rows = sheet.nrows

    for rownum in range(2, num_rows): #2 is used as the starting index because first row is a test product with 12 years of warranty.
        # The sheet has two screen size columns; the first one is ignored and
        # the second (next to screenType) is the one used for the feed.
        (unused_categoryName, unused_entityName, entityID, image_url, unused_accessories, unused_softwareApplications, unused_pageTitle,
         unused_metaDescription, metaKeywords, snippets, shortSnippet, tagline,
         unused_skinSize, unused_screenSize, unused_screenLeftUpperCornerDimension, unused_modelNameSynonyms, unused_modelNumberSynonyms,
         warranty, unused_warranty_type, unused_warranty_coverage,
         weight, size, formFactor, color, screenType, screenSize, screenResolution, numberOfColors, keyboardType,
         navigation, touchscreenType, sideControls, multimediaKeys, multipleSIM, voip, network2G,
         network3G, gprs, edge, g3, wifi, bluetooth, usb, musicFormats, earphone, speakerPhone,
         fmRadio, internetRadio, ringtoneTypes, fileFormats, streaming, liveTV, hdVideoPlayback,
         resolution, flash, imageFormats, numberOfCameras, secondaryCamera, additionalCameraFeatures,
         builtIn, ram, expansionType, expansionCapacity, batteryType, powerAdaptor, musicPlayback,
         videoPlayback, tvPlayback, talktime2G, talktime3G, standy2G, standby3G, types, markupLanguages,
         unused_http_protocols, unused_browser, unused_mail_protocols, opsys, unused_java, unused_flashPlayer, unused_drm, unused_securityFeatures, unused_gpsType, unused_mms, unused_sms, unused_ems,
         unused_instantMessaging, unused_email) = sheet.row_values(rownum)[0:88]

        items = catalog_client.getItemsByCatalogId(entityID)
        active_items = [item for item in items if is_active(item)]
        if not active_items:
            continue

        if screenSize:
            # Keep only the first token of the screen size.
            screenSize = screenSize.split()[0]

        if screenResolution:
            # Drop the last space-separated token of the resolution.
            screenResolution = screenResolution.rsplit(' ', 1)[0]

        screenType = normalize_screen_type(screenType)
        cellularTechnology = get_cellular_technology(multipleSIM, network3G)
        opsys = normalize_operating_system(opsys)

        userInput = "keypad"
        if touchscreenType != "":
            userInput = "touchscreen"

        formFactor = normalize_form_factor(formFactor)

        if warranty:
            warranty = warranty + " manufacturer warranty"

        batteryType = normalize_battery_type(batteryType)
        weight = _numeric_weight(weight)
        length, width, height = _split_dimensions(size)

        # Padding happens inside the helper so that it cannot clobber the
        # 'length' dimension computed just above.
        keywords = _five_keywords(metaKeywords)

        # Report keywords longer than 50 characters so they can be checked by hand.
        for keyword in keywords:
            if len(keyword) > 50:
                print(keyword)

        for item in active_items:
            stdProductId = ''
            stdProductIdType = ''
            if not item.color:
                item.color = ''

            mappings = catalog_client.getVendorItemMappings(item.id)
            key = get_hotspot_mapping(mappings)
            if key and key in itemNumberTypeMap:
                stdProductId = itemNumberMap[key]
                stdProductIdType = itemNumberTypeMap[key]
            item_details.append([
                item.id, get_title(item), get_url(item), item.sellingPrice, "1", "803546031", stdProductId,
                stdProductIdType, "Wireless", tagline, '0', 'http://' + str(image_url), item.mrp, "TRUE",
                item.brand, "", "", item.modelNumber, "", builtIn,
                ram, "", "", "", screenSize, screenResolution,
                screenType, "", "", "", "", "",
                cellularTechnology, opsys,
                #talktime2G, standy2G, - TODO: use these values after converting them to minutes and hours respectively
                '', '', userInput, "",
                formFactor, item.color, "", "1", "", warranty, "FALSE",
                batteryType, "TRUE", "TRUE", "battery-powered", "TRUE",
                # Column order in the header is Weight, Length, Height, Width.
                "", weight, length, height, width, keywords[0], keywords[1], keywords[2], keywords[3],
                keywords[4], "", "", "", "", "",
                "", "", "", "", "",
                "", "", "", "",
            ])

    banner_row, column_row = _feed_header_rows()
    # 'with' guarantees the feed is flushed and closed even if writing fails.
    with open("junglee.csv", "wb") as feed_file:
        writer = csv.writer(feed_file, delimiter='\t', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(banner_row)
        writer.writerow(column_row)
        writer.writerows(item_details)
|
| 3139 |
chandransh |
321 |
|
|
|
322 |
if __name__ == '__main__':
|
| 4100 |
chandransh |
323 |
main()
|