Subversion Repositories SmartDukaan

Rev

Rev 4221 | Blame | Compare with Previous | Last modification | View Log | RSS feed

#!/usr/bin/python

'''
Generates a TSV file to be uploaded to Amazon's seller central.
It has four input sources:
 1. The catalog database which it reads from the local database.
 2. FeatureValues.xls which is generated using the FeatureValueExtractor
  script in the ContentStore project.
 3. ItemNumbers.xls which is fetched from Nilesh to get UPC/EAN
 of all handsets since Amazon is adamant that they'll not accept data w/o it.
 4. Imagenames.xls which is generated using the FeatureValueExtractor
 script in the ContentStore project. This contains item id and image url mapping.

Once the CSV file is generated, a header should be added to it.

@attention: The columns and their order in the featurevalues.xls can
change depending on the object model. As such the output should be 
checked manually. 

Created on 01-Sep-2011

@author: Chandranshu
'''
import sys
import csv
import xlrd

# When this file is executed directly (not imported as part of a package),
# put the current working directory at the front of sys.path before the
# shop2020 import below runs -- presumably so that the shop2020 package can be
# resolved relative to the directory the script is launched from.
if __name__ == '__main__' and __package__ is None:
    import os
    sys.path.insert(0, os.getcwd())
    
from shop2020.clients.CatalogClient import CatalogClient

def get_title(item):
    '''
    Builds the listing title in the layout Amazon asks for:
    <Brand> <Model Name> <Model Number> | <Color>

    The brand is always used as the starting point; the model name, the
    model number and the color are only appended when they are non-empty.
    '''
    title = item.brand
    # Model name and model number are both joined with a single space.
    for part in (item.modelName, item.modelNumber):
        if part:
            title = title + ' ' + part
    # The color, when present, is set apart with a pipe.
    return title + " | " + item.color if item.color else title

def get_hyphenated_name(item):
    '''
    Returns the lower-case, hyphen-separated name used as the URL path
    segment for a mobile phone.

    Brand, model name and model number (the latter two only when non-empty)
    are joined with hyphens; slashes and spaces are then turned into hyphens
    and a single pass collapses each "--" pair into one hyphen.
    '''
    name = item.brand
    for part in (item.modelName, item.modelNumber):
        if part:
            name = name + "-" + part
    # Order matters: "/" and " " first, then one pass over doubled hyphens.
    for unwanted in ("/", " ", "--"):
        name = name.replace(unwanted, "-")
    return name.lower()

def get_url(item):
    '''
    Returns the complete product page URL of a phone.

    The URL is the fixed mobile-phones prefix followed by the hyphenated
    item name and the item's catalogItemId; any "--" pair in the result is
    collapsed to a single hyphen in one pass.
    '''
    slug = get_hyphenated_name(item) + "-" + str(item.catalogItemId)
    return ("http://www.saholic.com/mobile-phones/" + slug).replace("--", "-")

def get_image_url(item):
    '''
    Returns the complete URL of the item's default image, built from the
    static image host, the item's catalogItemId and its hyphenated name.

    @deprecated: The name of image is now available in the FeatureValues file.
    '''
    return ("http://static0.saholic.com/images/"
            + str(item.catalogItemId) + "/"
            + get_hyphenated_name(item) + "-default-0.jpg")

def is_active(item):
    '''
    Tells whether the item should be exported: True when item.itemStatus is
    one of the status codes 2, 3 or 6.
    '''
    exportable_statuses = (2, 3, 6)
    return item.itemStatus in exportable_statuses

def get_key(brand, model_number, color):
    '''
    Builds the lookup key 'handsets|<brand>|<model number>|<color>' used to
    match a spreadsheet row against a vendor item key.

    brand and color are stripped and lower-cased. model_number may be a
    string or a number (spreadsheet readers typically hand numeric cells
    back as floats): whole numbers such as 5230.0 are rendered without the
    trailing ".0", while genuinely fractional numbers such as 6.1 keep their
    fraction. Anything from the first '(' onwards in the model number is
    dropped before it is stripped and lower-cased.
    '''
    try:
        as_int = int(model_number)
    except (TypeError, ValueError, OverflowError):
        # Not numeric (e.g. 'C3-00'): use the textual form as it is.
        model_number_str = str(model_number)
    else:
        # int() truncates floats; only use the integer form when nothing is
        # lost, otherwise 6.1 and 6.9 would both collapse to '6'.
        truncated = isinstance(model_number, float) and as_int != model_number
        if truncated:
            model_number_str = str(model_number)
        else:
            model_number_str = str(as_int)
    # Drop a parenthesised suffix such as 'C3-00 (Pink)'.
    model_number_str = model_number_str.split('(')[0]
    return str('handsets|' + brand.strip().lower() + '|' + model_number_str.strip().lower() + '|' + color.strip().lower())

def load_item_numbers(filename="/home/rajveer/Desktop/itemNumbers.xls"):
    '''
    Reads the UPC/EAN spreadsheet and returns two dictionaries keyed by
    get_key(brand, model_number, color):

     1. itemNumberMap: key -> item number as a string of digits.
     2. itemNumberTypeMap: key -> 'EAN' for 13-digit numbers and 'UPC' for
        12-digit numbers. Numbers of any other length get no entry here.

    Only the first sheet is read and the first two rows are skipped. The
    first eight columns of a row are used: column 0 is the item number and
    columns 5, 6 and 7 are brand, model number and color. Rows whose item
    number is not purely digits are ignored.

    @param filename: path of the workbook; defaults to the location the
    script has always read from.
    '''
    workbook = xlrd.open_workbook(filename)
    sheet = workbook.sheet_by_index(0)
    itemNumberMap = {}
    itemNumberTypeMap = {}
    for rownum in range(2, sheet.nrows):
        itemNumber, unused_description, unused_pc, unused_pg, unused_tech, brand, model_number, color = sheet.row_values(rownum)[0:8]
        # xlrd hands numeric cells back as floats. str() of such a value ends
        # in '.0' or uses exponent notation, which would fail the isdigit()
        # check below and silently drop the row, so format whole numbers as
        # plain digits instead.
        if isinstance(itemNumber, float) and itemNumber.is_integer():
            itemNumberStr = '%d' % itemNumber
        else:
            itemNumberStr = str(itemNumber).strip()
        if not itemNumberStr.isdigit():
            continue
        key = get_key(brand, model_number, color)
        itemNumberMap[key] = itemNumberStr
        if len(itemNumberStr) == 13:
            itemNumberTypeMap[key] = 'EAN'
        elif len(itemNumberStr) == 12:
            itemNumberTypeMap[key] = 'UPC'
    # Dump of everything that was loaded, kept for manual verification.
    print(itemNumberMap)
    return itemNumberMap, itemNumberTypeMap


def normalize_form_factor(formFactor):
    '''
    Translates the catalog's form factor name into the value written to the
    feed. Candybar, Slider, Flip and Clamshell are recognised (exact match);
    every other value yields an empty string.
    '''
    feed_values = {
        'Candybar': 'candy-bar',
        'Slider': 'slide',
        'Flip': 'flip',
        'Clamshell': 'flip',
    }
    return feed_values.get(formFactor, '')


def normalize_operating_system(opsys):
    '''
    Reduces a free-text operating system description to the name written to
    the feed. The first marker (checked in the order below, case-sensitively)
    that occurs anywhere in the text decides the result; when none occurs an
    empty string is returned.
    '''
    markers = (
        ('Android', 'Android'),
        ('Symbian', 'Symbian'),
        ('BlackBerry', 'Blackberry'),
        ('Windows', 'Windows Phone'),
        ('bada', 'Bada'),
        ('Bada', 'Bada'),
        ('iOS', 'iOS'),
    )
    for marker, feed_name in markers:
        if marker in opsys:
            return feed_name
    return ''


def normalize_battery_type(batteryType):
    '''
    Maps a free-text battery description to the value written to the feed:
    'lithium_ion' when it mentions Li-Ion / Li-ion / Lithium-ion,
    'lithium_metal' when it mentions Li-Po, and an empty string otherwise.
    The lithium-ion spellings take precedence over Li-Po.
    '''
    lithium_ion_spellings = ('Li-Ion', 'Li-ion', 'Lithium-ion')
    if any(spelling in batteryType for spelling in lithium_ion_spellings):
        return 'lithium_ion'
    if 'Li-Po' in batteryType:
        return 'lithium_metal'
    return ''


def get_cellular_technology(multipleSIM, network3G):
    '''
    Picks the cellular technology value for the feed.

    A multipleSIM value of exactly 'Dual-SIM' wins and gives 'Dual SIM'.
    Otherwise the result is '3G' when network3G is anything other than an
    empty string, and 'GSM' when it is empty.
    '''
    if multipleSIM == 'Dual-SIM':
        return 'Dual SIM'
    return '3G' if network3G != '' else 'GSM'


def normalize_screen_type(screenType):
    '''
    Reduces a free-text display description to the value written to the feed:
    'LCD' when it mentions LCD, Nova or Retina, otherwise 'LED' when it
    mentions LED, otherwise an empty string. The LCD group is checked first.
    '''
    if any(marker in screenType for marker in ('LCD', 'Nova', 'Retina')):
        return 'LCD'
    return 'LED' if 'LED' in screenType else ''

def get_hotspot_mapping(mappings):
    '''
    Returns the itemKey of the first mapping whose vendorId is 1, or None
    when no mapping in the sequence belongs to that vendor.
    '''
    vendor_one_keys = (mapping.itemKey for mapping in mappings if mapping.vendorId == 1)
    return next(vendor_one_keys, None)

def load_item_id_image_url_map():
    '''
    Reads the first sheet of imagenames.xls and returns a dictionary that
    maps the value in the first column of every row (the item id) to the
    value in the second column (the image url). All rows are read, starting
    from the very first one; a later row overrides an earlier row with the
    same id.
    '''
    sheet = xlrd.open_workbook("/home/rajveer/Desktop/imagenames.xls").sheet_by_index(0)
    # Each two-cell slice is consumed by dict() as one (item id, url) pair.
    return dict(sheet.row_values(rownum)[0:2] for rownum in range(sheet.nrows))

    
def main():
    '''
    Generates junglee.csv, the tab-separated product feed.

    Steps:
     1. Loads the UPC/EAN maps and opens featurevalues.xls (first sheet).
     2. Writes the two header rows of the feed.
     3. Loads the item id -> image url map.
     4. For every feature row (from the third row onwards) fetches the
        catalog items of that entity, keeps the active ones, normalizes the
        feature values and collects one feed row per active item.
     5. Writes all collected rows after the headers.

    The output file is closed even when one of the steps fails.
    '''
    itemNumberMap, itemNumberTypeMap = load_item_numbers()
    catalog_client = CatalogClient().get_client()
    item_details = []
    filename = "/home/rajveer/Desktop/featurevalues.xls"
    workbook = xlrd.open_workbook(filename)
    sheet = workbook.sheet_by_index(0)
    num_rows = sheet.nrows

    column_names = [
        "SKU", "Title", "Link", "Price", "Delivery Time", "Recommended Browse Node", "Standard Product ID",
        "Product ID Type", "Category", "Description", "Shipping Cost", "Image", "List Price", "Availability",
        "Brand", "Manufacturer", "Mfr part number", "Model Number", "Computer CPU speed", "Hard disk size",
        "Included RAM size", "Optical zoom", "Digital zoom", "Megapixels", "Display size", "Screen Resolution",
        "Display Technology", "Flash drive Size", "Memory Card Type", "Camera type", "Viewfinder type", "Flash type",
        "Cellular Technology", "Phone Operating System", "Talk Time", "Standby Time", "User Input", "Device Type",
        "Form Factor", "Colour Name", "Colour Map", "Item package quantity", "Age", "Warranty", "Assembly required",
        "Battery Type", "Batteries Included", "Batteries Required", "Power Source", "Power Adapter Included",
        "Shipping Weight", "Weight", "Length", "Height", "Width", "Keywords1", "Keywords2", "Keywords3", "Keywords4",
        "Keywords5", "Bullet point1", "Bullet point2", "Bullet point3", "Bullet point4", "Bullet point5",
        "Other image-url1", "Other image-url2", "Other image-url3", "Other image-url4", "Other image-url5",
        "Offer note", "Is Gift Wrap Available", "Registered Parameter", "Update Delete"]
    # The first row carries two control cells and is padded with empty cells
    # to the same width as the column-name row.
    banner_row = ["Amazon.com Product Ads Header", "Purge-Replace=false"] + [""] * (len(column_names) - 2)

    output = open("junglee.csv", "wb")
    try:
        writer = csv.writer(output, delimiter='\t', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(banner_row)
        writer.writerow(column_names)

        itemIdImageUrlMap = load_item_id_image_url_map()

        for rownum in range(2, num_rows): #2 is used as the starting index because first row is a test product with 12 years of warranty.
            # The sheet has 88 columns in a fixed order; columns this script
            # does not use are bound to unused_* names. screenSize is bound
            # twice and the later column is the one that takes effect.
            unused_categoryName, unused_entityName, entityID, image_url, unused_accessories, unused_softwareApplications, unused_pageTitle,\
            unused_metaDescription, metaKeywords, unused_snippets, unused_shortSnippet, tagline,\
            unused_skinSize, screenSize, unused_screenLeftUpperCornerDimension, unused_modelNameSynonyms, unused_modelNumberSynonyms,\
            warranty, unused_warranty_type, unused_warranty_coverage, \
            weight, size, formFactor, unused_color, screenType, screenSize, screenResolution, unused_numberOfColors, unused_keyboardType,\
            unused_navigation, touchscreenType, unused_sideControls, unused_multimediaKeys, multipleSIM, unused_voip, unused_network2G,\
            network3G, unused_gprs, unused_edge, unused_g3, unused_wifi, unused_bluetooth, unused_usb, unused_musicFormats, unused_earphone, unused_speakerPhone,\
            unused_fmRadio, unused_internetRadio, unused_ringtoneTypes, unused_fileFormats, unused_streaming, unused_liveTV, unused_hdVideoPlayback,\
            unused_resolution, unused_flash, unused_imageFormats, unused_numberOfCameras, unused_secondaryCamera, unused_additionalCameraFeatures,\
            builtIn, ram, unused_expansionType, unused_expansionCapacity, batteryType, unused_powerAdaptor, unused_musicPlayback,\
            unused_videoPlayback, unused_tvPlayback, talktime2G, unused_talktime3G, standy2G, unused_standby3G, unused_types, unused_markupLanguages,\
            unused_http_protocols, unused_browser, unused_mail_protocols, opsys, unused_java, unused_flashPlayer, unused_drm, unused_securityFeatures, unused_gpsType, unused_mms, unused_sms, unused_ems,\
            unused_instantMessaging, unused_email = sheet.row_values(rownum)[0:88]

            items = catalog_client.getItemsByCatalogId(entityID)
            active_items = filter(is_active, items)
            if not active_items:
                continue

            # Keep only the leading number of the screen size text.
            if screenSize:
                screenSize = screenSize.split()[0]

            # Drop the last space-separated word of the resolution text.
            if screenResolution:
                screenResolution = screenResolution.rsplit(' ', 1)[0]

            screenType = normalize_screen_type(screenType)

            cellularTechnology = get_cellular_technology(multipleSIM, network3G)

            opsys = normalize_operating_system(opsys)

            userInput = "keypad"
            if touchscreenType != "":
                userInput = "touch_screen"

            formFactor = normalize_form_factor(formFactor)

            if warranty:
                warranty = warranty + " manufacturer warranty"

            batteryType = normalize_battery_type(batteryType)

            # "<number> <unit>" -> "<number>"; blanked when the first word is
            # not a number. A single-word weight is passed through untouched.
            weight_parts = weight.split()
            if len(weight_parts) > 1:
                weight = weight_parts[0]
                try:
                    float(weight)
                except ValueError:
                    weight = ''

            if size == "Not available" or size == '':
                length, width, height = ["", "", ""]
            else:
                # Words 0, 2 and 4 are taken as the three dimensions; this
                # assumes a "<a> x <b> x <c> ..." layout -- TODO confirm
                # against the FeatureValues export.
                size_parts = size.split()
                length, width, height = [size_parts[0]+" MM", size_parts[2]+" MM", size_parts[4]+" MM"]

            # The feed has exactly five keyword columns: pad short lists.
            keywords = [keyword.strip() for keyword in metaKeywords.split(",")]
            keywords += [''] * (5 - len(keywords))

            # Report over-long keywords so that they can be fixed by hand.
            for keyword in keywords:
                if len(keyword) > 50:
                    print(keyword)

            for item in active_items:
                stdProductId = ''
                stdProductIdType = ''
                if not item.color:
                    item.color = ''
                # A missing mapping must produce an empty cell rather than
                # the text "None".
                item_image_url = itemIdImageUrlMap.get(item.id)
                item_image_url = str(item_image_url) if item_image_url else ''
                mappings = catalog_client.getVendorItemMappings(item.id)
                key = get_hotspot_mapping(mappings)
                # Only numbers with a known type (UPC/EAN) are exported.
                if key and key in itemNumberTypeMap:
                    stdProductId = itemNumberMap[key]
                    stdProductIdType = itemNumberTypeMap[key]
                item_details.append(
                            [item.id, get_title(item), get_url(item), item.sellingPrice, "1", "803073031", stdProductId,\
                             stdProductIdType, "Wireless", tagline, '0', item_image_url, item.mrp, "TRUE",\
                             item.brand, "", "", item.modelNumber, "", builtIn,\
                             ram, "", "", "", screenSize, screenResolution,\
                             screenType, "", "", "", "", "",\
                             cellularTechnology, opsys,\
                             #talktime2G, standy2G, - TODO: use these values after converting them to minutes and hours respectively
                             '', '', userInput, "",\
                             formFactor, item.color, "", "1", "", warranty, "FALSE",\
                             batteryType, "TRUE","TRUE", "battery-powered", "TRUE",\
                             # The header order is Length, Height, Width, so
                             # height is emitted before width.
                             "", weight, length, height, width, keywords[0], keywords[1], keywords[2], keywords[3],\
                             keywords[4], "", "","","","",\
                             "","","","","",\
                             "","","",""])

        for item_detail in item_details:
            writer.writerow(item_detail)
    finally:
        output.close()

# Script entry point: generate the feed only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    main()