Rev 3517 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
#!/usr/bin/python"""This script gets product activities by source for a date from google app engine and dumps it in pentaho bi database."""import jsonimport urllib2import MySQLdbimport datetimeimport sysDB_HOST = "localhost"DB_USER = "root"DB_PASSWORD = "shop2020"DB_NAME = "sales"sourceIdMap = {}def getDbConnection():return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)def closeConnection(conn):conn.close()def getSourceId(is_paid, source):if sourceIdMap.has_key((is_paid, source)) :return sourceIdMap[(is_paid, source)]# Prepare SQL query to insert new source into db.insertSql = "insert into source(is_paid, url) values (%s, %s)"selectSql = "select id from source where is_paid = %s and url = %s"conn = getDbConnection()try:# prepare a cursor object using cursor() methodcursor = conn.cursor()# Execute the SQL commandcursor.execute(insertSql, (is_paid, source))# Fetch source id.cursor.execute(selectSql, (is_paid, source))result = cursor.fetchone()sourceIdMap[(is_paid, source)] = result[0]except Exception as e:print "Error: unable to fetch data"print ereturn sourceIdMap[(is_paid, source)]def loadSourceIdMap():conn = getDbConnection()# Prepare SQL query to load source mapping from db.sql = "select id, is_paid, url from source"try:# prepare a cursor object using cursor() methodcursor = conn.cursor()# Execute the SQL commandcursor.execute(sql)# Fetch all the rows in a list of lists.results = cursor.fetchall()for row in results:sourceIdMap[(row[1], row[2])] = row[0]cursor.close()except Exception as e:print "Error: unable to fetch data"print ecloseConnection(conn)def pushResultsToDb(results):conn = getDbConnection()try:# prepare a cursor object using cursor() methodcursor = conn.cursor()# Clean db for the dateif len(results) > 0: cursor.execute("delete from productactivity where date_id = %s", results[0][0])cursor.executemany ("""insert into productactivity (date_id, catalog_item_id, first_source_id, session_source_id,view, add_to_cart, add_to_research, delete_from_cart, delete_from_research, shipping_access,payment_success, order_creation, payment_failure, proceed_to_pay, unique_view, unique_add_to_cart,unique_add_to_research, unique_delete_from_cart, unique_delete_from_research, unique_shipping_access,unique_payment_success, unique_order_creation, unique_payment_failure, unique_proceed_to_pay)values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""", results)conn.commit()results = []cursor.close()except Exception as e:print "Error: unable to insert row"print ecloseConnection(conn)def downloadProductActivity(date):url = urllib2.urlopen('http://saholic-datastore.appspot.com/jsp/export-product-activity-with-source.jsp?date=' + date.strftime("%Y%m%d"))return url.read()def start(date):jsonString = downloadProductActivity(date)loadSourceIdMap()productActivities = json.loads(jsonString)results = []dateDelta = (date - datetime.datetime(2010,12,31)).daysfor activity in productActivities:isPaidFirstSource = 0 if not activity.has_key('isPaidFirstSource') else 1 if activity['isPaidFirstSource'] == 'true' else 0firstSource = str(activity['firstSource']) if activity.has_key('firstSource') else ""isPaidSessionSource = 0 if not activity.has_key('isPaidSessionSource') else 1 if activity['isPaidSessionSource'] == 'true' else 0sessionSource = str(activity['sessionSource']) if activity.has_key('sessionSource') else ""firstSourceId = getSourceId(isPaidFirstSource, firstSource)sessionSourceId = getSourceId(isPaidSessionSource, sessionSource)catalogId = int(activity['catalogId'])view = int(activity['view']) if activity.has_key('view') else 0addToCart = int(activity['addToCart']) if activity.has_key('addToCart') else 0addToResearch = int(activity['addToResearch']) if activity.has_key('addToResearch') else 0deleteFromCart = int(activity['deleteFromCart']) if activity.has_key('deleteFromCart') else 0deleteFromResearch = int(activity['deleteFromResearch']) if activity.has_key('deleteFromResearch') else 0shippingAccess = int(activity['shippingAccess']) if activity.has_key('shippingAccess') else 0paymentSuccess = int(activity['paymentSuccess']) if activity.has_key('paymentSuccess') else 0orderCreation = int(activity['orderCreation']) if activity.has_key('orderCreation') else 0paymentFailure = int(activity['paymentFailure']) if activity.has_key('paymentFailure') else 0proceedToPay = int(activity['proceedToPay']) if activity.has_key('proceedToPay') else 0uniqueView = int(activity['uniqueView']) if activity.has_key('uniqueView') else 0uniqueAddToCart = int(activity['uniqueAddToCart']) if activity.has_key('uniqueAddToCart') else 0uniqueAddToResearch = int(activity['uniqueAddToResearch']) if activity.has_key('uniqueAddToResearch') else 0uniqueDeleteFromCart = int(activity['uniqueDeleteFromCart']) if activity.has_key('uniqueDeleteFromCart') else 0uniqueDeleteFromResearch = int(activity['uniqueDeleteFromResearch']) if activity.has_key('uniqueDeleteFromResearch') else 0uniqueShippingAccess = int(activity['uniqueShippingAccess']) if activity.has_key('uniqueShippingAccess') else 0uniquePaymentSuccess = int(activity['uniquePaymentSuccess']) if activity.has_key('uniquePaymentSuccess') else 0uniqueOrderCreation = int(activity['uniqueOrderCreation']) if activity.has_key('uniqueOrderCreation') else 0uniquePaymentFailure = int(activity['uniquePaymentFailure']) if activity.has_key('uniquePaymentFailure') else 0uniqueProceedToPay = int(activity['uniqueProceedToPay']) if activity.has_key('uniqueProceedToPay') else 0results.append((dateDelta, catalogId, firstSourceId, sessionSourceId, view, addToCart, addToResearch,deleteFromCart, deleteFromResearch, shippingAccess, paymentSuccess, orderCreation, paymentFailure, proceedToPay,uniqueView, uniqueAddToCart, uniqueAddToResearch, uniqueDeleteFromCart, uniqueDeleteFromResearch,uniqueShippingAccess, uniquePaymentSuccess, uniqueOrderCreation, uniquePaymentFailure, uniqueProceedToPay))pushResultsToDb(results)def main():if len(sys.argv) > 2:print "Usage : ProductActivityLoader.py [date-yyyyMMdd]"returnif len(sys.argv) > 1:datestr = sys.argv[1]date = datetime.datetime.strptime(datestr, '%Y%m%d')else:date = datetime.datetime.today() - datetime.timedelta(1)start(date)if __name__ == '__main__':main()