Subversion Repositories SmartDukaan

Rev

Rev 3837 | Blame | Compare with Previous | Last modification | View Log | RSS feed

#!/usr/bin/python

"""

 This script gets product activities by source for a date from google app engine and dumps it in pentaho bi database.

"""

import json
import urllib2
import MySQLdb
import datetime
import sys

DB_HOST = "localhost"
DB_USER = "root"
DB_PASSWORD = "shop2020"
DB_NAME = "sales"

sourceIdMap = {}

def getDbConnection():
    return MySQLdb.connect(DB_HOST, DB_USER, DB_PASSWORD, DB_NAME)
  
def closeConnection(conn):
    conn.close()
    
def getSourceId(is_paid, source):
    if sourceIdMap.has_key((is_paid, source)) :
        return sourceIdMap[(is_paid, source)] 
    
    # Prepare SQL query to insert new source into db.
    insertSql = "insert into source(is_paid, url) values (%s, %s)"
    selectSql = "select id from source where is_paid = %s and url = %s"
    conn = getDbConnection()
    try:
      # prepare a cursor object using cursor() method
      cursor = conn.cursor()
      # Execute the SQL command
      cursor.execute(insertSql, (is_paid, source))
      
      # Fetch source id.
      cursor.execute(selectSql, (is_paid, source))
      result = cursor.fetchone()
      sourceIdMap[(is_paid, source)] = result[0]
    except Exception as e:
      print "Error: unable to fetch data"
      print e
    
    return sourceIdMap[(is_paid, source)]

def loadSourceIdMap():
    conn = getDbConnection()
    
    # Prepare SQL query to load source mapping from db.
    sql = "select id, is_paid, url from source"
    try:
      # prepare a cursor object using cursor() method
      cursor = conn.cursor()
      # Execute the SQL command
      cursor.execute(sql)
      # Fetch all the rows in a list of lists.
      results = cursor.fetchall()
      for row in results:
        sourceIdMap[(row[1], row[2])] = row[0]
      cursor.close()
    except Exception as e:
      print "Error: unable to fetch data"
      print e
      
    closeConnection(conn)

def pushResultsToDb(results):
    conn = getDbConnection()
    
    try:
      # prepare a cursor object using cursor() method
      cursor = conn.cursor()
      
      # Clean db for the date
      if len(results) > 0: cursor.execute("delete from productactivity where date_id = %s", results[0][0]) 
      
      cursor.executemany (
          """
              insert into productactivity (
              date_id, catalog_item_id, first_source_id, session_source_id, 
              view, add_to_cart, add_to_research, delete_from_cart, delete_from_research, shipping_access, 
              payment_success, order_creation, payment_failure, proceed_to_pay, unique_view, unique_add_to_cart, 
              unique_add_to_research, unique_delete_from_cart, unique_delete_from_research, unique_shipping_access,
              unique_payment_success, unique_order_creation, unique_payment_failure, unique_proceed_to_pay) 
              values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
          """, results)
      conn.commit()
      results = []
      cursor.close()
    except Exception as e:
      print "Error: unable to insert row"
      print e
    
    closeConnection(conn)

def downloadProductActivity(date):
    url = urllib2.urlopen('http://saholic-datastore.appspot.com/jsp/export-product-activity-with-source.jsp?date=' + date.strftime("%Y%m%d"))
    return url.read()
  

def start(date):
    jsonString = downloadProductActivity(date)
    print jsonString
    loadSourceIdMap()
    
    productActivities = json.loads(jsonString)
    results = []
    dateDelta = (date - datetime.datetime(2010,12,31)).days
    for activity in productActivities:
        isPaidFirstSource = 0 if not activity.has_key('isPaidFirstSource') else 1 if activity['isPaidFirstSource'] == 'true' else 0
        try :
            firstSource = str(activity['firstSource']) if activity.has_key('firstSource') else ""
        except Exception as e:
            firstSource = ""
        isPaidSessionSource = 0 if not activity.has_key('isPaidSessionSource') else 1 if activity['isPaidSessionSource'] == 'true' else 0
        try :
            sessionSource = str(activity['sessionSource']) if activity.has_key('sessionSource') else ""
        except Exception as e:
            sessionSource = ""
        firstSourceId = getSourceId(isPaidFirstSource, firstSource)
        sessionSourceId = getSourceId(isPaidSessionSource, sessionSource)
        
        catalogId = int(activity['catalogId'])
        
        view = int(activity['view']) if activity.has_key('view') else 0
        addToCart = int(activity['addToCart']) if activity.has_key('addToCart') else 0
        addToResearch = int(activity['addToResearch']) if activity.has_key('addToResearch') else 0
        deleteFromCart = int(activity['deleteFromCart']) if activity.has_key('deleteFromCart') else 0
        deleteFromResearch = int(activity['deleteFromResearch']) if activity.has_key('deleteFromResearch') else 0
        shippingAccess = int(activity['shippingAccess']) if activity.has_key('shippingAccess') else 0
        paymentSuccess = int(activity['paymentSuccess']) if activity.has_key('paymentSuccess') else 0
        orderCreation = int(activity['orderCreation']) if activity.has_key('orderCreation') else 0
        paymentFailure = int(activity['paymentFailure']) if activity.has_key('paymentFailure') else 0
        proceedToPay = int(activity['proceedToPay']) if activity.has_key('proceedToPay') else 0
        
        uniqueView = int(activity['uniqueView']) if activity.has_key('uniqueView') else 0
        uniqueAddToCart = int(activity['uniqueAddToCart']) if activity.has_key('uniqueAddToCart') else 0
        uniqueAddToResearch = int(activity['uniqueAddToResearch']) if activity.has_key('uniqueAddToResearch') else 0
        uniqueDeleteFromCart = int(activity['uniqueDeleteFromCart']) if activity.has_key('uniqueDeleteFromCart') else 0
        uniqueDeleteFromResearch = int(activity['uniqueDeleteFromResearch']) if activity.has_key('uniqueDeleteFromResearch') else 0
        uniqueShippingAccess = int(activity['uniqueShippingAccess']) if activity.has_key('uniqueShippingAccess') else 0
        uniquePaymentSuccess = int(activity['uniquePaymentSuccess']) if activity.has_key('uniquePaymentSuccess') else 0
        uniqueOrderCreation = int(activity['uniqueOrderCreation']) if activity.has_key('uniqueOrderCreation') else 0
        uniquePaymentFailure = int(activity['uniquePaymentFailure']) if activity.has_key('uniquePaymentFailure') else 0
        uniqueProceedToPay = int(activity['uniqueProceedToPay']) if activity.has_key('uniqueProceedToPay') else 0
         
        results.append((dateDelta, catalogId, firstSourceId, sessionSourceId, view, addToCart, addToResearch, 
                        deleteFromCart, deleteFromResearch, shippingAccess, paymentSuccess, orderCreation, paymentFailure, proceedToPay, 
                        uniqueView, uniqueAddToCart, uniqueAddToResearch, uniqueDeleteFromCart, uniqueDeleteFromResearch, 
                        uniqueShippingAccess, uniquePaymentSuccess, uniqueOrderCreation, uniquePaymentFailure, uniqueProceedToPay))
        
    pushResultsToDb(results)

def main():
  if len(sys.argv) > 2:
    print "Usage : ProductActivityLoader.py [date-yyyyMMdd]"
    return
  if len(sys.argv) > 1:
    datestr = sys.argv[1]
    date = datetime.datetime.strptime(datestr, '%Y%m%d')
  else:
    date = datetime.datetime.today() - datetime.timedelta(1)
    print date
  start(date)

if __name__ == '__main__':
    main()