Subversion Repositories SmartDukaan

Rev

Rev 17368 | Rev 17480 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

# coding=utf-8
'''
Created on Jan 15, 2015

@author: amit
'''
from base64 import encode
from bs4 import BeautifulSoup
from datetime import datetime, timedelta, date
from dtr.api.Order import process_rejects
from dtr.dao import Order, SubOrder, AmazonAffiliateInfo
from dtr.main import getStore, Store as MStore, ParseException, getBrowserObject, \
    ungzipResponse, tprint
from dtr.sources.flipkart import todict, AFF_REPORT_URL
from dtr.storage.DataService import OrdersRaw, Orders, Order_Parse_Info, \
    All_user_addresses
from dtr.storage.Mongo import getDealRank
from dtr.utils import utils
from dtr.utils.utils import fetchResponseUsingProxy, readSSh
from elixir import *
import base64
import dtr
import gzip
import mechanize
import os.path
import re
import time
import traceback
import urllib2

# Order page on amazon.in; formatted with the merchant order id to build the
# per-order URL that is stored as the tracking URL and returned to the client.
ORDER_REDIRECT_URL = 'https://www.amazon.in/gp/css/summary/edit.html?orderID=%s'
# Page URLs that mark a freshly placed order: parseOrderRawHtml treats the HTML
# as an order-confirmation page when the success URL contains either of them.
ORDER_SUCCESS_URL = 'https://www.amazon.in/gp/buy/spc/handlers/static-submit-decoupled.html'
THANKYOU_URL = 'https://www.amazon.in/gp/buy/thankyou/handlers/display.html'
# NOTE(review): the two affiliate data-feed URLs below are not used in the part
# of the file reviewed here; the %s in the file URL is presumably a report
# date -- confirm against the code that downloads the report.
AMAZON_AFF_URL = 'https://assoc-datafeeds-eu.amazon.com/datafeed/listReports'
AMAZON_AFF_FILE_URL = 'https://assoc-datafeeds-eu.amazon.com/datafeed/getReport?filename=saholic-21-orders-report-%s.tsv.gz'
class Store(MStore):
    
    # Status phrases grouped by the MStore order status they stand for.
    # NOTE(review): the code that consults this map is not in this part of the
    # file (presumably the status lookup inherited from MStore). Confirm whether
    # matching is case-sensitive: 'out for delivery' is listed in two casings.
    orderStatusRegexMap = { MStore.ORDER_PLACED : ['ordered from', 'not yet dispatched','dispatching now', 'preparing for dispatch', 'order received'],
                            MStore.ORDER_SHIPPED : ['dispatched on','dispatched', 'on the way', 'out for delivery', 'Out for delivery'],
                            MStore.ORDER_CANCELLED : ['return complete', 'refunded', 'cancelled', 'replacement complete', 'return received'],
                            MStore.ORDER_DELIVERED : ['delivered', 'your package was delivered', 'package was handed directly to customer']
                           }

    def __init__(self, store_id):
        """Initialise the store by delegating to the base store constructor."""
        parent = super(Store, self)
        parent.__init__(store_id)
        
    def getName(self):
        return "amazon"
    
    def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl, track=False):
        parseString = "Tracking" if track else "Transacted"
        print parseString, "Order Id to be parsed is :", orderId
        resp = {}
        resp['result'] = 'ORDER_NOT_CREATED'
        if ORDER_SUCCESS_URL in orderSuccessUrl or THANKYOU_URL in orderSuccessUrl:
            try:
                soup = BeautifulSoup(rawHtml)
                try:
                    orderUrl = soup.find('div', {"id":"thank-you-box-wrapper"}).div.findAll('div', recursive=False)[1].a['href']
                    merchantOrderId = re.findall(r'.*&oid=(.*)&?.*?', orderUrl)[0]
                except:
                    merchantOrderId = soup.find(id="orders-list").div.span.b.text
                order = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl, True)
                order.orderTrackingUrl = ORDER_REDIRECT_URL % (merchantOrderId)
                order.orderSuccessUrl = orderSuccessUrl
                order.merchantOrderId = merchantOrderId
                order.requireDetail = True
                order.status = 'html_required'
                order.closed = None
                if self._saveToOrder(todict(order)):
                    resp['result'] = 'ORDER_CREATED'
                    resp["url"] = ORDER_REDIRECT_URL % (merchantOrderId)
                    resp["htmlRequired"] = True
                    resp['orderId'] = orderId
                else:
                    resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'
                    
            except:
                #Write all cases here for Order Not created Known
                try:
                    if not soup.body:
                        resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                    elif 'Securely redirecting you' in soup.find("h3").text.strip() or soup.find("h3").text.strip()=="Orders":
                        resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                    else:
                        raise
                except:
                    try:
                        if soup.find("h1").text.strip() in ['This is a duplicate order', 'There was a problem with your payment.', 'Your Orders', 'Your Shopping Cart is empty.', 'Select a payment method', 'Edit quantities'] or "Saved for later" in soup.find("h1").text.strip():
                            resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                        else:
                            raise
                    except:
                        try:
                            if soup.find("h2").text.strip() in ['Web page not available','Webpage not available', 'Do you have an Amazon password?']:
                                resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                            else:
                                raise
                        except:
                            try:
                                if soup.find(id="loading-spinner-img") is not None or soup.find(id="anonCarousel1") is not None or soup.find(id="ap_signin_pagelet_title") is not None or soup.find(id="nav-greeting-name") is not None:
                                    resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                                elif soup.find("b", {'class':'h1'}).text.strip().find("We're sorry") > -1:
                                    resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                                else:
                                    raise
                            except:
                                resp['result'] = 'ORDER_NOT_CREATED_UNKNOWN'
                                
        else:
            try:
                mo = self.db.merchantOrder.find_one({"orderId":orderId})
                if mo is not None:
                    merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl, False)
                    merchantOrder.createdOn = mo.get("createdOn")
                    merchantOrder.createdOnInt =  mo.get("createdOnInt")
                else:
                    print "Could not find amazon order with order Id", orderId
                    merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)
                soup = BeautifulSoup(rawHtml)
                if not soup.body:
                    resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'
                else:   
                    try:
                        self.parseNewStlye(merchantOrder, soup)
                        resp['result'] = 'DETAIL_CREATED'
                        return
                    except:
                        try:
                            traceback.print_exc()
                            self.parseOldStlye(merchantOrder, soup)
                            resp['result'] = 'DETAIL_CREATED'
                            return
                        except:
                            traceback.print_exc()
                            try:
                                self.parseCancelled(merchantOrder, soup)
                                resp['result'] = 'ORDER_CANCELLED'
                            except:
                                try:
                                    if soup.find("h1").text.strip() in ["Your Account"] or soup.find("h1").span.text=="Account":
                                        resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'
                                    else:
                                        raise
                                except:
                                    if soup.find(id="ap_signin_pagelet_title").find("h1").text.strip()=="Sign In":
                                        resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'
                                    else:
                                        raise
                order = self.db.merchantOrder.find_one({"orderId":orderId})
                if order is not None:
                    self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"status":"html_required"}})
            except:
                order = self.db.merchantOrder.find_one({"orderId":orderId})
                if order is not None:
                    self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"status":"parse_failed"}})
                print "Error occurred"
                resp['result'] = 'DETAIL_NOT_CREATED_UNKNOWN'
                traceback.print_exc()
        return resp    
                    
    #This should be exposed from api for specific sources
    def scrapeStoreOrders(self):
        orders = self.db.merchantOrder.find({"storeId":1, "closed":False, "subOrders.closed":False, "subOrders.trackingUrl":{"$exists":True}})
        for merchantOrder in orders:
            executeBulk = False
            try:
                bulk = self.db.merchantOrder.initialize_ordered_bulk_op()
                closed = True
                map1 = {}
                for subOrder in merchantOrder.get("subOrders"):
                    if subOrder.get("closed"):
                        continue
                    elif subOrder.get("trackingUrl") is None:
                        closed = False
                        continue
                    findMap = {"orderId":merchantOrder.get("orderId"), "subOrders.merchantSubOrderId":subOrder.get("merchantSubOrderId")} 
                    trackingUrl = subOrder.get("trackingUrl")
                    if not map1.has_key(trackingUrl):
                        map1[trackingUrl] = self.parseTrackingUrl(trackingUrl, merchantOrder.get("orderId"))
                    newOrder = map1.get(trackingUrl)
                    if newOrder:
                        executeBulk = True
                        updateMap = self.getUpdateMap(newOrder, subOrder.get('cashBackStatus'))
                        print findMap, "\n", updateMap
                        bulk.find(findMap).update({'$set' : updateMap})
                        closed = closed and newOrder['closed']
                if executeBulk:
                    bulk.find({"orderId":merchantOrder.get("orderId")}).update({"$set":{"closed":closed, "parseError":False}})
                    bulk.execute()
            except:
                tprint("Could not update " + str(merchantOrder['orderId']) + " For store " + self.getName())
                self.db.merchantOrder.update({"orderId":merchantOrder['orderId']}, {"$set":{"parseError":True}})
                traceback.print_exc()
                    
                
             
    def parserest(self, soup):
        print "Hi"
        if soup.find('h1'):
            print "OK"
        
    def parseOldStlye(self, merchantOrder, soup):
        """Parse the old, table-based order-detail page into ``merchantOrder``.

        Fills in the order header (placed-on date, merchant order id, paid
        amount), builds one SubOrder per product row of every "Delivery #n"
        section, adjusts the sub-order amounts when the grand total is lower
        than the subtotal, and persists the order via ``_updateToOrder`` with
        status ``success``.

        Any lookup that does not match the expected markup raises; the caller
        (parseOrderRawHtml) wraps this call in a bare ``except`` and falls back
        to other parsers.
        """
        merchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrl
        # The order content lives in the second top-level table of <body>.
        table = soup.body.findAll("table", recursive=False)[1]
        #print table
        tables = table.tr.td.findAll("table", recursive=False)
        # Header rows: each carries a bold label followed by its value.
        for tr in tables[2].findAll("tr"):
            boldElement = tr.td.b
            if "Order Placed" in str(boldElement):
                merchantOrder.placedOn = boldElement.next_sibling.strip()
            if "order number" in str(boldElement):
                merchantOrder.merchantOrderId = boldElement.next_sibling.strip()
            if "Order Total" in str(boldElement):
                merchantOrder.paidAmount = int(float(boldElement.find('span').contents[-1].replace(',','')))
        # Direct-child anchors separate the sections: one per delivery, and the
        # last one precedes the payment summary.
        anchors = table.tr.td.findAll("a", recursive=False)
        paymentAnchor = anchors.pop(-1)
        
        count = 0
        subOrders = []
        merchantOrder.subOrders = subOrders
        # Running product counter across all deliveries; used for merchantSubOrderId.
        counter = 0 
        for anchor in anchors:
            count += 1
            tab = anchor.next_sibling
            status = MStore.ORDER_PLACED
            subStr = "Delivery #" + str(count) + ":"
            # NOTE(review): detailedStatus is only assigned when the header matches;
            # otherwise it keeps the previous delivery's value, or is unbound on the
            # first delivery and the use further down raises.
            if subStr in  tab.find("b").text:
                detailedStatus = tab.find("b").text.replace(subStr, '').strip()
            
            tab = tab.next_sibling.next_sibling
            trs = tab.find("table").find('tbody').findAll("tr", recursive = False)
            
            estimatedDelivery = trs[0].td.find("b").next_sibling.strip()
            
            orderItemTrs = trs[1].findAll("td", recursive=False)[1].table.tbody.findAll("tr", recursive = False)
            i = -1
            for orderItemTr in orderItemTrs:
                i += 1
                # Only odd-indexed rows are parsed as products; even-indexed rows are skipped.
                if i%2 == 0:
                    continue
                counter += 1
                # The first number found in the first node of the cell is taken as the quantity.
                quantity =  int(re.findall(r'\d+', orderItemTr.td.contents[0])[0])
                
                productUrl = orderItemTr.td.contents[1].a["href"]
                productTitle = orderItemTr.td.contents[1].a.text
                
                unitPrice = int(float(orderItemTr.findAll('td')[1].span.text.replace('Rs. ','').replace(',','')))
                
            
                subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, unitPrice*quantity, status, quantity)
                subOrder.merchantSubOrderId = str(counter) + " of " + merchantOrder.merchantOrderId
                subOrder.estimatedDeliveryDate = estimatedDelivery
                # The estimate may be a range "start - end"; only the start date is parsed.
                estDlvyTime = datetime.strptime(estimatedDelivery.split('-')[0].strip(), "%A %d %B %Y")
                createdOn = datetime.fromtimestamp(merchantOrder.createdOnInt)
                # Start tracking at the later of (estimate - 4 days) and (creation + 3 days).
                subOrder.trackAfter = int(time.mktime(max(estDlvyTime-timedelta(days=4),createdOn + timedelta(days=3)).timetuple()))
                # Product code is the 6th '/'-separated piece of the product URL.
                subOrder.productCode = productUrl.split('/')[5]
                subOrder.detailedStatus = detailedStatus
                (cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, unitPrice*quantity)
                dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)
                subOrder.dealRank = dealRank.get('rank')
                subOrder.rankDesc = dealRank.get('description')
                subOrder.maxNlc = dealRank.get('maxNlc')
                subOrder.minNlc = dealRank.get('minNlc')
                # NOTE(review): attribute is named "db" while the key read is 'dp' --
                # possibly a typo for "dp"; confirm against the SubOrder definition.
                subOrder.db = dealRank.get('dp')
                subOrder.itemStatus = dealRank.get('status')
                # Cashback is pending unless there is nothing to pay back.
                cashbackStatus = Store.CB_PENDING
                if cashbackAmount <= 0:
                    cashbackStatus = Store.CB_NA
                subOrder.cashBackStatus = cashbackStatus
                subOrder.cashBackAmount = cashbackAmount
                if percentage > 0:
                    subOrder.cashBackPercentage = percentage
                subOrders.append(subOrder)
        # Payment summary rows follow the last anchor.
        priceList = paymentAnchor.next_sibling.next_sibling.next_sibling.table.table.tbody.tbody.tbody.findAll('tr', recursive=False)
        totalAmount = 0
        grandAmount = 0
        for price in priceList:
            labelTd = price.td
            if 'Subtotal:' in labelTd.text:
                totalAmount += int(float(labelTd.next_sibling.next_sibling.find('span').contents[-1].replace(',','')))
            elif 'Grand Total:' in labelTd.text:
                grandAmount += int(float(labelTd.next_sibling.next_sibling.find('span').contents[-1].replace(',','')))
        # A grand total below the subtotal means a discount was applied; reduce the
        # sub-order amounts accordingly.
        if grandAmount < totalAmount:
            diff = totalAmount - grandAmount
            for subOrder in merchantOrder.subOrders:
                # NOTE(review): if amountPaid and totalAmount are both ints, "/" is
                # floor division under Python 2, so the ratio is 0 or 1; the
                # (1 - ratio) weighting also looks inverted for a proportional split.
                # Confirm the intended formula before changing it.
                subOrder.amountPaid -= int(diff*(1-subOrder.amountPaid/totalAmount))
        merchantOrder.status='success'
        self._updateToOrder(todict(merchantOrder))

    def parseNewStlye(self, merchantOrder, soup):
        """Parse the new (div-based, id="orderDetails") order-detail page.

        Fills in the order header (placed-on date, merchant order id, paid
        amount), builds a SubOrder for every product found in the shipment
        boxes, and persists the order via ``_updateToOrder`` with status
        ``success``. ``merchantOrder.closed`` ends up True only when every
        parsed sub-order carries a "Delivered on" date.

        The tracking link of a shipment, when present, is stored on
        ``subOrder.trackingUrl`` -- the field name that scrapeStoreOrders
        queries and reads.

        Any lookup that does not match the expected markup raises; the caller
        (parseOrderRawHtml) wraps this call in a bare ``except`` and falls back
        to other parsers.
        """
        merchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrl
        orderDetailsContainer = soup.body.find(id="orderDetails")
        divAfterH1 = orderDetailsContainer.h1.next_sibling.next_sibling
        orderLeftDiv = divAfterH1.div
        placedOnSpan = orderLeftDiv.find("span", {'class':'order-date-invoice-item'})
        merchantOrder.placedOn =placedOnSpan.text.split('Ordered on')[1].strip()
        merchantOrder.merchantOrderId = placedOnSpan.next_sibling.next_sibling.text.split('Order#')[1].strip()
        # The price summary box is looked up under two alternative class names.
        try:
            priceBox = divAfterH1.next_sibling.next_sibling.next_sibling.next_sibling.find("div", {"class":"a-box-inner"}).div.div.findAll('div', recursive=False)[-1]
        except:
            priceBox = divAfterH1.next_sibling.next_sibling.next_sibling.next_sibling.find("div", {"class":"a-box a-last"}).div.div.findAll('div', recursive=False)[-1]
        priceRows = priceBox.findAll('div', {'class':'a-row'})
        subTotal = 0
        shippingPrice = 0   
        promoApplied = 0
        for priceRow in priceRows:
            # "Grand Total:" is tested before the plain "Total:" substring on purpose.
            if "Item(s) Subtotal:" in str(priceRow):
                subTotal = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))
            elif "Shipping:" in str(priceRow):
                shippingPrice  = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))
            elif "Grand Total:" in str(priceRow):
                grandPrice  = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))
                merchantOrder.paidAmount = grandPrice
            elif "Total:" in str(priceRow):
                totalPrice  = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))
            elif "Promotion Applied:" in str(priceRow):
                promoApplied  += int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))
        # Amount attributed to the items: subtotal minus promotions, with shipping
        # added back only when the promotion is at least as large as the shipping.
        totalPaid = subTotal        
        if promoApplied > 0:
            totalPaid -= promoApplied
            if shippingPrice <= promoApplied:
                totalPaid  += shippingPrice
        
        # Direct child divs of the first element with class "shipment".
        shipmentDivs = orderDetailsContainer.find('div', class_='shipment').findAll('div', recursive = False)
        subOrders = []
        merchantOrder.subOrders = subOrders
        closedStatus = True
        for shipmentDiv in shipmentDivs:
            try:
                trackingUrl = 'http://www.amazon.in/' + shipmentDiv.find('span', class_='track-package-button').span.a.get('href')
            except:
                # No "track package" button in this box.
                trackingUrl= None
                
            deliverySpanTop = None
            innerBoxes = shipmentDiv.findAll('div', recursive = False)
            statusDiv = innerBoxes[0]
            subOrderStatus = statusDiv.div.span.text.strip()
            try:
                # Layout with one delivery line shared by all products of the box.
                deliverySpanTop = statusDiv.div.div.find_next_sibling('div').span
                productDivs = innerBoxes[-1].div.div.div.findAll('div', recursive=False)
            except:
                # Layout where delivery info and product alternate in the list.
                productDivs = innerBoxes[-1].div.div.findAll('div', recursive=False)
            # NOTE(review): the sub-order list is reset for every box, so only the
            # sub-orders of the last box processed remain on the order -- confirm
            # this is intended.
            subOrders = []
            merchantOrder.subOrders = subOrders
            for i, productDiv in enumerate(productDivs):
                deliverySpan = deliverySpanTop
                j=i+1
                if not deliverySpanTop:
                    # Alternating layout: even index = delivery info, odd index = product.
                    if i%2==1:
                        continue
                    j=i/2 + 1
                    deliverySpan = productDiv.div.div.span
                    productDiv = productDivs[i+1].div
                imgDiv  = productDiv.div.div
                detailDiv = imgDiv.find_next_sibling('div')
                detailDivs = detailDiv.findAll('div', recursive=False)
                # Link text is either "<title>" or "<quantity> of <title>".
                arr = detailDivs[0].a.text.strip().split(" of ", 1)
                (productTitle, quantity) = (arr[-1], (1 if len(arr)==1 else int(arr[0])) )
                try:
                    unitPrice = int(float(detailDivs[2].span.text.replace('Rs. ','').replace(',','')))
                except:
                    unitPrice = int(float(detailDivs[3].span.text.replace('Rs. ','').replace(',','')))
                # Share of the amount paid, proportional to this line's list value.
                amountPaid = int((unitPrice*quantity*totalPaid)/subTotal)
                productUrl = "http://www.amazon.in" + detailDivs[0].a.get('href')
                subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, amountPaid, MStore.ORDER_PLACED, quantity)
                # Product code is the 6th '/'-separated piece of the product URL.
                subOrder.productCode = productUrl.split('/')[5]
                subOrder.unitPrice = unitPrice
                subOrder.merchantSubOrderId = str(j) + " of " + merchantOrder.merchantOrderId
                # Falls back to "now" when no delivery estimate can be parsed.
                estDlvyTime = datetime.now()
                if deliverySpan is not None:
                    try:
                        subOrder.estimatedDeliveryDate = deliverySpan.span.text.strip()
                        # The estimate may be a range "start - end"; only the start is parsed.
                        estDate = subOrder.estimatedDeliveryDate.split("-")[0].strip()
                        subOrder.estimatedDeliveryInt = int(time.mktime((datetime.strptime(estDate, "%A %d %B %Y")).timetuple()))
                        estDlvyTime = datetime.strptime(estDate, "%A %d %B %Y")
                    except:
                        if "Delivered on" in deliverySpan.text:
                            subOrder.deliveredOn = deliverySpan.text.split(":")[1].strip() 
                        subOrder.estimatedDeliveryDate = "Not available"
                        
                createdOn = datetime.fromtimestamp(merchantOrder.createdOnInt)
                # Start tracking at the later of (estimate - 4 days) and (creation + 3 days).
                subOrder.trackAfter = int(time.mktime(max(estDlvyTime-timedelta(days=4),createdOn + timedelta(days=3)).timetuple()))
                subOrder.detailedStatus = subOrderStatus
                subOrder.deliveryCharges = shippingPrice
                if trackingUrl:
                    # Must be "trackingUrl": scrapeStoreOrders filters on
                    # subOrders.trackingUrl and reads that key from each sub-order.
                    subOrder.trackingUrl = trackingUrl
                subOrder.imgUrl = imgDiv.img["src"]
                (cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amountPaid)
                dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)
                subOrder.dealRank = dealRank.get('rank')
                subOrder.rankDesc = dealRank.get('description')
                # Cashback is pending unless there is nothing to pay back.
                cashbackStatus = Store.CB_PENDING
                if cashbackAmount <= 0:
                    cashbackStatus = Store.CB_NA
                subOrder.cashBackStatus = cashbackStatus
                subOrder.cashBackAmount = cashbackAmount
                if percentage > 0:
                    subOrder.cashBackPercentage = percentage
                if hasattr(subOrder, 'deliveredOn'):                               
                    # Delivered: close the sub-order and approve pending cashback.
                    subOrder.status = Store.ORDER_DELIVERED
                    subOrder.closed = True
                    if subOrder.cashBackStatus == Store.CB_PENDING:
                        subOrder.cashBackStatus = Store.CB_APPROVED
                elif closedStatus:
                    # One undelivered sub-order keeps the whole order open.
                    closedStatus= False
                subOrders.append(subOrder)
        merchantOrder.status='success'
        merchantOrder.closed = closedStatus
        self._updateToOrder(todict(merchantOrder))
        
    def parseCancelled(self, merchantOrder,soup):
        """Detect a cancelled-order page and persist the order as cancelled.

        Two page shapes are recognised:

        * old layout: in the second top-level table, the first two <font>
          elements read "Important Message" and
          "This order has been cancelled.";
        * new layout: the element with id "orderDetails" has an <h4> reading
          "This order has been cancelled.".

        On a match the order is marked closed/cancelled and saved through
        ``_updateToOrder``.

        Raises:
            ParseException: when neither shape matches. The message carries
                the second <font> text of the old layout when it was found,
                and is otherwise left without a detail.
        """
        cancelledMessage = "This order has been cancelled."
        detailedStatus = ""
        isCancelled = False
        try:
            fonts = soup.body.findAll("table", recursive=False)[1].findAll("font")
            if len(fonts) > 1:
                # Remembered for the error message if nothing matches.
                detailedStatus = fonts[1].text
                isCancelled = fonts[0].text == "Important Message" and fonts[1].text == cancelledMessage
        except Exception:
            # Markup is not the old layout; try the new one below.
            isCancelled = False
        if not isCancelled:
            orderDetails = soup.body.find(id="orderDetails") if soup.body is not None else None
            heading = orderDetails.h4 if orderDetails is not None else None
            isCancelled = heading is not None and heading.text == cancelledMessage
        if not isCancelled:
            raise ParseException("parseCancelled", "Found detailed status" + detailedStatus)
        merchantOrder.closed = True
        merchantOrder.status = "cancelled"
        merchantOrder.requireDetail = False
        self._updateToOrder(todict(merchantOrder))

    def getTrackingUrls(self, userId):
        
        missingOrderUrls = []
        missingOrders = self._getMissingOrders({'userId':userId})
        for missingOrder in missingOrders:
            missingOrderUrls.append(ORDER_REDIRECT_URL%(missingOrder['merchantOrderId']))
        orders = self._getActiveOrders({'userId':userId})
        count = len(orders)
        print "count", count
        print "Missing Urls"
        print "*************"
        print missingOrderUrls
        if count > 0:
            return missingOrderUrls + ['https://www.amazon.in/gp/css/order-history', 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled', 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled&startIndex=10']
        else: 
            return missingOrderUrls
            
    def trackOrdersForUser(self, userId, url, rawHtml):
        directory = "/AmazonTrack/User" + str(userId)
        if not os.path.exists(directory):
            os.makedirs(directory)
        
        
        try:
            searchMap = {'userId':userId}
            collectionMap = {'merchantOrderId':1}
            activeOrders = self._getActiveOrders(searchMap, collectionMap)
            datetimeNow = datetime.now()
            timestamp = int(time.mktime(datetimeNow.timetuple()))
            print "url----------------", url
            
            if url == 'https://www.amazon.in/gp/css/order-history' or 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled' in url:
                if url == 'https://www.amazon.in/gp/css/order-history':
                    filename = directory + "/orderSummary" + datetime.strftime(datetime.now(), '%d-%m:%H:%M:%S')
                else:   
                    filename = directory + "/cancelledSummary" + datetime.strftime(datetime.now(), '%d-%m:%H:%M:%S')   
                f = open(filename,'w')
                f.write(rawHtml) # python will convert \n to os.linesep
                f.close() # you can omit in most cases as the destructor will call if
                soup = BeautifulSoup(rawHtml)
                allOrders = soup.find(id="ordersContainer").findAll('div', {'class':'a-box-group a-spacing-base order'})
                bulk = self.db.merchantOrder.initialize_ordered_bulk_op()
                for activeOrder in activeOrders:
                    matched=False
                    for orderEle in allOrders:
                        deliveredOn = None
                        deliveryEstimate = None
                        shippingEstimate = None
                        orderdiv = orderEle.find('div', {'class':'a-box a-color-offset-background order-info'}).find('div', {'class':'a-fixed-right-grid-col actions a-col-right'})
                        merchantOrderId = orderdiv.find('span', {'class':'a-color-secondary value'}).text.strip()
                        if merchantOrderId==activeOrder['merchantOrderId']:
                            matched=True
                            closed = True
                            shipments = orderEle.findAll('div',{'class':re.compile('.*?shipment.*?')}, recursive=False)
                            for shipment in shipments:
                                orderStatusDesc = None
                                shipdiv = shipment.find('div', {'class':'a-box-inner'})
                                sdivs = shipment.div.div.findAll('div', recursive=False)
                                try:
                                    orderStatus = sdivs[0].span.text.strip()
                                    status = self._getStatusFromDetailedStatus(orderStatus)
                                except:
                                    try:
                                        dateString = orderStatus.split("Delivered ")[1].strip()
                                        status = MStore.ORDER_DELIVERED
                                        deliveredOn = datetime.strftime(getDateStringDelivered(dateString), '%d-%b-%y')
                                    except:
                                        try:
                                            dateString = sdivs[0].span.text.strip().split("Arriving ")[1].split("by")[0].strip()
                                            status = MStore.ORDER_SHIPPED
                                            deliveryEstimate = datetime.strftime(getDateStringArriving(dateString), '%d-%b-%y')
                                        except:
                                            print "Unknown status Alert -", orderStatus
                                print merchantOrderId, "Order Status", orderStatus
                                try:
                                    orderStatusDesc = sdivs[0].findAll('div')[1].div.text.strip()
                                except:
                                    try:
                                        orderStatusDesc = sdivs[0].findAll('div')[1].text.strip()
                                    except:
                                        print "Order Status Description None or empty for", merchantOrderId, "and User", userId

                                if orderStatusDesc: 
                                    print merchantOrderId, "Order status desc", orderStatusDesc
                                    try:
                                        status = self._getStatusFromDetailedStatus(orderStatus)
                                    except:
                                        pass
                                    try:
                                        if "Dispatch estimate" in orderStatusDesc:
                                            shippingEstimate = orderStatus.split("Dispatch estimate").split("-")[0].strip()
                                        elif "Delivery estimate" in orderStatus:
                                            deliveryEstimate = orderStatus.split("Delivery estimate").split("-")[0].strip()
                                        elif "Arriving" in orderStatus:
                                            deliveryEstimate = datetime.strftime(getDateStringArriving(orderStatus.split("Arriving")[1].strip().split("by")[0].strip()), '%d-%b-%y')
                                    except:
                                        print "Could not find anything relevent for merchantOrder", merchantOrderId, "and User", userId 
                                        closed=False
                                        status = None

                                productDivs = shipdiv.find('div', {'class':re.compile('.*?a-spacing-top-medium.*?')}).find('div', {'class':'a-row'}).findAll('div', recursive=False)
                                trackingUrl = None
                                for buttonDiv in shipdiv.findAll('span', {'class':'a-button-inner'}):
                                    if buttonDiv.find('a').text.strip()=='Track package':
                                        trackingUrl = buttonDiv.find('a')['href'].strip()
                                        if not trackingUrl.startswith("http"):
                                            trackingUrl = "http://www.amazon.in" + trackingUrl
                                        break
                                for prodDiv in productDivs:
                                    prodDiv.find('div', {'class':'a-fixed-left-grid-inner'})
                                    productTitle = prodDiv.find('div', {'class':'a-fixed-left-grid-inner'}).find("div", {'class':'a-row'}).find('a').text.strip()
                                    imgUrl = prodDiv.find("img")["src"]
                                    for subOrder in activeOrder['subOrders']:
                                        if subOrder['closed']==True:
                                            continue
                                        if subOrder['productTitle'] in productTitle:
                                            findMap = {"orderId": activeOrder['orderId'], "subOrders.merchantSubOrderId": subOrder.get("merchantSubOrderId")}
                                            updateMap = {}
                                            closedStatus = False
                                            updateMap['subOrders.$.imgUrl'] = imgUrl
                                            updateMap['subOrders.$.lastTracked'] = timestamp
                                            if status:
                                                updateMap['subOrders.$.detailedStatus'] = orderStatus
                                                updateMap['subOrders.$.status'] = status 
                                                cashbackStatus = subOrder.get("cashBackStatus")
                                        
                                            if status==MStore.ORDER_DELIVERED:
                                                if deliveredOn:                               
                                                    updateMap['subOrders.$.deliveredOn'] = deliveredOn
                                                closedStatus = True
                                                updateMap['subOrders.$.closed'] = True
                                                if cashbackStatus == Store.CB_PENDING:
                                                    updateMap['subOrders.$.cashBackStatus'] = Store.CB_APPROVED
                                            if status==MStore.ORDER_CANCELLED:     
                                                closedStatus = True
                                                updateMap['subOrders.$.closed'] = True
                                                if cashbackStatus == Store.CB_PENDING:
                                                    updateMap['subOrders.$.cashBackStatus'] = Store.CB_CANCELLED
                                            if status==MStore.ORDER_SHIPPED:
                                                if deliveryEstimate:   
                                                    updateMap['subOrders.$.estimatedDeliveryDate'] = deliveryEstimate
                                                if trackingUrl is not None:
                                                    updateMap['subOrders.$.trackingUrl'] = trackingUrl
                                                    updateMap['subOrders.$.trackMissing'] = False
                                            if shippingEstimate:   
                                                updateMap['subOrders.$.estimatedShippingDate'] = shippingEstimate
                                            if not closedStatus:
                                                closed = False
                                            #{"subOrders.closed":False,"subOrders.trackingUrl":{"$exists":False},"subOrders.trackAfter":{"$lt":utils.getCurrTimeStamp()}
                                            updateMap['status']='success'
                                            bulk.find(findMap).update({'$set' : updateMap})
                                            break
                            bulk.find({'orderId': activeOrder['orderId']}).update({"$set":{'closed':closed}})
                            break
                    if not matched:
                        updateMap = {'subOrders.$.trackMissing': True}
                        for subOrder in activeOrder['subOrders']:
                            if subOrder['closed']==True:
                                continue
                            findMap = {"orderId": activeOrder['orderId'], "subOrders.merchantSubOrderId": subOrder.get("merchantSubOrderId"), 
                                       "subOrders.trackAfter":{"$lt":utils.getCurrTimeStamp()}}
                            bulk.find({'orderId': activeOrder['orderId']})
                            bulk.find(findMap).update({'$set':updateMap})
                bulk.execute()
                return 'PARSED_SUCCESS'
            else:
                merchantOrderId = re.findall(r'https://www.amazon.in/gp/css/summary/edit.html\?orderID=(.*)?', url, re.IGNORECASE)[0]
                print  "merchantOrderId", merchantOrderId
                merchantOrder = self.db.merchantOrder.find_one({"merchantOrderId":merchantOrderId})
                
                filename = directory + "/" + merchantOrderId   
                f = open(filename,'w')
                f.write(rawHtml) # python will convert \n to os.linesep
                f.close() # you can omit in most cases as the destructor will call if
                result = self.parseOrderRawHtml(merchantOrder['orderId'], merchantOrder['subTagId'], merchantOrder['userId'], rawHtml, url, True)['result']
                print "result", result
                try:
                    order1 = session.query(OrdersRaw).filter_by(id=merchantOrder['orderId']).first()
                    order1.status = result
                    order1.rawhtml = rawHtml
                    order1.order_url = url
                    session.commit()
                except:
                    traceback.print_exc()
                finally:
                    session.close()
                return 'PARSED_SUCCESS'
                pass
            return 'PARSED_SUCCESS_NO_ORDERS'
        except:
            traceback.print_exc()    
            return 'PARSED_FAILED'
            
    def _getStatusFromDetailedStatus(self, detailedStatus):
        if "ordered from" in detailedStatus.lower():
            return MStore.ORDER_PLACED 

        for key, value in self.orderStatusRegexMap.iteritems():
            if detailedStatus.lower() in value:
                return key
        
        print "Detailed Status need to be mapped", "Store:", self.store_id, detailedStatus
        raise ParseException("_getStatusFromDetailedStatus", "Found new order status" + detailedStatus)
    def scrapeAffiliate(self, startDate=None, endDate=None):
        br = getBrowserObject()
        br.add_password('https://assoc-datafeeds-eu.amazon.com', 'Saholic', 'Fnubyvp')
        url = AMAZON_AFF_URL
        response = br.open(url)
        #get data for past 40 days and store it to mongo
        dt = datetime.now()
        dat = dt - timedelta(days=2)
        url = AMAZON_AFF_FILE_URL%(datetime.strftime(dat, "%Y%m%d"))
        response = br.open(url)
        page = gzip.GzipFile(fileobj=response, mode='rb').read()
        j=-1
        for row in page.split("\n"):
            j += 1
            if j== 0 or j==1:
                continue
            fields = row.split("\t")
            if len(fields)>1:
                print fields
                amazonAffiliate = AmazonAffiliateInfo(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], fields[6], fields[7], fields[8], fields[9])
                print amazonAffiliate
                self.db.amazonAffiliateInfo.insert(todict(amazonAffiliate))
            else:
                break
                
    
    def parseTrackingUrl(self, trackingUrl, orderId):
        print trackingUrl
        subOrder = {}
        page = fetchResponseUsingProxy(trackingUrl)
        status = MStore.ORDER_SHIPPED
        #print page
        soup = BeautifulSoup(page)
        header1 = soup.find("h1") 
        if header1:
            if header1.text=="Sign In":
                print "Login page is displayed for order id", orderId
                self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":True}})
                return subOrder
        try:
            print "Tracking page is displayed for order id", orderId
            detailedStatus = soup.find("div", {"class":"top"}).span.text.strip()
            try:
                displayStatus = soup.find("div",{"class":"a-column a-span12 shipment-status-content"}).span.text.strip()
            except:
                displayStatus = detailedStatus
            print displayStatus
            if detailedStatus.lower().find("delivered")>=0:
                print detailedStatus
                displayStatus = "Delivered"
                status = "Delivered"
                try:
                    subOrder["deliveredOn"] = detailedStatus.split("on")[1].strip()
                except:
                    pass
                
            elif detailedStatus.lower() == 'returned':
                status = 'Cancelled'
            subOrder['status'] = status
            subOrder['detailedStatus'] = displayStatus 
            self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":False}})    
            
        except:
            self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":True}})    
            print "failed to parse", orderId
            traceback.print_exc()
                
        return subOrder
            
             
            
    def parseInfo(self,):
        """Extract address details from the stored pages of DETAIL_CREATED orders.

        Orders of this store with status 'DETAIL_CREATED' are queried grouped
        by user_id.  For each one the 'ul.displayAddressUL li' items of the raw
        HTML are read: the third-from-last item is taken as the address and the
        second-to-last as "<city>, <state> <pincode>".  The values are set on a
        new All_user_addresses object and the session is committed (presumably
        this persists the object - confirm how the entity joins the session).

        An order whose page cannot be parsed is rolled back and skipped.  The
        session is always closed at the end.
        """
        from pyquery import PyQuery as pq
        orders = list(session.query(Orders).filter_by(store_id=self.store_id).filter_by(status='DETAIL_CREATED').group_by(Orders.user_id).all())
        try:
            for order in orders:
                try:
                    doc = pq(order.rawhtml)
                    lists = doc('ul.displayAddressUL li')
                    addressLine = lists[-3].text
                    adSplit = lists[-2].text.split(",")
                    match = re.match(r"([a-z ]+)([0-9]+)", adSplit[1], re.I)
                    if match is None:
                        # Without a match there is no state/pincode for THIS
                        # order; never reuse the values of a previous one.
                        session.rollback()
                        continue
                    state, pincode = match.groups()

                    orderInfo = All_user_addresses()
                    orderInfo.address = addressLine
                    orderInfo.user_id = order.user_id
                    orderInfo.source = 'order'
                    orderInfo.city = adSplit[0].strip()
                    orderInfo.pincode = pincode.strip()
                    orderInfo.state = state.strip().title()
                    session.commit()
                except Exception:
                    # Best effort: one malformed page must not stop the others.
                    session.rollback()
                    continue
        finally:
            session.close()
        


def main():
    """Reset the OrdersRaw status of selected merchant orders to 'ORDER_CREATED'.

    The selection is hard-coded: merchantOrder documents with orderId greater
    than 47644 and storeId 3, read through the store returned by getStore(1).
    There is no except clause, so an error in the lookup or commit propagates
    after the session has been closed.
    """
    store = getStore(1)
    criteria = {"orderId":{"$gt":47644}, "storeId":3}
    for merchantOrder in store.db.merchantOrder.find(criteria):
        try:
            rawOrderId = merchantOrder.get("orderId")
            rawOrder = session.query(OrdersRaw).filter_by(id=rawOrderId).one()
            rawOrder.status = 'ORDER_CREATED'
            session.commit()
        finally:
            session.close()
    #store.trackOrdersForUser(8703, 'https://www.amazon.in/gp/css/order-history', readSSh('/home/amit/Downloads/orderSummary06-10_12_15_54'))
    
def getSummaryFile(directory):
    """Return the name of the latest-dated "orderSummary" file in *directory*.

    The text between the "orderSummary" prefix and the first ":" of a file
    name is read as "<day>-<month>".  The year is fixed at 2015, so only the
    day and month decide which file is the latest.  Files whose names do not
    parse are skipped individually instead of aborting the scan.

    Returns None (after printing "Missing directory") when the directory cannot
    be listed, and None when no file qualifies.
    """
    try:
        names = os.listdir(directory)
    except (OSError, TypeError):
        print("Missing directory")
        return None

    latestName = None
    latestStamp = None
    for name in names:
        if not name.startswith("orderSummary"):
            continue
        dayMonth = name.split("orderSummary")[1].split(":")[0]
        try:
            stamp = datetime.strptime("2015-" + dayMonth, "%Y-%d-%m")
        except ValueError:
            # NOTE(review): names using another separator than ":" after the
            # date end up here - confirm the format the writer produces.
            continue
        # Starting from None (not a sentinel date) lets a file dated 01-01 win.
        if latestStamp is None or stamp > latestStamp:
            latestStamp = stamp
            latestName = name
    return latestName
                

def parseDetailNotCreated():
    """Re-run trackOrdersForUser for OrdersRaw rows in 'DETAIL_NOT_CREATED_UNKNOWN'.

    The rows are loaded first and the session is closed before the store from
    getStore(1) processes each row's id, order_url and rawhtml.  The session
    is closed again on the way out, whether or not processing succeeded.
    """
    try:
        store = getStore(1)
        pendingQuery = session.query(OrdersRaw).filter_by(status='DETAIL_NOT_CREATED_UNKNOWN')
        pendingOrders = pendingQuery.all()
        session.close()
        for rawOrder in pendingOrders:
            store.trackOrdersForUser(rawOrder.id, rawOrder.order_url, rawOrder.rawhtml)
    finally:
        session.close()
        
def getDateStringDelivered(dateString='Monday'):
    """Resolve the date text of a "delivered" status to a past calendar date.

    Accepted forms, tried in this order:
      * "today" / "yesterday" (any case)          -> datetime.date
      * "dd-Mon-yy", e.g. "15-Jan-15"             -> datetime.datetime
      * "Weekday, dd Mon", e.g. "Monday, 05 Oct"  -> datetime.datetime; the
        current year is used unless that date lies in the future, in which
        case the previous year is used
      * a bare weekday name, e.g. "Monday"        -> datetime.date of its most
        recent past occurrence (1 to 7 days ago)

    Returns None (after printing "could not parse") for any other text.
    """
    # Single-argument print(...) prints the same text whether print is a
    # statement or a function.
    print(dateString)
    today = date.today()
    lowered = dateString.lower()
    if lowered == 'today':
        return today
    if lowered == 'yesterday':
        return today - timedelta(days=1)

    try:
        return datetime.strptime(dateString, '%d-%b-%y')
    except ValueError:
        pass

    # The text carries no year.  Each candidate year is parsed on its own so
    # that "29 Feb" still resolves when only one of the two years is a leap year.
    midnight = datetime(today.year, today.month, today.day)
    for year in (today.year, today.year - 1):
        try:
            candidate = datetime.strptime(dateString + " " + str(year), "%A, %d %b %Y")
        except ValueError:
            continue
        if candidate <= midnight:
            return candidate

    # Ordered like date.weekday(): Monday is 0.
    weekdays = ['monday', 'tuesday', 'wednesday', 'thursday',
                'friday', 'saturday', 'sunday']
    try:
        target = weekdays.index(lowered)
    except ValueError:
        print("could not parse")
        return None
    # Today's own weekday name means a full week ago, never today.
    daysAgo = (today.weekday() - target) % 7 or 7
    deliveredDate = today - timedelta(days=daysAgo)
    print(deliveredDate.strftime('%d-%b-%y'))
    return deliveredDate

def getDateStringArriving(dateString='Thursday'):
    """Resolve the date text of an "arriving" status to an upcoming calendar date.

    Accepted forms, tried in this order:
      * "today" / "tomorrow" (any case)           -> datetime.date
      * "dd-Mon-yy", e.g. "15-Jan-15"             -> datetime.datetime
      * "Weekday, dd Mon", e.g. "Monday, 05 Oct"  -> datetime.datetime; the
        current year is used unless that date has already passed, in which
        case the next year is used
      * a bare weekday name, e.g. "Thursday"      -> datetime.date of its next
        occurrence (today counts, so 0 to 6 days ahead)

    Returns None (after printing "Could not parse") for any other text.
    """
    # Single-argument print(...) prints the same text whether print is a
    # statement or a function.
    print(dateString)
    today = date.today()
    lowered = dateString.lower()
    if lowered == 'today':
        return today
    if lowered == 'tomorrow':
        return today + timedelta(days=1)

    try:
        return datetime.strptime(dateString, '%d-%b-%y')
    except ValueError:
        pass

    # The text carries no year.  Each candidate year is parsed on its own so
    # that "29 Feb" still resolves when only one of the two years is a leap year.
    midnight = datetime(today.year, today.month, today.day)
    for year in (today.year, today.year + 1):
        try:
            candidate = datetime.strptime(dateString + " " + str(year), "%A, %d %b %Y")
        except ValueError:
            continue
        if candidate >= midnight:
            return candidate

    # Ordered like date.weekday(): Monday is 0.
    weekdays = ['monday', 'tuesday', 'wednesday', 'thursday',
                'friday', 'saturday', 'sunday']
    try:
        target = weekdays.index(lowered)
    except ValueError:
        print("Could not parse")
        return None
    daysAhead = (target - today.weekday()) % 7
    return today + timedelta(days=daysAhead)

def parseOrderNotCreated():
    """Re-parse OrdersRaw rows in 'ORDER_NOT_CREATED_UNKNOWN' and store the outcome.

    Each row's raw HTML is passed to parseOrderRawHtml of the store returned
    by getStore(1); the 'result' entry of its return value is written to the
    status of the freshly queried row and committed.  The session is closed
    after the initial load and again on the way out.
    """
    try:
        store = getStore(1)
        stuckQuery = session.query(OrdersRaw).filter_by(status='ORDER_NOT_CREATED_UNKNOWN')
        stuckOrders = stuckQuery.all()
        session.close()
        for stuck in stuckOrders:
            outcome = store.parseOrderRawHtml(stuck.id, stuck.sub_tag, stuck.user_id,
                                              stuck.rawhtml, stuck.order_url)['result']
            # The row is looked up again because the loading session was closed.
            refreshed = session.query(OrdersRaw).filter_by(id=stuck.id).first()
            refreshed.status = outcome
            session.commit()
    finally:
        session.close()
    
# Script entry point: running this module directly calls main().
# The commented-out calls are alternative manual checks left by the author.
if __name__ == '__main__':
#        readSSh("~/46417.html")
        main()
        #getDateStringArriving()