Subversion Repositories SmartDukaan

Rev

Rev 20699 | Blame | Compare with Previous | Last modification | View Log | RSS feed

# coding=utf-8
'''
Created on Jan 15, 2015

@author: amit
'''
from base64 import encode
from bs4 import BeautifulSoup
from datetime import datetime, timedelta, date
from dtr.api.Order import process_rejects
from dtr.dao import Order, SubOrder, AmazonAffiliateInfo, obj
from dtr.main import getStore, Store as MStore, ParseException, getBrowserObject, \
    ungzipResponse, tprint
from dtr.storage.DataService import OrdersRaw, Orders, Order_Parse_Info, \
    All_user_addresses
from dtr.storage.Mongo import getDealRank
from dtr.utils import utils
from dtr.utils.utils import fetchResponseUsingProxy, readSSh, todict
from elixir import *
import base64
import dtr
import gzip
import mechanize
import os.path
from pyquery import PyQuery as pq
import re
import time
import traceback
import urllib2
import urlparse

# Order-detail page of a single Amazon order; the %s placeholder is filled with
# the merchant order id.
ORDER_REDIRECT_URL = 'https://www.amazon.in/gp/css/summary/edit.html?orderID=%s'
# Checkout-completion pages: parseOrderRawHtml treats a captured page as an
# order confirmation when either of these appears in the order success URL.
ORDER_SUCCESS_URL = 'https://www.amazon.in/gp/buy/spc/handlers/static-submit-decoupled.html'
THANKYOU_URL = 'https://www.amazon.in/gp/buy/thankyou/handlers/display.html'
# Amazon affiliate data-feed endpoints.
# NOTE(review): neither is referenced in the part of the file reviewed here;
# presumably used by the affiliate-report code further down - confirm.
AMAZON_AFF_URL = 'https://assoc-datafeeds-eu.amazon.com/datafeed/listReports'
# The %s placeholder completes the report filename (presumably a date - TODO confirm).
AMAZON_AFF_FILE_URL = 'https://assoc-datafeeds-eu.amazon.com/datafeed/getReport?filename=saholic-21-orders-report-%s.tsv.gz'
class Store(MStore):
    
    # Status phrases grouped by the MStore order status they stand for.
    # NOTE(review): this map is not read in the part of the file reviewed here;
    # presumably _getStatusFromDetailedStatus matches status text against it - confirm.
    orderStatusRegexMap = { MStore.ORDER_PLACED : ['ordered from', 'not yet dispatched','dispatching now', 'preparing for dispatch', 'order received'],
                            MStore.ORDER_SHIPPED : ['dispatched on','dispatched', 'on the way', 'out for delivery', 'Out for delivery'],
                            MStore.ORDER_CANCELLED : ['return complete', 'refunded', 'cancelled', 'replacement complete', 'return received'],
                            MStore.ORDER_DELIVERED : ['delivered', 'your package was delivered', 'package was handed directly to customer']
                           }

    def __init__(self,store_id):
        """Initialise the store by passing store_id on to the MStore constructor."""
        super(Store, self).__init__(store_id)
        
    def getName(self):
        """Return the name of this store, "amazon"."""
        return "amazon"
    
    def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl, track=False):
        rawHtml = re.sub(r'[^\x00-\x7F]+',' ', rawHtml)
        parseString = "Tracking" if track else "Transacted"
        print parseString, "Order Id to be parsed is :", orderId
        resp = {}
        resp['result'] = 'ORDER_NOT_CREATED'
        if ORDER_SUCCESS_URL in orderSuccessUrl or THANKYOU_URL in orderSuccessUrl:
            try:
                doc = pq(rawHtml)
                try:
                    orderUrl = doc('a.a-touch-link').attr.href
                    merchantOrderId = re.findall(r'.*&oid=(.*)&?.*?', orderUrl)[0]
                except:
                    merchantOrderId = doc("#orders-list>div>span>b").html()
                    if not merchantOrderId:
                        merchantOrderId = urlparse.parse_qs(urlparse.urlsplit(orderSuccessUrl).query)
                    else:
                        raise
                if not merchantOrderId  or not re.match("\d+-\d+-\d+", merchantOrderId):
                    raise 
                order = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl, True)
                order.orderTrackingUrl = ORDER_REDIRECT_URL % (merchantOrderId)
                order.orderSuccessUrl = orderSuccessUrl
                order.merchantOrderId = merchantOrderId
                order.requireDetail = True
                order.status = 'html_required'
                order.closed = None
                if self._saveToOrder(todict(order)):
                    resp['result'] = 'ORDER_CREATED'
                    resp["url"] = ORDER_REDIRECT_URL % (merchantOrderId)
                    resp["htmlRequired"] = True
                    resp['orderId'] = orderId
                else:
                    resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'
                    
            except:
                #Write all cases here for Order Not created Known
                soup = BeautifulSoup(rawHtml, "html5lib")
                try:
                    if not soup.body:
                        resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                    elif 'Securely redirecting you' in soup.find("h3").text.strip() or soup.find("h3").text.strip()=="Orders":
                        resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                    else:
                        raise
                except:
                    try:
                        if soup.find("h1").text.strip() in ['This is a duplicate order', 'There was a problem with your payment.', 'Your Orders', 'Your Shopping Cart is empty.', 'Select a payment method', 'Edit quantities'] or "Saved for later" in soup.find("h1").text.strip():
                            resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                        else:
                            raise
                    except:
                        try:
                            if soup.find("h2").text.strip() in ['Web page not available','Webpage not available', 'Do you have an Amazon password?']:
                                resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                            else:
                                raise
                        except:
                            try:
                                if soup.find(id="loading-spinner-img") is not None or soup.find(id="anonCarousel1") is not None or soup.find(id="ap_signin_pagelet_title") is not None or soup.find(id="nav-greeting-name") is not None:
                                    resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                                elif soup.find("b", {'class':'h1'}).text.strip().find("We're sorry") > -1:
                                    resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                                else:
                                    raise
                            except:
                                resp['result'] = 'ORDER_NOT_CREATED_UNKNOWN'
                                
        else:
            try:
                mo = self.db.merchantOrder.find_one({"orderId":orderId})
                if mo is not None:
                    merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl, False)
                    merchantOrder.createdOn = mo.get("createdOn")
                    merchantOrder.createdOnInt =  mo.get("createdOnInt")
                else:
                    print "Could not find amazon order with order Id", orderId
                    merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)
                soup = BeautifulSoup(rawHtml, "html5lib")
                if not soup.body:
                    resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'
                else:   
                    try:
                        self.parseNewStlye(merchantOrder, soup)
                        resp['result'] = 'DETAIL_CREATED'
                    except:
                        try:
                            traceback.print_exc()
                            self.parseAnotherStlye(merchantOrder, pq(rawHtml))
                            resp['result'] = 'DETAIL_CREATED'
                        except:
                            try:
                                traceback.print_exc()
                                self.parseOldStlye(merchantOrder, soup)
                                resp['result'] = 'DETAIL_CREATED'
                            except:
                                traceback.print_exc()
                                try:
                                    self.parseCancelled(merchantOrder, soup)
                                    resp['result'] = 'ORDER_CANCELLED'
                                except:
                                    try:
                                        if soup.find("h1").text.strip() in ["Your Account"] or soup.find("h1").span.text=="Account":
                                            resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'
                                        else:
                                            raise
                                    except:
                                        if soup.find(id="ap_signin_pagelet_title").find("h1").text.strip()=="Sign In":
                                            resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'
                                        else:
                                            raise
                if resp['result'] == 'DETAIL_NOT_CREATED_KNOWN':
                    self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"status":"html_required"}})
            except:
                self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"status":"html_required"}})
                print "Error occurred"
                resp['result'] = 'DETAIL_NOT_CREATED_UNKNOWN'
                traceback.print_exc()
        return resp    
                    
    #This should be exposed from api for specific sources
    def scrapeStoreOrders(self):
        orders = self.db.merchantOrder.find({"storeId":1, "closed":False, "subOrders.closed":False, "subOrders.trackingUrl":{"$exists":True}, "subOrders.login":{"$exists":False}})
        for merchantOrder in orders:
            executeBulk = False
            try:
                bulk = self.db.merchantOrder.initialize_ordered_bulk_op()
                closed = True
                map1 = {}
                for subOrder in merchantOrder.get("subOrders"):
                    if subOrder.get("closed"):
                        continue
                    elif subOrder.get("trackingUrl") is None:
                        closed = False
                        continue
                    findMap = {"orderId":merchantOrder.get("orderId"), "subOrders.merchantSubOrderId":subOrder.get("merchantSubOrderId")} 
                    trackingUrl = subOrder.get("trackingUrl")
                    if not map1.has_key(trackingUrl):
                        map1[trackingUrl] = self.parseTrackingUrl(trackingUrl, merchantOrder.get("orderId"))
                    newOrder = map1.get(trackingUrl)
                    if newOrder:
                        executeBulk = True
                        updateMap = self.getUpdateMap(newOrder, subOrder.get('cashBackStatus'))
                        print findMap, "\n", updateMap
                        bulk.find(findMap).update({'$set' : updateMap})
                        closed = closed and newOrder['closed']
                if executeBulk:
                    bulk.find({"orderId":merchantOrder.get("orderId")}).update({"$set":{"closed":closed, "parseError":False}})
                    bulk.execute()
            except:
                tprint("Could not update " + str(merchantOrder['orderId']) + " For store " + self.getName())
                self.db.merchantOrder.update({"orderId":merchantOrder['orderId']}, {"$set":{"parseError":True}})
                traceback.print_exc()
                    
                
             
    def parserest(self, soup):
        print "Hi"
        if soup.find('h1'):
            print "OK"
        
    def parseOldStlye(self, merchantOrder, soup):
        """Parse the table-based order-detail layout into merchantOrder and persist it.

        Reads the order date, merchant order id and order total from the
        summary table, builds one SubOrder per product row of every
        "Delivery #n" section (with cashback and deal-rank data) and finally
        saves the order through _updateToOrder with status 'success'.

        When the grand total is lower than the sum of the subtotals, the
        difference is taken off the sub-orders in proportion to their amounts.

        Parameters:
            merchantOrder -- Order to fill in; orderSuccessUrl, createdOnInt
                             and userId are read from it.
            soup -- BeautifulSoup document of the order-detail page.

        Any exception raised while walking the page propagates to the caller.
        """
        merchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrl
        table = soup.body.findAll("table", recursive=False)[1]
        tables = table.tr.td.findAll("table", recursive=False)
        for tr in tables[2].findAll("tr"):
            boldElement = tr.td.b
            label = str(boldElement)
            if "Order Placed" in label:
                merchantOrder.placedOn = boldElement.next_sibling.strip()
            if "order number" in label:
                merchantOrder.merchantOrderId = boldElement.next_sibling.strip()
            if "Order Total" in label:
                merchantOrder.paidAmount = int(float(boldElement.find('span').contents[-1].replace(',','')))
        anchors = table.tr.td.findAll("a", recursive=False)
        # The last anchor precedes the payment summary, not a delivery section.
        paymentAnchor = anchors.pop(-1)

        subOrders = []
        merchantOrder.subOrders = subOrders
        counter = 0
        for count, anchor in enumerate(anchors, 1):
            tab = anchor.next_sibling
            status = MStore.ORDER_PLACED
            subStr = "Delivery #" + str(count) + ":"
            heading = tab.find("b").text
            if subStr in heading:
                detailedStatus = heading.replace(subStr, '').strip()
            else:
                # Without the expected prefix use the heading as it is, rather
                # than an unbound name or the status of the previous delivery.
                detailedStatus = heading.strip()

            tab = tab.next_sibling.next_sibling
            trs = tab.find("table").find('tbody').findAll("tr", recursive = False)

            estimatedDelivery = trs[0].td.find("b").next_sibling.strip()

            orderItemTrs = trs[1].findAll("td", recursive=False)[1].table.tbody.findAll("tr", recursive = False)
            # Product data sits in every second row (odd indexes).
            for orderItemTr in orderItemTrs[1::2]:
                counter += 1
                quantity = int(re.findall(r'\d+', orderItemTr.td.contents[0])[0])

                productUrl = orderItemTr.td.contents[1].a["href"]
                productTitle = orderItemTr.td.contents[1].a.text

                unitPrice = int(float(orderItemTr.findAll('td')[1].span.text.replace('Rs. ','').replace(',','')))

                subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, unitPrice*quantity, status, quantity)
                subOrder.merchantSubOrderId = str(counter) + " of " + merchantOrder.merchantOrderId
                subOrder.estimatedDeliveryDate = estimatedDelivery
                estDlvyTime = datetime.strptime(estimatedDelivery.split('-')[0].strip(), "%A %d %B %Y")
                createdOn = datetime.fromtimestamp(merchantOrder.createdOnInt)
                # Start tracking 4 days before the estimate, but not sooner
                # than 3 days after the order was created.
                subOrder.trackAfter = int(time.mktime(max(estDlvyTime-timedelta(days=4),createdOn + timedelta(days=3)).timetuple()))
                subOrder.productCode = productUrl.split('/')[5]
                subOrder.detailedStatus = detailedStatus
                (cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, unitPrice)
                dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)
                subOrder.dealRank = dealRank.get('rank')
                subOrder.rankDesc = dealRank.get('description')
                subOrder.maxNlc = dealRank.get('maxNlc')
                subOrder.minNlc = dealRank.get('minNlc')
                # NOTE(review): attribute is 'db' while the key is 'dp' - confirm which name readers expect.
                subOrder.db = dealRank.get('dp')
                subOrder.itemStatus = dealRank.get('status')
                cashbackStatus = Store.CB_PENDING
                if cashbackAmount <= 0:
                    cashbackStatus = Store.CB_NA
                subOrder.cashBackStatus = cashbackStatus
                subOrder.cashBackAmount = cashbackAmount*quantity
                if percentage > 0:
                    subOrder.cashBackPercentage = percentage
                subOrders.append(subOrder)
        priceList = paymentAnchor.next_sibling.next_sibling.next_sibling.table.table.tbody.tbody.tbody.findAll('tr', recursive=False)
        totalAmount = 0
        grandAmount = 0
        for price in priceList:
            labelTd = price.td
            if 'Subtotal:' in labelTd.text:
                totalAmount += int(float(labelTd.next_sibling.next_sibling.find('span').contents[-1].replace(',','')))
            elif 'Grand Total:' in labelTd.text:
                grandAmount += int(float(labelTd.next_sibling.next_sibling.find('span').contents[-1].replace(',','')))
        if grandAmount < totalAmount:
            diff = totalAmount - grandAmount
            for subOrder in merchantOrder.subOrders:
                # Each sub-order carries its proportional share of the
                # discount; float() avoids Python 2 integer division.
                subOrder.amountPaid -= int(diff * subOrder.amountPaid / float(totalAmount))
        merchantOrder.status='success'
        self._updateToOrder(todict(merchantOrder))
        
    def parseAnotherStlye(self, merchantOrder, pqobj):
        counter=0
        detailSection = None
        shipmentSection = None
        summarySection = None
        for el in pqobj('.a-section'):
            if "View order details" in pq(el).text():
                detailSection =  pq(el)
                counter += 1
            if "Shipment details" in pq(el).text():
                shipmentSection =  pq(el)
                counter += 1
            if "Order Summary" in pq(el).text():
                summarySection = pq(el)
                counter += 1
            if counter == 3:
                break
        
        i=-1    
        for s in shipmentSection('.a-box-group'):
            shipmentGroup = pq(s)
            for shipment in shipmentGroup('.a-box'):
                i += 1 
                if i==0:
                    continue
                shipment = pq(shipment)
                shipmentStatusSection = shipment('.a-section:eq(0)')
                productDetails = shipment('.a-section:eq(1)>.a-row')
                print shipmentStatusSection('h3').text(), shipmentStatusSection('p').text(), shipmentStatusSection('span').text()
                for productDetail in productDetails:
                    productDetail = pq(productDetail)
                    pImg = productDetail.children('div').eq(0)
                    pQty = productDetail.children('div').eq(1)
                    #print pImg('a').attr('href'), pImg('a').attr('title'), pImg('img').attr('src'), pQty('.a-row:nth-child(2)')('span').text().split(':')[1].strip(), pQty('span.currencyINR')
                    productUrl = pImg('a').attr('href'), 
                    qty = pQty('.a-row:nth-child(2)')('span').text().split(':')[1].strip()
                    price = int(float(pQty('nobr').text().replace('Rs.','').replace(',', '')))
                    #subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, amountPaid, MStore.ORDER_PLACED, quantity)
        raise   
        merchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrl
        merchantOrder.placedOn
        merchantOrder.merchantOrderId
        merchantOrder.paidAmount

    def parseNewStlye(self, merchantOrder, soup):
        isPrime = False
        merchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrl
        for script in soup.findAll("script"):
            if script.text:
                print script.text
                if "\"isPrime\":" in script.text:
                    isPrime = "\"isPrime\":1" in script.text
                    break
        orderDetailsContainer = soup.body.find(id="orderDetails")
        divAfterH1 = orderDetailsContainer.h1.next_sibling.next_sibling
        orderLeftDiv = divAfterH1.div
        placedOnSpan = orderLeftDiv.find("span", {'class':'order-date-invoice-item'})
        merchantOrder.placedOn =placedOnSpan.text.split('Ordered on')[1].strip()
        merchantOrder.merchantOrderId = placedOnSpan.next_sibling.next_sibling.text.split('Order#')[1].strip()
        try:
            priceBox = divAfterH1.next_sibling.next_sibling.next_sibling.next_sibling.find("div", {"class":"a-box-inner"}).div.div.findAll('div', recursive=False)[-1]
        except:
            priceBox = divAfterH1.next_sibling.next_sibling.next_sibling.next_sibling.find("div", {"class":"a-box a-last"}).div.div.findAll('div', recursive=False)[-1]
        priceRows = priceBox.findAll('div', {'class':'a-row'})
        subTotal = 0
        shippingPrice = 0   
        promoApplied = 0
        for priceRow in priceRows:
            if "Item(s) Subtotal:" in str(priceRow):
                subTotal = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))
            elif "Shipping:" in str(priceRow):
                shippingPrice  = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))
            elif "Grand Total:" in str(priceRow):
                grandPrice  = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))
                merchantOrder.paidAmount = grandPrice
            elif "Total:" in str(priceRow):
                totalPrice  = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))
            elif "Promotion Applied:" in str(priceRow):
                promoApplied  += int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))
        totalPaid = subTotal        
        if promoApplied > 0:
            totalPaid -= promoApplied
            if shippingPrice <= promoApplied:
                totalPaid  += shippingPrice
        
        shipmentDivs = orderDetailsContainer.findAll('div', class_='shipment')
        subOrders = []
        merchantOrder.subOrders = subOrders
        closedStatus = True
        subOrders = []
        j=0
        for shipmentDiv in shipmentDivs:
            shipmentDiv = shipmentDiv.div
            try:
                trackingUrl = 'http://www.amazon.in/' + shipmentDiv.find('span', class_='track-package-button').span.a.get('href')
            except:
                trackingUrl= None
                
            deliverySpanTop = None
            innerBoxes = shipmentDiv.findAll('div', recursive = False)
            statusDiv = innerBoxes[0]
            subOrderStatus = statusDiv.div.span.text.strip()
            try:
                deliverySpanTop = statusDiv.div.div.find_next_sibling('div').span
            except:
                pass
            #if not deliverySpanTop:
            productDivs = innerBoxes[-1].div.div.findAll('div', recursive=False)
            merchantOrder.subOrders = subOrders
            for i, productDiv in enumerate(productDivs):
                deliverySpan = deliverySpanTop
                if not deliverySpanTop:
                    if i%2==1:
                        continue
                    deliverySpan = productDiv.div.div.span
                    try:
                        productDiv = productDivs[i+1]
                    except:
                        pass
                for  prodRow in productDiv.findAll('div', recursive=False):
                    j += 1
                    imgDiv  = prodRow.div.div
                    detailDiv = imgDiv.find_next_sibling('div')
                    detailDivs = detailDiv.findAll('div', recursive=False)
                    arr = re.split("^(\d+) of", detailDivs[0].a.text.strip())
                    (productTitle, quantity) = (arr[-1], (1 if len(arr)<2 else int(arr[1])) )
                    try:
                        unitPrice = int(float(detailDivs[2].span.text.replace('Rs. ','').replace(',','')))
                    except:
                        unitPrice = int(float(detailDivs[3].span.text.replace('Rs. ','').replace(',','')))
                    amountPaid = int((unitPrice*quantity*totalPaid)/subTotal)
                    productUrl = "http://www.amazon.in" + detailDivs[0].a.get('href')
                    subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, amountPaid, MStore.ORDER_PLACED, quantity)
                    subOrder.productCode = productUrl.split('/')[5]
                    subOrder.unitPrice = unitPrice
                    subOrder.merchantSubOrderId = str(j) + " of " + merchantOrder.merchantOrderId
                    estDlvyTime = datetime.now()
                    if deliverySpan is not None:
                        try:
                            subOrder.estimatedDeliveryDate = deliverySpan.span.text.strip()
                            estDate = subOrder.estimatedDeliveryDate.split("-")[0].strip()
                            subOrder.estimatedDeliveryInt = int(time.mktime((datetime.strptime(estDate, "%A %d %B %Y")).timetuple()))
                            estDlvyTime = datetime.strptime(estDate, "%A %d %B %Y")
                        except:
                            if "Delivered on" in deliverySpan.text:
                                subOrder.deliveredOn = deliverySpan.text.split(":")[1].strip() 
                            subOrder.estimatedDeliveryDate = "Not available"
                            
                    createdOn = datetime.fromtimestamp(merchantOrder.createdOnInt)
                    subOrder.trackAfter = int(time.mktime(max(estDlvyTime-timedelta(days=4),createdOn + timedelta(days=3)).timetuple()))
                    subOrder.detailedStatus = subOrderStatus
                    status=MStore.ORDER_PLACED
                    try:
                        status = self._getStatusFromDetailedStatus(subOrderStatus)
                    except:
                        try:
                            dateString = subOrderStatus.split("Delivered ")[1].strip()
                            subOrder.status = MStore.ORDER_DELIVERED
                            subOrder.detailedStatus = 'Delivered'
                            dateString = getDateStringDelivered(dateString)
                            if dateString is not None:
                                subOrder.deliveredOn = datetime.strftime(dateString, '%d-%b-%y')
                        except:
                            try:
                                dateString = subOrderStatus.split("Arriving ")[1].split("by")[0].strip()
                                subOrder.status = MStore.ORDER_SHIPPED
                                dateString = getDateStringArriving(dateString)
                                if dateString is not None:
                                    subOrder.deliveryEstimate = datetime.strftime(dateString, '%d-%b-%y')
                                else:
                                    subOrder.deliveryEstimate = subOrderStatus.split("Arriving ")[1].split("by")[0].strip()
                            except:
                                print "Unknown status Alert -", status
                    
                    subOrder.deliveryCharges = shippingPrice
                    if trackingUrl:
                        subOrder.trackingUrl  = trackingUrl
                    subOrder.imgUrl = imgDiv.img["src"]
                    if isPrime:
                        (cashbackAmount, percentage) = (0,0)
                    else:
                        (cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amountPaid/quantity)
                    dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)
                    subOrder.dealRank = dealRank.get('rank')
                    subOrder.rankDesc = dealRank.get('description')
                    subOrder.maxNlc = dealRank.get('maxNlc')
                    subOrder.minNlc = dealRank.get('minNlc')
                    subOrder.db = dealRank.get('dp')
                    subOrder.itemStatus = dealRank.get('status')
                    cashbackStatus = Store.CB_PENDING
                    if cashbackAmount <= 0:
                        cashbackStatus = Store.CB_NA
                    subOrder.cashBackStatus = cashbackStatus
                    subOrder.cashBackAmount = cashbackAmount*quantity
                    if percentage > 0:
                        subOrder.cashBackPercentage = percentage
                    if hasattr(subOrder, 'deliveredOn') or subOrder.status==Store.ORDER_DELIVERED:                               
                        subOrder.status = Store.ORDER_DELIVERED
                        subOrder.closed = True
                        if subOrder.cashBackStatus == Store.CB_PENDING:
                            subOrder.cashBackStatus = Store.CB_APPROVED
                    elif closedStatus:
                        closedStatus= False
                    subOrders.append(subOrder)
        merchantOrder.status='success'
        merchantOrder.closed = closedStatus
        self._updateToOrder(todict(merchantOrder))
        
    def parseCancelled(self, merchantOrder,soup):
        """Mark merchantOrder as cancelled when the page reports a cancelled order.

        Two layouts are recognised: the table layout, where the second
        top-level table holds an "Important Message" font element followed by
        the cancellation text, and the '#orderDetails' layout, where an h4
        carries the cancellation text. On a match the order is closed, its
        status set to "cancelled", requireDetail cleared, and the order saved
        through _updateToOrder.

        Raises:
            ParseException -- when neither layout reports a cancellation.
        """
        cancelledText = "This order has been cancelled."
        cancelled = False
        body = soup.body
        if body is not None:
            tables = body.findAll("table", recursive=False)
            if len(tables) > 1:
                fonts = tables[1].findAll("font")
                cancelled = (len(fonts) > 1
                             and fonts[0].text == "Important Message"
                             and fonts[1].text == cancelledText)
            if not cancelled:
                orderDetails = body.find(id="orderDetails")
                cancelled = (orderDetails is not None
                             and orderDetails.h4 is not None
                             and orderDetails.h4.text == cancelledText)
        if not cancelled:
            raise ParseException("parseCancelled", "Page does not report a cancelled order")
        merchantOrder.closed = True
        merchantOrder.status = "cancelled"
        merchantOrder.requireDetail = False
        self._updateToOrder(todict(merchantOrder))

    def getTrackingUrls(self, userId):
        
        missingOrderUrls = []
        missingOrders = self._getMissingOrders({'userId':userId})
        for missingOrder in missingOrders:
            missingOrderUrls.append(ORDER_REDIRECT_URL%(missingOrder['merchantOrderId']))
        orders = self._getActiveOrders({'userId':userId})
        count = len(orders)
        print "count", count
        print "Missing Urls"
        print "*************"
        print missingOrderUrls
        if count > 0:
            return missingOrderUrls + ['https://www.amazon.in/gp/css/order-history', 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled', 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled&startIndex=10']
        else: 
            return missingOrderUrls
            
    def trackOrdersForUser(self, userId, url, rawHtml):
        '''
        Update stored tracking state from an Amazon page captured for a user.

        The HTML is stripped of non-ASCII characters and a copy is written
        under /AmazonTrack/User<userId>. Two kinds of page are handled:

        * Order history summary (url is exactly the order-history URL, or
          contains the cancelled-orders filter URL): every order returned by
          self._getActiveOrders is searched for on the page by merchant order
          id. For each shipment of a matching order, status, estimates,
          tracking URL and product image are queued as updates on the open
          sub-orders whose productTitle is contained in the product title on
          the page. Active orders not present on the page get trackMissing
          set on their open sub-orders. All updates are sent with one ordered
          bulk operation on self.db.merchantOrder.
        * Any other url: treated as an order detail page. The merchant order
          id is taken from the orderID query parameter, the page is re-parsed
          with self.parseOrderRawHtml and the result is stored on the
          matching OrdersRaw row.

        @param userId: id used for the dump directory and the active-order search
        @param url: url the page was captured from
        @param rawHtml: page source
        @return: 'PARSED_SUCCESS' when either branch completes, 'PARSED_FAILED'
                 when any exception escapes the processing below
        '''
        # Replace every run of non-ASCII characters with a single space.
        rawHtml = re.sub(r'[^\x00-\x7F]+',' ', rawHtml)
        directory = "/AmazonTrack/User" + str(userId)
        if not os.path.exists(directory):
            os.makedirs(directory)
        
        
        try:
            searchMap = {'userId':userId}
            collectionMap = {'merchantOrderId':1}
            # NOTE(review): _getActiveOrders is not visible here; the code below also
            # reads 'orderId' and 'subOrders' from each result - confirm they are returned.
            activeOrders = self._getActiveOrders(searchMap, collectionMap)
            datetimeNow = datetime.now()
            # One epoch timestamp shared by every sub-order touched in this run (lastTracked).
            timestamp = int(time.mktime(datetimeNow.timetuple()))
            print "url----------------", url
            cancelledSummary = False
            if url == 'https://www.amazon.in/gp/css/order-history' or 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled' in url:
                # Dump file name carries day-month:time but no year.
                if url == 'https://www.amazon.in/gp/css/order-history':
                    filename = directory + "/orderSummary" + datetime.strftime(datetime.now(), '%d-%m:%H:%M:%S')
                else:   
                    filename = directory + "/cancelledSummary" + datetime.strftime(datetime.now(), '%d-%m:%H:%M:%S')
                    cancelledSummary = True   
                f = open(filename,'w')
                f.write(rawHtml) # text-mode write: Python translates \n to os.linesep
                f.close() # closed explicitly; the handle is not closed here if write() raises
                soup = BeautifulSoup(rawHtml,'html5lib')
                # A page without #ordersContainer makes find() return None, which raises
                # AttributeError and ends in the outer handler ('PARSED_FAILED').
                allOrders = soup.find(id="ordersContainer").findAll('div', {'class':'a-box-group a-spacing-base order'})
                bulk = self.db.merchantOrder.initialize_ordered_bulk_op()
                for activeOrder in activeOrders:
                    matched=False
                    for orderEle in allOrders:
                        # Reset once per order element, not per shipment, so values found
                        # for one shipment remain set for later shipments of the same order.
                        deliveredOn = None
                        deliveryEstimate = None
                        shippingEstimate = None
                        orderdiv = orderEle.find('div', {'class':'a-box a-color-offset-background order-info'}).find('div', {'class':'a-fixed-right-grid-col actions a-col-right'})
                        merchantOrderId = orderdiv.find('span', {'class':'a-color-secondary value'}).text.strip()
                        if merchantOrderId==activeOrder['merchantOrderId']:
                            matched=True
                            # Order-level flag: stays True only if nothing below clears it.
                            # NOTE(review): it also stays True when no product title matches
                            # any open sub-order, so the order is then marked closed.
                            closed = True
                            if not cancelledSummary:
                                shipments = orderEle.findAll('div',{'class':re.compile('.*?shipment.*?')}, recursive=False)
                            else:
                                shipments = orderEle.findAll('div',{'class':re.compile('.*?a-box.*?')}, recursive=False)
                                # Skip the first a-box child (presumably the order-info header box - confirm).
                                shipments.pop(0)
                            for shipment in shipments:
                                orderStatusDesc = None
                                shipdiv = shipment.find('div', {'class':'a-box-inner'})
                                sdivs = shipment.div.div.findAll('div', recursive=False)
                                # Status resolution: 1) mapped status text, 2) "Delivered <date>",
                                # 3) "Arriving <date> by ...". Each step runs only if the previous raised.
                                # NOTE(review): if reading sdivs[0].span.text itself raises, orderStatus is
                                # unbound (or left over from the previous shipment) in the fallbacks below,
                                # and 'status' may likewise be unbound or stale after the last fallback.
                                try:
                                    orderStatus = sdivs[0].span.text.strip()
                                    status = self._getStatusFromDetailedStatus(orderStatus)
                                except:
                                    try:
                                        dateString = orderStatus.split("Delivered ")[1].strip()
                                        status = MStore.ORDER_DELIVERED
                                        deliveredOn = datetime.strftime(getDateStringDelivered(dateString), '%d-%b-%y')
                                    except:
                                        try:
                                            dateString = sdivs[0].span.text.strip().split("Arriving ")[1].split("by")[0].strip()
                                            status = MStore.ORDER_SHIPPED
                                            deliveryEstimate = datetime.strftime(getDateStringArriving(dateString), '%d-%b-%y')
                                        except:
                                            print "Unknown status Alert -", orderStatus
                                print merchantOrderId, "Order Status", orderStatus
                                # Optional secondary description line; two DOM layouts are tried.
                                try:
                                    orderStatusDesc = sdivs[0].findAll('div')[1].div.text.strip()
                                except:
                                    try:
                                        orderStatusDesc = sdivs[0].findAll('div')[1].text.strip()
                                    except:
                                        print "Order Status Description None or empty for", merchantOrderId, "and User", userId

                                if orderStatusDesc: 
                                    print merchantOrderId, "Order status desc", orderStatusDesc
                                    # Re-derive the status; on failure the value chosen above is kept.
                                    try:
                                        status = self._getStatusFromDetailedStatus(orderStatus)
                                    except:
                                        pass
                                    try:
                                        # NOTE(review): str.split returns a list, so the chained
                                        # .split("-") in the first two branches raises AttributeError and
                                        # always lands in the except below (closed=False, status=None).
                                        # The first test reads orderStatusDesc but every branch parses
                                        # orderStatus - confirm which string was intended.
                                        if "Dispatch estimate" in orderStatusDesc:
                                            shippingEstimate = orderStatus.split("Dispatch estimate").split("-")[0].strip()
                                        elif "Delivery estimate" in orderStatus:
                                            deliveryEstimate = orderStatus.split("Delivery estimate").split("-")[0].strip()
                                        elif "Arriving" in orderStatus:
                                            deliveryEstimate = datetime.strftime(getDateStringArriving(orderStatus.split("Arriving")[1].strip().split("by")[0].strip()), '%d-%b-%y')
                                    except:
                                        print "Could not find anything relevent for merchantOrder", merchantOrderId, "and User", userId 
                                        closed=False
                                        status = None

                                productDivs = shipdiv.find('div', {'class':re.compile('.*?a-spacing-top-medium.*?')}).find('div', {'class':'a-row'}).findAll('div', recursive=False)
                                # Use the first "Track package" button; relative links are made absolute.
                                trackingUrl = None
                                for buttonDiv in shipdiv.findAll('span', {'class':'a-button-inner'}):
                                    if buttonDiv.find('a').text.strip()=='Track package':
                                        trackingUrl = buttonDiv.find('a')['href'].strip()
                                        if not trackingUrl.startswith("http"):
                                            trackingUrl = "http://www.amazon.in" + trackingUrl
                                        break
                                for prodDiv in productDivs:
                                    # The result of this lookup is discarded; the next line repeats it.
                                    prodDiv.find('div', {'class':'a-fixed-left-grid-inner'})
                                    productTitle = prodDiv.find('div', {'class':'a-fixed-left-grid-inner'}).find("div", {'class':'a-row'}).find('a').text.strip()
                                    imgUrl = prodDiv.find("img")["src"]
                                    for subOrder in activeOrder['subOrders']:
                                        if subOrder['closed']==True:
                                            continue
                                        # Substring match: the stored title must be contained in the page title.
                                        if subOrder['productTitle'] in productTitle:
                                            findMap = {"orderId": activeOrder['orderId'], "subOrders.merchantSubOrderId": subOrder.get("merchantSubOrderId")}
                                            updateMap = {}
                                            closedStatus = False
                                            updateMap['subOrders.$.imgUrl'] = imgUrl
                                            updateMap['subOrders.$.lastTracked'] = timestamp
                                            if status:
                                                updateMap['subOrders.$.detailedStatus'] = orderStatus
                                                updateMap['subOrders.$.status'] = status 
                                                # Only assigned when status is truthy; read below only
                                                # inside status-specific branches.
                                                cashbackStatus = subOrder.get("cashBackStatus")
                                        
                                            # Delivered: close the sub-order and approve pending cashback.
                                            if status==MStore.ORDER_DELIVERED:
                                                if deliveredOn:                               
                                                    updateMap['subOrders.$.deliveredOn'] = deliveredOn
                                                closedStatus = True
                                                updateMap['subOrders.$.closed'] = True
                                                if cashbackStatus == Store.CB_PENDING:
                                                    updateMap['subOrders.$.cashBackStatus'] = Store.CB_APPROVED
                                            # Cancelled: close the sub-order and cancel pending cashback.
                                            if status==MStore.ORDER_CANCELLED:     
                                                closedStatus = True
                                                updateMap['subOrders.$.closed'] = True
                                                if cashbackStatus == Store.CB_PENDING:
                                                    updateMap['subOrders.$.cashBackStatus'] = Store.CB_CANCELLED
                                            # Shipped: record delivery estimate and tracking link when known.
                                            if status==MStore.ORDER_SHIPPED:
                                                if deliveryEstimate:   
                                                    updateMap['subOrders.$.estimatedDeliveryDate'] = deliveryEstimate
                                                if trackingUrl is not None:
                                                    updateMap['subOrders.$.trackingUrl'] = trackingUrl
                                                    updateMap['subOrders.$.trackMissing'] = False
                                            if shippingEstimate:   
                                                updateMap['subOrders.$.estimatedShippingDate'] = shippingEstimate
                                            # Any matched sub-order that is still open keeps the whole order open.
                                            if not closedStatus:
                                                closed = False
                                            # Disabled query fragment kept from an earlier revision:
                                            #{"subOrders.closed":False,"subOrders.trackingUrl":{"$exists":False},"subOrders.trackAfter":{"$lt":utils.getCurrTimeStamp()}
                                            updateMap['status']='success'
                                            bulk.find(findMap).update({'$set' : updateMap})
                                            # First matching open sub-order wins for this product.
                                            break
                            bulk.find({'orderId': activeOrder['orderId']}).update({"$set":{'closed':closed}})
                            # Merchant order found on the page; stop scanning order elements.
                            break
                    if not matched:
                        # Order absent from this page: flag open sub-orders whose trackAfter has passed.
                        updateMap = {'subOrders.$.trackMissing': True}
                        for subOrder in activeOrder['subOrders']:
                            if subOrder['closed']==True:
                                continue
                            findMap = {"orderId": activeOrder['orderId'], "subOrders.merchantSubOrderId": subOrder.get("merchantSubOrderId"), 
                                       "subOrders.trackAfter":{"$lt":utils.getCurrTimeStamp()}}
                            # NOTE(review): this find() has no update attached, so it appears to queue nothing.
                            bulk.find({'orderId': activeOrder['orderId']})
                            bulk.find(findMap).update({'$set':updateMap})
                # NOTE(review): pymongo is expected to raise when no operation was queued
                # (e.g. no active orders), which would end in 'PARSED_FAILED' - confirm.
                bulk.execute()
                return 'PARSED_SUCCESS'
            else:
                # Order detail page: the url must carry the orderID query parameter,
                # otherwise [0] raises IndexError and the outer handler reports failure.
                merchantOrderId = re.findall(r'https://www.amazon.in/gp/css/summary/edit.html\?orderID=(.*)?', url, re.IGNORECASE)[0]
                print  "merchantOrderId", merchantOrderId
                # find_one returns None for an unknown id; the subscripts below then raise.
                merchantOrder = self.db.merchantOrder.find_one({"merchantOrderId":merchantOrderId})
                
                filename = directory + "/" + merchantOrderId   
                f = open(filename,'w')
                f.write(rawHtml) # text-mode write: Python translates \n to os.linesep
                f.close() # closed explicitly; the handle is not closed here if write() raises
                result = self.parseOrderRawHtml(merchantOrder['orderId'], merchantOrder['subTagId'], merchantOrder['userId'], rawHtml, url, True)['result']
                print "result", result
                # Persist the outcome on the raw order row; failures are logged, not propagated.
                try:
                    order1 = session.query(OrdersRaw).filter_by(id=merchantOrder['orderId']).first()
                    order1.status = result
                    order1.rawhtml = rawHtml
                    order1.order_url = url
                    session.commit()
                except:
                    traceback.print_exc()
                finally:
                    session.close()
                return 'PARSED_SUCCESS'
                pass
            # Unreachable: both branches above return before this point.
            return 'PARSED_SUCCESS_NO_ORDERS'
        except:
            traceback.print_exc()    
            return 'PARSED_FAILED'
            
    def _getStatusFromDetailedStatus(self, detailedStatus):
        if "ordered from" in detailedStatus.lower():
            return MStore.ORDER_PLACED 

        for key, value in self.orderStatusRegexMap.iteritems():
            if detailedStatus.lower() in value:
                return key
        
        print "Detailed Status need to be mapped", "Store:", self.store_id, detailedStatus
        raise ParseException("_getStatusFromDetailedStatus", "Found new order status" + detailedStatus)
    def scrapeAffiliate(self, startDate=None, endDate=None):
        br = getBrowserObject()
        br.add_password('https://assoc-datafeeds-eu.amazon.com', 'Saholic', 'Fnubyvp')
        url = AMAZON_AFF_URL
        response = br.open(url)
        #get data for past 40 days and store it to mongo
        dt = datetime.now()
        dat = dt - timedelta(days=2)
        url = AMAZON_AFF_FILE_URL%(datetime.strftime(dat, "%Y%m%d"))
        response = br.open(url)
        page = gzip.GzipFile(fileobj=response, mode='rb').read()
        j=-1
        for row in page.split("\n"):
            j += 1
            if j== 0 or j==1:
                continue
            fields = row.split("\t")
            if len(fields)>1:
                print fields
                amazonAffiliate = AmazonAffiliateInfo(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], fields[6], fields[7], fields[8], fields[9])
                print amazonAffiliate
                self.db.amazonAffiliateInfo.insert(todict(amazonAffiliate))
            else:
                break
                
    
    def parseTrackingUrl(self, trackingUrl, orderId):
        print trackingUrl
        subOrder = {}
        page = fetchResponseUsingProxy(trackingUrl)
        status = MStore.ORDER_SHIPPED
        #print page
        soup = BeautifulSoup(page)
        header1 = soup.find("h1") 
        if header1:
            if header1.text=="Sign In" or header1.text.strip()=="Login":
                print "Login page is displayed for order id", orderId
                self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":True}})
                subOrder['login'] = True
                return subOrder
        try:
            print "Tracking page is displayed for order id", orderId
            detailedStatus = soup.find("div", {"class":"top"}).span.text.strip()
            try:
                displayStatus = soup.find("div",{"class":"a-column a-span12 shipment-status-content"}).span.text.strip()
            except:
                displayStatus = detailedStatus
            print displayStatus
            if detailedStatus.lower().find("delivered")>=0:
                print detailedStatus
                displayStatus = "Delivered"
                status = "Delivered"
                try:
                    subOrder["deliveredOn"] = detailedStatus.split("on")[1].strip()
                except:
                    pass
                
            elif detailedStatus.lower() == 'returned':
                status = 'Cancelled'
            subOrder['status'] = status
            subOrder['detailedStatus'] = displayStatus 
            self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":False}})    
            
        except:
            self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":True}})    
            print "failed to parse", orderId
            traceback.print_exc()
                
        return subOrder
            
             
            
    def parseInfo(self):
        '''
        Harvest delivery addresses from stored order pages of this store.

        Orders of this store with status 'DETAIL_CREATED' are read (grouped by
        user_id). For each one the address list items under
        'ul.displayAddressUL' are parsed into an All_user_addresses row
        (address, user_id, source='order', city, state, pincode) and committed.
        An order whose page cannot be parsed is logged, rolled back and
        skipped. The session is always closed at the end.
        '''
        orders = session.query(Orders).filter_by(store_id=self.store_id).filter_by(status='DETAIL_CREATED').group_by(Orders.user_id).all()
        try:
            for order in orders:
                try:
                    doc = pq(order.rawhtml)
                    lists = doc('ul.displayAddressUL li')
                    # Second-to-last item has the form "<city>, <state> <pincode>".
                    adSplit = lists[-2].text.split(",")
                    match = re.match(r"([a-z ]+)([0-9]+)", adSplit[1], re.I)
                    if not match:
                        # Previously a non-matching line silently reused the state and
                        # pincode parsed for the previous order (or hit a NameError on
                        # the first one). Fail explicitly so nothing is saved.
                        raise ValueError("Unrecognised state/pincode %r for order %s" % (adSplit[1], order.id))
                    state, pincode = match.groups()

                    # Build the row only after everything needed has been parsed.
                    orderInfo = All_user_addresses()
                    orderInfo.address = lists[-3].text
                    orderInfo.user_id = order.user_id
                    orderInfo.source = 'order'
                    # order_id, email, name and mobile are not populated here.
                    orderInfo.city = adSplit[0].strip()
                    orderInfo.pincode = pincode.strip()
                    orderInfo.state = state.strip().title()
                    session.commit()
                except Exception:
                    # Best effort: report the failure, discard the pending row, move on.
                    traceback.print_exc()
                    session.rollback()
                    continue
        finally:
            session.close()
        


def main():
    """Developer driver: re-parse one saved order page from a local file using store 1."""
    sampleUrl = 'https://www.amazon.in/gp/css/summary/edit.html?orderID=402-0540293-4683515'
    amazonStore = getStore(1)
    sampleHtml = readSSh('/home/amit/amit.txt')
    amazonStore.parseOrderRawHtml("444444", '123', 14, sampleHtml, sampleUrl)
    # Earlier ad-hoc experiments, kept disabled for reference:
#    orders = list(session.query(OrdersRaw).filter_by(status = 'DETAIL_NOT_CREATED_UNKNOWN').filter(OrdersRaw.id > 61071).all())
#    session.close()
#    for o in orders:
#        try:
#            store.trackOrdersForUser(o.id, o.order_url, o.rawhtml)
#        finally:
#            session.close()
    #store.trackOrdersForUser(10466, 'https://www.amazon.in/gp/css/summary/edit.html?orderID=403-7498756-0837158', readSSh('/AmazonTrack/User10466/403-7498756-0837158'))
    #store.trackOrdersForUser(46195, 'https://www.amazon.in/gp/css/summary/edit.html?orderID=404-4294022-1187515', readSSh('/home/amit/amazon.html'))
    
def getSummaryFile(directory):
    """Return the name of the most recent "orderSummary" dump in *directory*.

    Dump names are "orderSummary" followed by a "%d-%m:%H:%M:%S" stamp (no
    year). The whole stamp, including the time of day, decides which file is
    newest; names that carry only a "%d-%m" stamp are still accepted. Files
    with an unreadable stamp are ignored instead of ending the scan.

    Because the stamp has no year, dumps are ordered as if they were all
    taken in the same (leap) year, so "29-02" is a valid stamp.

    Returns the bare file name (not a path), or None when the directory is
    missing or holds no usable summary file.
    """
    prefix = "orderSummary"
    # Any leap year works; it only has to make "29-02" parseable.
    yearTag = "2016-"

    try:
        names = os.listdir(directory)
    except (OSError, TypeError):
        print("Missing directory")
        return None

    newestStamp = None
    newestName = None
    for name in names:
        if not name.startswith(prefix):
            continue
        stampText = name[len(prefix):]
        attempts = (
            (yearTag + stampText, "%Y-%d-%m:%H:%M:%S"),
            (yearTag + stampText.split(":")[0], "%Y-%d-%m"),
        )
        stamp = None
        for text, layout in attempts:
            try:
                stamp = datetime.strptime(text, layout)
                break
            except ValueError:
                continue
        if stamp is None:
            # Not a dump written by the tracker (or a damaged name); skip it.
            continue
        if newestStamp is None or stamp > newestStamp:
            newestStamp = stamp
            newestName = name
    return newestName
                

def parseDetailNotCreated():
    """Re-run tracking for every raw order whose status is 'DETAIL_NOT_CREATED_UNKNOWN'.

    The rows are loaded, the session is closed, and each row's stored url and
    html are passed to trackOrdersForUser of store 1. The session is closed
    again on the way out, whether or not processing failed.

    NOTE(review): the raw order id is passed in the userId position of
    trackOrdersForUser - confirm this is intended.
    """
    try:
        amazonStore = getStore(1)
        pendingQuery = session.query(OrdersRaw).filter_by(status='DETAIL_NOT_CREATED_UNKNOWN')
        rawOrders = pendingQuery.all()
        session.close()
        for rawOrder in rawOrders:
            amazonStore.trackOrdersForUser(rawOrder.id, rawOrder.order_url, rawOrder.rawhtml)
    finally:
        session.close()
        
def getDateStringDelivered(dateString='Monday', today=None):
    """Resolve the date text of a "Delivered ..." status to a calendar date.

    Accepted forms, tried in this order:
      1. 'today' / 'yesterday' (any case)
      2. '%d-%b-%y', e.g. '12-Jan-15'
      3. '%A, %d %b' without a year, e.g. 'Monday, 29 Feb'; resolved to the
         most recent such date that is not after today (this year, else last
         year). A year in which the date does not exist is skipped, so
         '29 Feb' still resolves when only one of the two years is a leap year.
      4. a bare weekday name; resolved to its latest occurrence strictly
         before today (one to seven days back).

    dateString -- text to interpret
    today      -- reference date; defaults to date.today(). Exists so callers
                  and tests can pin "now".

    Returns a date for forms 1 and 4, a datetime for forms 2 and 3, and None
    (after printing "could not parse") when nothing matches.
    """
    print(dateString)
    if today is None:
        today = date.today()
    text = dateString.strip()
    lowered = text.lower()

    if lowered == 'today':
        return today
    if lowered == 'yesterday':
        return today - timedelta(days=1)

    try:
        return datetime.strptime(text, '%d-%b-%y')
    except ValueError:
        pass

    # Year-less date: a delivery cannot lie in the future, so take the newest
    # candidate that is not after today.
    midnight = datetime(today.year, today.month, today.day)
    for year in (today.year, today.year - 1):
        try:
            candidate = datetime.strptime("%s %d" % (text, year), "%A, %d %b %Y")
        except ValueError:
            continue
        if candidate <= midnight:
            return candidate

    days_of_week = ['sunday', 'monday', 'tuesday', 'wednesday',
                    'thursday', 'friday', 'saturday']
    if lowered not in days_of_week:
        print("could not parse")
        return None
    # isoweekday() is 1..7 (Mon..Sun) while the list index is 0..6 (Sun..Sat);
    # both agree modulo 7. A zero difference means "a week ago", not today.
    deltaDays = (today.isoweekday() - days_of_week.index(lowered)) % 7 or 7
    deliveredDate = today - timedelta(days=deltaDays)
    print(deliveredDate.strftime('%d-%b-%y'))
    return deliveredDate

def getDateStringArriving(dateString='Thursday', today=None):
    """Resolve the date text of an "Arriving ..." status to a calendar date.

    Accepted forms, tried in this order:
      1. 'today' / 'tomorrow' (any case)
      2. '%d-%b-%y', e.g. '12-Jan-15'
      3. '%A, %d %b' without a year, e.g. 'Thursday, 10 Mar'; resolved to the
         nearest such date that is not before today (this year, else next
         year). A year in which the date does not exist is skipped, so
         '29 Feb' still resolves when only one of the two years is a leap year.
      4. a bare weekday name; resolved to its next occurrence, today included
         (zero to six days ahead).

    dateString -- text to interpret
    today      -- reference date; defaults to date.today(). Exists so callers
                  and tests can pin "now".

    Returns a date for forms 1 and 4, a datetime for forms 2 and 3, and None
    (after printing "Could not parse") when nothing matches.
    """
    print(dateString)
    if today is None:
        today = date.today()
    text = dateString.strip()
    lowered = text.lower()

    if lowered == 'today':
        return today
    if lowered == 'tomorrow':
        return today + timedelta(days=1)

    try:
        return datetime.strptime(text, '%d-%b-%y')
    except ValueError:
        pass

    # Year-less date: an arrival cannot lie in the past, so take the earliest
    # candidate that is not before today.
    midnight = datetime(today.year, today.month, today.day)
    for year in (today.year, today.year + 1):
        try:
            candidate = datetime.strptime("%s %d" % (text, year), "%A, %d %b %Y")
        except ValueError:
            continue
        if candidate >= midnight:
            return candidate

    days_of_week = ['sunday', 'monday', 'tuesday', 'wednesday',
                    'thursday', 'friday', 'saturday']
    if lowered not in days_of_week:
        print("Could not parse")
        return None
    # isoweekday() is 1..7 (Mon..Sun) while the list index is 0..6 (Sun..Sat);
    # both agree modulo 7, and a zero difference means today.
    deltaDays = (days_of_week.index(lowered) - today.isoweekday()) % 7
    return today + timedelta(days=deltaDays)

def main1():
    store = getStore(1)
    for merchantOrder in store.db.merchantOrder.find({"subOrders":{"$elemMatch":{"cashBackStatus":"Not Applicable", "cashBackPercentage":{"$gt":0}}}}):
        mo = obj(merchantOrder)
        for subOrder in mo.subOrders:
            subOrder.closed=False 
        print "orderId", mo.orderId
        store.populateDerivedFields(mo, False)
        store.db.merchantOrder.update({"orderId":mo.orderId}, {"$set":todict(mo)})
        break
        

def parseOrderNotCreated():
    """Re-parse every raw order whose status is 'ORDER_NOT_CREATED_UNKNOWN'.

    Each stored page is passed to parseOrderRawHtml of store 1 again and the
    'result' it reports is saved as the new status of the raw order row,
    committing after every order. The session is closed on the way out.
    """
    try:
        amazonStore = getStore(1)
        rawOrders = session.query(OrdersRaw).filter_by(status='ORDER_NOT_CREATED_UNKNOWN').all()
        session.close()
        for rawOrder in rawOrders:
            parsed = amazonStore.parseOrderRawHtml(rawOrder.id, rawOrder.sub_tag, rawOrder.user_id, rawOrder.rawhtml, rawOrder.order_url)
            outcome = parsed['result']
            # Load the row again: the session the rows came from was closed above.
            freshRow = session.query(OrdersRaw).filter_by(id=rawOrder.id).first()
            freshRow.status = outcome
            session.commit()
    finally:
        session.close()
    
if __name__ == '__main__':
    # Manual run: parse one saved thank-you page with store 1.
    # Earlier sample inputs, kept disabled for reference:
    #readSSh("~/AmazonTrack/User18053/orderSummary07-01:00:33:08")
#    readSSh("/AmazonTrack/User10466/orderSummary18-11:21:32:36")
#    readSSh("/AmazonTrack/User5525/171-0333104-6169933")
    #main()
    sampleUrl = 'https://www.amazon.in/gp/buy/thankyou/handlers/display.html?ie=UTF8&asins=B01DDP7D6W&orderId=406-9807873-3094728&purchaseId=404-46'
    store = getStore(1)
    sampleHtml = readSSh('/home/amit/sample.html')
    store.parseOrderRawHtml(99999, 'd', 14, sampleHtml, sampleUrl)