# Rev 15569 | Rev 16371 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
# coding=utf-8'''Created on Jan 15, 2015@author: amit'''from base64 import encodefrom bs4 import BeautifulSoupfrom datetime import datetime, timedeltafrom dtr.dao import Order, SubOrder, AmazonAffiliateInfofrom dtr.main import getStore, Store as MStore, ParseException, getBrowserObject, \ungzipResponse, tprintfrom dtr.sources.flipkart import todict, AFF_REPORT_URLfrom dtr.utils.utils import fetchResponseUsingProxyfrom paramiko import sftpfrom paramiko.client import SSHClientfrom paramiko.sftp_client import SFTPClientimport base64import gzipimport mechanizeimport os.pathimport paramikoimport reimport timeimport tracebackimport urllib2from dtr.storage.Mongo import getDealRankfrom dtr.storage.DataService import OrdersRawfrom elixir import *ORDER_REDIRECT_URL = 'https://www.amazon.in/gp/css/summary/edit.html?orderID=%s'ORDER_SUCCESS_URL = 'https://www.amazon.in/gp/buy/spc/handlers/static-submit-decoupled.html'THANKYOU_URL = 'https://www.amazon.in/gp/buy/thankyou/handlers/display.html'AMAZON_AFF_URL = 'https://assoc-datafeeds-eu.amazon.com/datafeed/listReports'AMAZON_AFF_FILE_URL = 'https://assoc-datafeeds-eu.amazon.com/datafeed/getReport?filename=saholic-21-orders-report-%s.tsv.gz'class Store(MStore):orderStatusRegexMap = { MStore.ORDER_PLACED : ['ordered from', 'not yet dispatched','dispatching now', 'preparing for dispatch'],MStore.ORDER_SHIPPED : ['dispatched on','dispatched', 'on the way', 'out for delivery', 'Out for delivery'],MStore.ORDER_CANCELLED : ['return complete', 'refunded', 'cancelled'],MStore.ORDER_DELIVERED : ['delivered']}def __init__(self,store_id):super(Store, self).__init__(store_id)def getName(self):return "amazon"def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl, track=False):parseString = "Tracking" if track else "Transacted"print parseString, "Order Id to be parsed is :", orderIdresp = {}resp['result'] = 'ORDER_NOT_CREATED'if ORDER_SUCCESS_URL in orderSuccessUrl or THANKYOU_URL in orderSuccessUrl:try:soup = 
BeautifulSoup(rawHtml)try:orderUrl = soup.find('div', {"id":"thank-you-box-wrapper"}).div.findAll('div', recursive=False)[1].a['href']merchantOrderId = re.findall(r'.*&oid=(.*)&?.*?', orderUrl)[0]except:merchantOrderId = soup.find(id="orders-list").div.span.b.textorder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl, True)order.orderTrackingUrl = ORDER_REDIRECT_URL % (merchantOrderId)order.orderSuccessUrl = orderSuccessUrlorder.merchantOrderId = merchantOrderIdorder.requireDetail = Trueorder.status = 'html_required'order.closed = Noneif self._saveToOrder(todict(order)):resp['result'] = 'ORDER_CREATED'resp["url"] = ORDER_REDIRECT_URL % (merchantOrderId)resp["htmlRequired"] = Trueresp['orderId'] = orderIdelse:resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'except:#Write all cases here for Order Not created Knowntry:if 'Securely redirecting you' in soup.h3.text.strip():resp['result'] = 'ORDER_NOT_CREATED_KNOWN'else:raiseexcept:try:if soup.h1.text.strip() in ['This is a duplicate order', 'There was a problem with your payment.']:resp['result'] = 'ORDER_NOT_CREATED_KNOWN'else:raiseexcept:try:if soup.h2.text.strip() == 'Web page not available':resp['result'] = 'ORDER_NOT_CREATED_KNOWN'else:raiseexcept:try:if soup.find(id="loading-spinner-img") is not None or soup.find(id="anonCarousel1") is not None or soup.find(id="ap_signin_pagelet_title") is not None:resp['result'] = 'ORDER_NOT_CREATED_KNOWN'else:raiseexcept:resp['result'] = 'ORDER_NOT_CREATED_UNKNOWN'else:try:mo = self.db.merchantOrder.find_one({"orderId":orderId})if mo is not None:merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl, False)merchantOrder.createdOn = mo.get("createdOn")merchantOrder.createdOnInt = mo.get("createdOnInt")else:print "Could not find amazon order with order Id", orderIdmerchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)soup = BeautifulSoup(rawHtml)try:self.parseNewStlye(merchantOrder, soup)resp['result'] = 
'DETAIL_CREATED'except:try:traceback.print_exc()self.parseOldStlye(merchantOrder, soup)resp['result'] = 'DETAIL_CREATED'except:traceback.print_exc()try:self.parseCancelled(merchantOrder, soup)resp['result'] = 'ORDER_CANCELLED'except:try:if soup.h1.span.text=="Account" or soup.h1=="Your Account":resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'else:raiseexcept:if soup.find(id="ap_signin_pagelet_title").h1.text.strip()=="Sign In":resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'else:raiseexcept:order = self.db.merchantOrder.find_one({"orderId":orderId})if order is not None:self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"status":"parse_failed"}})print "Error occurred"resp['result'] = 'DETAIL_NOT_CREATED_UNKNOWN'traceback.print_exc()return resp#This should be exposed from api for specific sourcesdef scrapeStoreOrders(self):orders = self.db.merchantOrder.find({"storeId":1, "closed":False, "subOrders.closed":False, "subOrders.trackingUrl":{"$exists":True}})for merchantOrder in orders:executeBulk = Falsetry:bulk = self.db.merchantOrder.initialize_ordered_bulk_op()closed = Truemap1 = {}for subOrder in merchantOrder.get("subOrders"):if subOrder.get("closed"):continueelif subOrder.get("trackingUrl") is None:closed = FalsecontinueexecuteBulk = TruefindMap = {"orderId":merchantOrder.get("orderId"), "subOrders.merchantSubOrderId":subOrder.get("merchantSubOrderId")}trackingUrl = subOrder.get("trackingUrl")if not map1.has_key(trackingUrl):map1[trackingUrl] = self.parseTrackingUrl(trackingUrl)newOrder = map1.get(trackingUrl)newOrder['cashBackStatus'] = subOrder.get('cashBackStatus')updateMap = self.getUpdateMap(newOrder)print findMap, "\n", updateMapbulk.find(findMap).update({'$set' : updateMap})closed = closed and newOrder['closed']if executeBulk:bulk.find({"orderId":merchantOrder.get("orderId")}).update({"$set":{"closed":closed, "parseError":False}})bulk.execute()except:tprint("Could not update " + str(merchantOrder['orderId']) + " For store " + 
self.getName())self.db.merchantOrder.update({"orderId":merchantOrder['orderId']}, {"$set":{"parseError":True}})traceback.print_exc()def parserest(self, soup):print "Hi"if soup.find('h1'):print "OK"def parseOldStlye(self, merchantOrder, soup):merchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrltable = soup.body.findAll("table", recursive=False)[1]#print tabletables = table.tr.td.findAll("table", recursive=False)for tr in tables[2].findAll("tr"):boldElement = tr.td.bif "Order Placed" in str(boldElement):merchantOrder.placedOn = boldElement.next_sibling.strip()if "order number" in str(boldElement):merchantOrder.merchantOrderId = boldElement.next_sibling.strip()if "Order Total" in str(boldElement):merchantOrder.paidAmount = int(float(boldElement.find('span').contents[-1].replace(',','')))anchors = table.tr.td.findAll("a", recursive=False)paymentAnchor = anchors.pop(-1)count = 0subOrders = []merchantOrder.subOrders = subOrderscounter = 0for anchor in anchors:count += 1tab = anchor.next_siblingstatus = MStore.ORDER_PLACEDsubStr = "Delivery #" + str(count) + ":"if subStr in tab.find("b").text:detailedStatus = tab.find("b").text.replace(subStr, '').strip()tab = tab.next_sibling.next_siblingtrs = tab.find("table").find('tbody').findAll("tr", recursive = False)estimatedDelivery = trs[0].td.find("b").next_sibling.strip()orderItemTrs = trs[1].findAll("td", recursive=False)[1].table.tbody.findAll("tr", recursive = False)i = -1for orderItemTr in orderItemTrs:i += 1if i%2 == 0:continuecounter += 1quantity = int(re.findall(r'\d+', orderItemTr.td.contents[0])[0])productUrl = orderItemTr.td.contents[1].a["href"]productTitle = orderItemTr.td.contents[1].a.textunitPrice = int(float(orderItemTr.findAll('td')[1].span.text.replace('Rs. 
','').replace(',','')))subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, unitPrice*quantity, status, quantity)subOrder.merchantSubOrderId = str(counter) + " of " + merchantOrder.merchantOrderIdsubOrder.estimatedDeliveryDate = estimatedDeliveryestDlvyTime = datetime.strptime(estimatedDelivery.split('-')[0].strip(), "%A %d %B %Y")createdOn = datetime.fromtimestamp(merchantOrder.createdOnInt)subOrder.trackAfter = int(time.mktime(max(estDlvyTime-timedelta(days=4),createdOn + timedelta(days=3)).timetuple()))subOrder.productCode = productUrl.split('/')[5]subOrder.detailedStatus = detailedStatus(cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, unitPrice*quantity)dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)subOrder.dealRank = dealRank.get('rank')subOrder.rankDesc = dealRank.get('description')cashbackStatus = Store.CB_PENDINGif cashbackAmount <= 0:cashbackStatus = Store.CB_NAsubOrder.cashBackStatus = cashbackStatussubOrder.cashBackAmount = cashbackAmountif percentage > 0:subOrder.cashBackPercentage = percentagesubOrders.append(subOrder)priceList = paymentAnchor.next_sibling.next_sibling.next_sibling.table.table.tbody.tbody.tbody.findAll('tr', recursive=False)totalAmount = 0grandAmount = 0for price in priceList:labelTd = price.tdif 'Subtotal:' in labelTd.text:totalAmount += int(float(labelTd.next_sibling.next_sibling.find('span').contents[-1].replace(',','')))elif 'Grand Total:' in labelTd.text:grandAmount += int(float(labelTd.next_sibling.next_sibling.find('span').contents[-1].replace(',','')))if grandAmount < totalAmount:diff = totalAmount - grandAmountfor subOrder in merchantOrder.subOrders:subOrder.amountPaid -= int(diff*(1-subOrder.amountPaid/totalAmount))merchantOrder.status='success'self._updateToOrder(todict(merchantOrder))def parseNewStlye(self, merchantOrder, soup):merchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrlorderDetailsContainer = 
soup.body.find(id="orderDetails")divAfterH1 = orderDetailsContainer.h1.next_sibling.next_siblingorderLeftDiv = divAfterH1.divplacedOnSpan = orderLeftDiv.find("span", {'class':'order-date-invoice-item'})merchantOrder.placedOn =placedOnSpan.text.split('Ordered on')[1].strip()merchantOrder.merchantOrderId = placedOnSpan.next_sibling.next_sibling.text.split('Order#')[1].strip()try:priceBox = divAfterH1.next_sibling.next_sibling.next_sibling.next_sibling.find("div", {"class":"a-box-inner"}).div.div.findAll('div', recursive=False)[-1]except:priceBox = divAfterH1.next_sibling.next_sibling.next_sibling.next_sibling.find("div", {"class":"a-box a-last"}).div.div.findAll('div', recursive=False)[-1]priceRows = priceBox.findAll('div', {'class':'a-row'})subTotal = 0shippingPrice = 0promoApplied = 0for priceRow in priceRows:if "Item(s) Subtotal:" in str(priceRow):subTotal = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))elif "Shipping:" in str(priceRow):shippingPrice = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))elif "Grand Total:" in str(priceRow):grandPrice = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))merchantOrder.paidAmount = grandPriceelif "Total:" in str(priceRow):totalPrice = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))elif "Promotion Applied:" in str(priceRow):promoApplied += int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))totalPaid = subTotalif promoApplied > 0:totalPaid -= promoAppliedif shippingPrice <= promoApplied:totalPaid += shippingPriceshipmentDivs = orderDetailsContainer.find('div', class_='shipment').findAll('div', recursive = False)subOrders = []merchantOrder.subOrders = subOrdersi=1closedStatus = Truefor shipmentDiv in shipmentDivs:innerBoxes = shipmentDiv.findAll('div', recursive = 
False)statusDiv = innerBoxes[0]subOrderStatus = statusDiv.div.span.text.strip()deliverySpan = statusDiv.div.div.find_next_sibling('div').spanproductDivs = innerBoxes[-1].div.div.div.findAll('div', recursive=False)subOrders = []merchantOrder.subOrders = subOrdersfor i, productDiv in enumerate(productDivs):i +=1imgDiv = productDiv.div.divdetailDiv = imgDiv.find_next_sibling('div')detailDivs = detailDiv.findAll('div', recursive=False)arr = detailDivs[0].a.text.strip().split(" of ", 1)(productTitle, quantity) = (arr[-1], (1 if len(arr)==1 else int(arr[0])) )try:unitPrice = int(float(detailDivs[2].span.text.replace('Rs. ','').replace(',','')))except:unitPrice = int(float(detailDivs[3].span.text.replace('Rs. ','').replace(',','')))amountPaid = int((unitPrice*quantity*totalPaid)/subTotal)productUrl = "http://www.amazon.in" + detailDivs[0].a.get('href')subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, amountPaid, MStore.ORDER_PLACED, quantity)subOrder.productCode = productUrl.split('/')[5]subOrder.unitPrice = unitPricesubOrder.merchantSubOrderId = str(i) + " of " + merchantOrder.merchantOrderIdestDlvyTime = datetime.now()if deliverySpan is not None:try:subOrder.estimatedDeliveryDate = deliverySpan.span.text.strip()estDate = subOrder.estimatedDeliveryDate.split("-")[0].strip()subOrder.estimatedDeliveryInt = int(time.mktime((datetime.strptime(estDate, "%A %d %B %Y")).timetuple()))estDlvyTime = datetime.strptime(estDate, "%A %d %B %Y")except:if "Delivered on" in deliverySpan.text:subOrder.deliveredOn = deliverySpan.text.split(":")[1].strip()subOrder.estimatedDeliveryDate = "Not available"createdOn = datetime.fromtimestamp(merchantOrder.createdOnInt)subOrder.trackAfter = int(time.mktime(max(estDlvyTime-timedelta(days=4),createdOn + timedelta(days=3)).timetuple()))subOrder.detailedStatus = subOrderStatussubOrder.deliveryCharges = shippingPricesubOrder.imgUrl = imgDiv.img["src"](cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, 
amountPaid)dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)subOrder.dealRank = dealRank.get('rank')subOrder.rankDesc = dealRank.get('description')cashbackStatus = Store.CB_PENDINGif cashbackAmount <= 0:cashbackStatus = Store.CB_NAsubOrder.cashBackStatus = cashbackStatussubOrder.cashBackAmount = cashbackAmountif percentage > 0:subOrder.cashBackPercentage = percentageif hasattr(subOrder, 'deliveredOn'):subOrder.status = Store.ORDER_DELIVEREDsubOrder.closed = Trueif subOrder.cashBackStatus == Store.CB_PENDING:subOrder.cashBackStatus = Store.CB_APPROVEDelif closedStatus:closedStatus= FalsesubOrders.append(subOrder)merchantOrder.status='success'merchantOrder.closed = closedStatusself._updateToOrder(todict(merchantOrder))def parseCancelled(self, merchantOrder,soup):try:fonts = soup.body.findAll("table", recursive=False)[1].findAll("font")if fonts[0].text == "Important Message":if fonts[1].text=="This order has been cancelled.":merchantOrder.closed = TruemerchantOrder.status = "cancelled"merchantOrder.requireDetail = Falseself._updateToOrder(todict(merchantOrder))returnelse:raise ParseException("parseCancelled", "Found detailed status" + fonts[1].text)else:raise ParseException("parseCancelled", "Found detailed status" + fonts[1].text)except:orderDetails = soup.body.find(id="orderDetails")if orderDetails is not None and orderDetails.h4.text == "This order has been cancelled.":merchantOrder.closed = TruemerchantOrder.status = "cancelled"merchantOrder.requireDetail = Falseself._updateToOrder(todict(merchantOrder))else:raise ParseException("parseCancelled", "Found detailed status" + fonts[1].text)def getTrackingUrls(self, userId):missingOrderUrls = []missingOrders = self._getMissingOrders({'userId':userId})for missingOrder in missingOrders:missingOrderUrls.append(ORDER_REDIRECT_URL%(missingOrder['merchantOrderId']))orders = self._getActiveOrders({'userId':userId, "subOrders.trackingUrl":{"$exists":False} })count = len(orders)print "count", 
countprint "Missing Urls"print "*************"print missingOrderUrlsif count > 0:return missingOrderUrls + ['https://www.amazon.in/gp/css/order-history', 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled', 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled&startIndex=10']else:return missingOrderUrlsdef trackOrdersForUser(self, userId, url, rawHtml):directory = "/AmazonTrack/User" + str(userId)if not os.path.exists(directory):os.makedirs(directory)try:searchMap = {'userId':userId}collectionMap = {'merchantOrderId':1}activeOrders = self._getActiveOrders(searchMap, collectionMap)datetimeNow = datetime.now()timestamp = int(time.mktime(datetimeNow.timetuple()))print "url----------------", urlif url == 'https://www.amazon.in/gp/css/order-history' or 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled' in url:if url == 'https://www.amazon.in/gp/css/order-history':filename = directory + "/orderSummary" + datetime.strftime(datetime.now(), '%d-%m:%H:%M:%S')else:filename = directory + "/cancelledSummary" + datetime.strftime(datetime.now(), '%d-%m:%H:%M:%S')f = open(filename,'w')f.write(rawHtml) # python will convert \n to os.linesepf.close() # you can omit in most cases as the destructor will call ifsoup = BeautifulSoup(rawHtml)allOrders = soup.find(id="ordersContainer").findAll('div', {'class':'a-box-group a-spacing-base order'})bulk = self.db.merchantOrder.initialize_ordered_bulk_op()for activeOrder in activeOrders:for orderEle in allOrders:orderdiv = orderEle.find('div', {'class':'a-box a-color-offset-background order-info'}).find('div', {'class':'a-fixed-right-grid-col actions a-col-right'})merchantOrderId = orderdiv.find('span', {'class':'a-color-secondary value'}).text.strip()if merchantOrderId==activeOrder['merchantOrderId']:closed = Trueshipments = orderEle.findAll('div',{'class':re.compile('.*?a-box.*?')}, recursive=False)shipments.pop(0)for shipment in shipments:shipdiv = shipment.find('div', 
{'class':'a-box-inner'})sdivs = shipment.div.div.findAll('div', recursive=False)orderStatus = sdivs[0].span.text.strip()status = self._getStatusFromDetailedStatus(orderStatus)if status == MStore.ORDER_DELIVERED:deliveredOn = sdivs[0].findAll('span')[-1].text.strip()try:deliveredOn = deliveredOn.split(":")[1].strip()except:deliveredOn = ""deliveryestimatespan = sdivs[0].find('span', {'class':'a-color-success'})deliveryEstimate = Noneif deliveryestimatespan is not None:deliveryEstimate = deliveryestimatespan.find('span', {'class':'a-text-bold'}).text.strip()productDivs = shipdiv.find('div', {'class':re.compile('.*?a-spacing-top-medium.*?')}).find('div', {'class':'a-row'}).findAll('div', recursive=False)trackingUrl = Nonefor buttonDiv in shipdiv.findAll('span', {'class':'a-button-inner'}):if buttonDiv.find('a').text.strip()=='Track package':trackingUrl = buttonDiv.find('a')['href'].strip()if not trackingUrl.startswith("http"):trackingUrl = "http://www.amazon.in" + trackingUrlbreakfor prodDiv in productDivs:prodDiv.find('div', {'class':'a-fixed-left-grid-inner'})productTitle = prodDiv.find('div', {'class':'a-fixed-left-grid-inner'}).find("div", {'class':'a-row'}).find('a').text.strip()imgUrl = prodDiv.find("img")["src"]for subOrder in activeOrder['subOrders']:if subOrder['productTitle'] == productTitle:findMap = {"orderId": activeOrder['orderId'], "subOrders.merchantSubOrderId": subOrder.get("merchantSubOrderId")}updateMap = {}closedStatus = FalseupdateMap['subOrders.$.imgUrl'] = imgUrlupdateMap['subOrders.$.lastTracked'] = timestampupdateMap['subOrders.$.detailedStatus'] = orderStatuscashbackStatus = subOrder.get("cashBackStatus")updateMap['subOrders.$.status'] = statusif status==MStore.ORDER_DELIVERED:updateMap['subOrders.$.deliveredOn'] = deliveredOnclosedStatus = TrueupdateMap['subOrders.$.closed'] = Trueif cashbackStatus == Store.CB_PENDING:updateMap['subOrders.$.cashBackStatus'] = Store.CB_APPROVEDif status==MStore.ORDER_CANCELLED:closedStatus = 
TrueupdateMap['subOrders.$.closed'] = Trueif cashbackStatus == Store.CB_PENDING:updateMap['subOrders.$.cashBackStatus'] = Store.CB_CANCELLEDif status==MStore.ORDER_SHIPPED:updateMap['subOrders.$.estimatedDeliveryDate'] = deliveryEstimateif trackingUrl is not None:updateMap['subOrders.$.trackingUrl'] = trackingUrlif not closedStatus:closed = Falsebulk.find(findMap).update({'$set' : updateMap})breakbulk.find({'orderId': activeOrder['orderId']}).update({'$set':{'closed': closed}})bulk.execute()return 'PARSED_SUCCESS'else:merchantOrderId = re.findall(r'https://www.amazon.in/gp/css/summary/edit.html\?orderID=(.*)?', url, re.IGNORECASE)[0]print "merchantOrderId", merchantOrderIdmerchantOrder = self.db.merchantOrder.find_one({"merchantOrderId":merchantOrderId})filename = directory + "/" + merchantOrderIdf = open(filename,'w')f.write(rawHtml) # python will convert \n to os.linesepf.close() # you can omit in most cases as the destructor will call ifresult = self.parseOrderRawHtml(merchantOrder['orderId'], merchantOrder['subTagId'], merchantOrder['userId'], rawHtml, url, True)['result']print "result", resulttry:order1 = session.query(OrdersRaw).filter_by(id=merchantOrder['orderId']).first()order1.status = resultorder1.rawhtml = rawHtmlsession.commit()except:traceback.print_exc()finally:session.close()return 'PARSED_SUCCESS'passreturn 'PARSED_SUCCESS_NO_ORDERS'except:traceback.print_exc()return 'PARSED_FAILED'def _getStatusFromDetailedStatus(self, detailedStatus):if "ordered from" in detailedStatus.lower():return MStore.ORDER_PLACEDfor key, value in self.orderStatusRegexMap.iteritems():if detailedStatus.lower() in value:return keyprint "Detailed Status need to be mapped", "Store:", self.store_idraise ParseException("_getStatusFromDetailedStatus", "Found new order status" + detailedStatus)def scrapeAffiliate(self, startDate=None, endDate=None):br = getBrowserObject()br.add_password('https://assoc-datafeeds-eu.amazon.com', 'Saholic', 'Fnubyvp')url = AMAZON_AFF_URLresponse = 
br.open(url)#get data for past 40 days and store it to mongodt = datetime.now()dat = dt - timedelta(days=2)url = AMAZON_AFF_FILE_URL%(datetime.strftime(dat, "%Y%m%d"))response = br.open(url)page = gzip.GzipFile(fileobj=response, mode='rb').read()j=-1for row in page.split("\n"):j += 1if j== 0 or j==1:continuefields = row.split("\t")if len(fields)>1:print fieldsamazonAffiliate = AmazonAffiliateInfo(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], fields[6], fields[7], fields[8], fields[9])print amazonAffiliateself.db.amazonAffiliateInfo.insert(todict(amazonAffiliate))else:breakdef parseTrackingUrl(self, trackingUrl):subOrder = {}page = fetchResponseUsingProxy(trackingUrl)soup = BeautifulSoup(page)alertContainer = soup.find("div", {"class":"a-box-inner a-alert-container"})if alertContainer is not None:statusText = alertContainer.h4.span.textdetailedStatus = statusText.split(":")[0].strip()subOrder['detailedStatus'] = detailedStatusif detailedStatus.lower() == "in transit":passelif detailedStatus.lower() == "undeliverable":subOrder['status'] = MStore.ORDER_CANCELLEDreturn subOrderelse:summaryLeft = soup.find(id="summaryLeft")statusText = summaryLeft.h2.textdetailedStatus = statusText.split(":")[0].strip()subOrder['detailedStatus'] = detailedStatusif detailedStatus.lower() == "delivered":subOrder['deliveredOn'] = summaryLeft.findAll("span")[-2].text.strip()subOrder['status'] = MStore.ORDER_DELIVEREDreturn subOrderelse:subOrder['expectedDelivery'] = summaryLeft.findAll("span")[-1].text.strip()return subOrderdef main():#str1 = readSSh("/AmazonTrack/User533/403-3748792-1516356")#readSSh("/tmp/User2/404-2225153-7073122")store = getStore(1)#store.scrapeStoreOrders()# br = mechanize.Browser()# br.addheaders = [('User-agent','Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),# ('Accept', 'text/html,application/xhtml+xml,application/json,application/xml;q=0.9,*/*;q=0.8'),# ('Accept-Encoding', 
'gzip,deflate,sdch'),# ('Accept-Language', 'en-US,en;q=0.8'),# ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3')]# store.scrapeStoreOrders()store.parseOrderRawHtml(5322, "13232", 14, readSSh("/home/amit/5322"), "https://www.amazon.in/gp/buy/spc/handlers/static-submit-decoupled.html/ref=ox_spc_place_order?ie=UTF8&fromAnywhere=1&hasWorkingJav")#store.trackOrdersForUser(4355, "https://www.amazon.in/gp/css/summary/edit.html?orderID=171-4824011-7090713", readSSh("~/4355"))#readSSh("/tmp/User211/2015-04-12 10:32:41.905765")#store.scrapeAffiliate()#parseDetailNotCreated()parseOrderNotCreated()def parseDetailNotCreated():try:store=getStore(1)orders = session.query(OrdersRaw).filter_by(status='DETAIL_NOT_CREATED_UNKNOWN').all()session.close()for order in orders:store.trackOrdersForUser(order.id, order.order_url, order.rawhtml)finally:session.close()def parseOrderNotCreated():try:store=getStore(1)orders = session.query(OrdersRaw).filter_by(status='ORDER_NOT_CREATED_UNKNOWN').all()session.close()for order in orders:result = store.parseOrderRawHtml(order.id, order.sub_tag, order.user_id, order.rawhtml, order.order_url)['result']order1 = session.query(OrdersRaw).filter_by(id=order.id).first()order1.status = resultsession.commit()finally:session.close()def readSSh(fileName):try:str1 = open(fileName).read()return str1except:ssh_client = SSHClient()str1 = ""ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())ssh_client.connect('dtr', 22, 'root', 'ecip$dtrMay2014')sftp_client = ssh_client.open_sftp()try:if not os.path.exists(os.path.dirname(fileName)):os.makedirs(os.path.dirname(fileName))sftp_client.get(fileName, fileName)try:str1 = open(fileName).read()return str1finally:passexcept:"could not read"return str1if __name__ == '__main__':main()