Rev 20687 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
# coding=utf-8'''Created on Jan 15, 2015@author: amit'''from base64 import encodefrom bs4 import BeautifulSoupfrom datetime import datetime, timedelta, datefrom dtr.api.Order import process_rejectsfrom dtr.dao import Order, SubOrder, AmazonAffiliateInfo, objfrom dtr.main import getStore, Store as MStore, ParseException, getBrowserObject, \ungzipResponse, tprintfrom dtr.storage.DataService import OrdersRaw, Orders, Order_Parse_Info, \All_user_addressesfrom dtr.storage.Mongo import getDealRankfrom dtr.utils import utilsfrom dtr.utils.utils import fetchResponseUsingProxy, readSSh, todictfrom elixir import *import base64import dtrimport gzipimport mechanizeimport os.pathfrom pyquery import PyQuery as pqimport reimport timeimport tracebackimport urllib2import urlparseORDER_REDIRECT_URL = 'https://www.amazon.in/gp/css/summary/edit.html?orderID=%s'ORDER_SUCCESS_URL = 'https://www.amazon.in/gp/buy/spc/handlers/static-submit-decoupled.html'THANKYOU_URL = 'https://www.amazon.in/gp/buy/thankyou/handlers/display.html'AMAZON_AFF_URL = 'https://assoc-datafeeds-eu.amazon.com/datafeed/listReports'AMAZON_AFF_FILE_URL = 'https://assoc-datafeeds-eu.amazon.com/datafeed/getReport?filename=saholic-21-orders-report-%s.tsv.gz'class Store(MStore):orderStatusRegexMap = { MStore.ORDER_PLACED : ['ordered from', 'not yet dispatched','dispatching now', 'preparing for dispatch', 'order received'],MStore.ORDER_SHIPPED : ['dispatched on','dispatched', 'on the way', 'out for delivery', 'Out for delivery'],MStore.ORDER_CANCELLED : ['return complete', 'refunded', 'cancelled', 'replacement complete', 'return received'],MStore.ORDER_DELIVERED : ['delivered', 'your package was delivered', 'package was handed directly to customer']}def __init__(self,store_id):super(Store, self).__init__(store_id)def getName(self):return "amazon"def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl, track=False):rawHtml = re.sub(r'[^\x00-\x7F]+',' ', rawHtml)parseString = "Tracking" if track else "Transacted"print parseString, "Order Id to be parsed is :", orderIdresp = {}resp['result'] = 'ORDER_NOT_CREATED'if ORDER_SUCCESS_URL in orderSuccessUrl or THANKYOU_URL in orderSuccessUrl:try:doc = pq(rawHtml)try:orderUrl = doc('a.a-touch-link').attr.hrefmerchantOrderId = re.findall(r'.*&oid=(.*)&?.*?', orderUrl)[0]except:merchantOrderId = doc("#orders-list>div>span>b").html()if not merchantOrderId:merchantOrderId = urlparse.parse_qs(urlparse.urlsplit(orderSuccessUrl).query)else:raiseif not merchantOrderId or re.match("\d+-\d+\d+", merchantOrderId):raiseorder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl, True)order.orderTrackingUrl = ORDER_REDIRECT_URL % (merchantOrderId)order.orderSuccessUrl = orderSuccessUrlorder.merchantOrderId = merchantOrderIdorder.requireDetail = Trueorder.status = 'html_required'order.closed = Noneif self._saveToOrder(todict(order)):resp['result'] = 'ORDER_CREATED'resp["url"] = ORDER_REDIRECT_URL % (merchantOrderId)resp["htmlRequired"] = Trueresp['orderId'] = orderIdelse:resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'except:#Write all cases here for Order Not created Knownsoup = BeautifulSoup(rawHtml, "html5lib")try:if not soup.body:resp['result'] = 'ORDER_NOT_CREATED_KNOWN'elif 'Securely redirecting you' in soup.find("h3").text.strip() or soup.find("h3").text.strip()=="Orders":resp['result'] = 'ORDER_NOT_CREATED_KNOWN'else:raiseexcept:try:if soup.find("h1").text.strip() in ['This is a duplicate order', 'There was a problem with your payment.', 'Your Orders', 'Your Shopping Cart is empty.', 'Select a payment method', 'Edit quantities'] or "Saved for later" in soup.find("h1").text.strip():resp['result'] = 'ORDER_NOT_CREATED_KNOWN'else:raiseexcept:try:if soup.find("h2").text.strip() in ['Web page not available','Webpage not available', 'Do you have an Amazon password?']:resp['result'] = 'ORDER_NOT_CREATED_KNOWN'else:raiseexcept:try:if soup.find(id="loading-spinner-img") is not None or soup.find(id="anonCarousel1") is not None or soup.find(id="ap_signin_pagelet_title") is not None or soup.find(id="nav-greeting-name") is not None:resp['result'] = 'ORDER_NOT_CREATED_KNOWN'elif soup.find("b", {'class':'h1'}).text.strip().find("We're sorry") > -1:resp['result'] = 'ORDER_NOT_CREATED_KNOWN'else:raiseexcept:resp['result'] = 'ORDER_NOT_CREATED_UNKNOWN'else:try:mo = self.db.merchantOrder.find_one({"orderId":orderId})if mo is not None:merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl, False)merchantOrder.createdOn = mo.get("createdOn")merchantOrder.createdOnInt = mo.get("createdOnInt")else:print "Could not find amazon order with order Id", orderIdmerchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)soup = BeautifulSoup(rawHtml, "html5lib")if not soup.body:resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'else:try:self.parseNewStlye(merchantOrder, soup)resp['result'] = 'DETAIL_CREATED'except:try:traceback.print_exc()self.parseAnotherStlye(merchantOrder, pq(rawHtml))resp['result'] = 'DETAIL_CREATED'except:try:traceback.print_exc()self.parseOldStlye(merchantOrder, soup)resp['result'] = 'DETAIL_CREATED'except:traceback.print_exc()try:self.parseCancelled(merchantOrder, soup)resp['result'] = 'ORDER_CANCELLED'except:try:if soup.find("h1").text.strip() in ["Your Account"] or soup.find("h1").span.text=="Account":resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'else:raiseexcept:if soup.find(id="ap_signin_pagelet_title").find("h1").text.strip()=="Sign In":resp['result'] = 'DETAIL_NOT_CREATED_KNOWN'else:raiseif resp['result'] == 'DETAIL_NOT_CREATED_KNOWN':self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"status":"html_required"}})except:self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"status":"html_required"}})print "Error occurred"resp['result'] = 'DETAIL_NOT_CREATED_UNKNOWN'traceback.print_exc()return resp#This should be exposed from api for specific sourcesdef scrapeStoreOrders(self):orders = self.db.merchantOrder.find({"storeId":1, "closed":False, "subOrders.closed":False, "subOrders.trackingUrl":{"$exists":True}, "subOrders.login":{"$exists":False}})for merchantOrder in orders:executeBulk = Falsetry:bulk = self.db.merchantOrder.initialize_ordered_bulk_op()closed = Truemap1 = {}for subOrder in merchantOrder.get("subOrders"):if subOrder.get("closed"):continueelif subOrder.get("trackingUrl") is None:closed = FalsecontinuefindMap = {"orderId":merchantOrder.get("orderId"), "subOrders.merchantSubOrderId":subOrder.get("merchantSubOrderId")}trackingUrl = subOrder.get("trackingUrl")if not map1.has_key(trackingUrl):map1[trackingUrl] = self.parseTrackingUrl(trackingUrl, merchantOrder.get("orderId"))newOrder = map1.get(trackingUrl)if newOrder:executeBulk = TrueupdateMap = self.getUpdateMap(newOrder, subOrder.get('cashBackStatus'))print findMap, "\n", updateMapbulk.find(findMap).update({'$set' : updateMap})closed = closed and newOrder['closed']if executeBulk:bulk.find({"orderId":merchantOrder.get("orderId")}).update({"$set":{"closed":closed, "parseError":False}})bulk.execute()except:tprint("Could not update " + str(merchantOrder['orderId']) + " For store " + self.getName())self.db.merchantOrder.update({"orderId":merchantOrder['orderId']}, {"$set":{"parseError":True}})traceback.print_exc()def parserest(self, soup):print "Hi"if soup.find('h1'):print "OK"def parseOldStlye(self, merchantOrder, soup):merchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrltable = soup.body.findAll("table", recursive=False)[1]#print tabletables = table.tr.td.findAll("table", recursive=False)for tr in tables[2].findAll("tr"):boldElement = tr.td.bif "Order Placed" in str(boldElement):merchantOrder.placedOn = boldElement.next_sibling.strip()if "order number" in str(boldElement):merchantOrder.merchantOrderId = boldElement.next_sibling.strip()if "Order Total" in str(boldElement):merchantOrder.paidAmount = int(float(boldElement.find('span').contents[-1].replace(',','')))anchors = table.tr.td.findAll("a", recursive=False)paymentAnchor = anchors.pop(-1)count = 0subOrders = []merchantOrder.subOrders = subOrderscounter = 0for anchor in anchors:count += 1tab = anchor.next_siblingstatus = MStore.ORDER_PLACEDsubStr = "Delivery #" + str(count) + ":"if subStr in tab.find("b").text:detailedStatus = tab.find("b").text.replace(subStr, '').strip()tab = tab.next_sibling.next_siblingtrs = tab.find("table").find('tbody').findAll("tr", recursive = False)estimatedDelivery = trs[0].td.find("b").next_sibling.strip()orderItemTrs = trs[1].findAll("td", recursive=False)[1].table.tbody.findAll("tr", recursive = False)i = -1for orderItemTr in orderItemTrs:i += 1if i%2 == 0:continuecounter += 1quantity = int(re.findall(r'\d+', orderItemTr.td.contents[0])[0])productUrl = orderItemTr.td.contents[1].a["href"]productTitle = orderItemTr.td.contents[1].a.textunitPrice = int(float(orderItemTr.findAll('td')[1].span.text.replace('Rs. ','').replace(',','')))subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, unitPrice*quantity, status, quantity)subOrder.merchantSubOrderId = str(counter) + " of " + merchantOrder.merchantOrderIdsubOrder.estimatedDeliveryDate = estimatedDeliveryestDlvyTime = datetime.strptime(estimatedDelivery.split('-')[0].strip(), "%A %d %B %Y")createdOn = datetime.fromtimestamp(merchantOrder.createdOnInt)subOrder.trackAfter = int(time.mktime(max(estDlvyTime-timedelta(days=4),createdOn + timedelta(days=3)).timetuple()))subOrder.productCode = productUrl.split('/')[5]subOrder.detailedStatus = detailedStatus(cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, unitPrice)dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)subOrder.dealRank = dealRank.get('rank')subOrder.rankDesc = dealRank.get('description')subOrder.maxNlc = dealRank.get('maxNlc')subOrder.minNlc = dealRank.get('minNlc')subOrder.db = dealRank.get('dp')subOrder.itemStatus = dealRank.get('status')cashbackStatus = Store.CB_PENDINGif cashbackAmount <= 0:cashbackStatus = Store.CB_NAsubOrder.cashBackStatus = cashbackStatussubOrder.cashBackAmount = cashbackAmount*quantityif percentage > 0:subOrder.cashBackPercentage = percentagesubOrders.append(subOrder)priceList = paymentAnchor.next_sibling.next_sibling.next_sibling.table.table.tbody.tbody.tbody.findAll('tr', recursive=False)totalAmount = 0grandAmount = 0for price in priceList:labelTd = price.tdif 'Subtotal:' in labelTd.text:totalAmount += int(float(labelTd.next_sibling.next_sibling.find('span').contents[-1].replace(',','')))elif 'Grand Total:' in labelTd.text:grandAmount += int(float(labelTd.next_sibling.next_sibling.find('span').contents[-1].replace(',','')))if grandAmount < totalAmount:diff = totalAmount - grandAmountfor subOrder in merchantOrder.subOrders:subOrder.amountPaid -= int(diff*(1-subOrder.amountPaid/totalAmount))merchantOrder.status='success'self._updateToOrder(todict(merchantOrder))def parseAnotherStlye(self, merchantOrder, pqobj):counter=0detailSection = NoneshipmentSection = NonesummarySection = Nonefor el in pqobj('.a-section'):if "View order details" in pq(el).text():detailSection = pq(el)counter += 1if "Shipment details" in pq(el).text():shipmentSection = pq(el)counter += 1if "Order Summary" in pq(el).text():summarySection = pq(el)counter += 1if counter == 3:breaki=-1for s in shipmentSection('.a-box-group'):shipmentGroup = pq(s)for shipment in shipmentGroup('.a-box'):i += 1if i==0:continueshipment = pq(shipment)shipmentStatusSection = shipment('.a-section:eq(0)')productDetails = shipment('.a-section:eq(1)>.a-row')print shipmentStatusSection('h3').text(), shipmentStatusSection('p').text(), shipmentStatusSection('span').text()for productDetail in productDetails:productDetail = pq(productDetail)pImg = productDetail.children('div').eq(0)pQty = productDetail.children('div').eq(1)#print pImg('a').attr('href'), pImg('a').attr('title'), pImg('img').attr('src'), pQty('.a-row:nth-child(2)')('span').text().split(':')[1].strip(), pQty('span.currencyINR')productUrl = pImg('a').attr('href'),qty = pQty('.a-row:nth-child(2)')('span').text().split(':')[1].strip()price = int(float(pQty('nobr').text().replace('Rs.','').replace(',', '')))#subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, amountPaid, MStore.ORDER_PLACED, quantity)raisemerchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrlmerchantOrder.placedOnmerchantOrder.merchantOrderIdmerchantOrder.paidAmountdef parseNewStlye(self, merchantOrder, soup):isPrime = FalsemerchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrlfor script in soup.findAll("script"):if script.text:print script.textif "\"isPrime\":" in script.text:isPrime = "\"isPrime\":1" in script.textbreakorderDetailsContainer = soup.body.find(id="orderDetails")divAfterH1 = orderDetailsContainer.h1.next_sibling.next_siblingorderLeftDiv = divAfterH1.divplacedOnSpan = orderLeftDiv.find("span", {'class':'order-date-invoice-item'})merchantOrder.placedOn =placedOnSpan.text.split('Ordered on')[1].strip()merchantOrder.merchantOrderId = placedOnSpan.next_sibling.next_sibling.text.split('Order#')[1].strip()try:priceBox = divAfterH1.next_sibling.next_sibling.next_sibling.next_sibling.find("div", {"class":"a-box-inner"}).div.div.findAll('div', recursive=False)[-1]except:priceBox = divAfterH1.next_sibling.next_sibling.next_sibling.next_sibling.find("div", {"class":"a-box a-last"}).div.div.findAll('div', recursive=False)[-1]priceRows = priceBox.findAll('div', {'class':'a-row'})subTotal = 0shippingPrice = 0promoApplied = 0for priceRow in priceRows:if "Item(s) Subtotal:" in str(priceRow):subTotal = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))elif "Shipping:" in str(priceRow):shippingPrice = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))elif "Grand Total:" in str(priceRow):grandPrice = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))merchantOrder.paidAmount = grandPriceelif "Total:" in str(priceRow):totalPrice = int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))elif "Promotion Applied:" in str(priceRow):promoApplied += int(float(priceRow.div.next_sibling.next_sibling.span.span.text.replace('Rs.','').replace(',', '')))totalPaid = subTotalif promoApplied > 0:totalPaid -= promoAppliedif shippingPrice <= promoApplied:totalPaid += shippingPriceshipmentDivs = orderDetailsContainer.findAll('div', class_='shipment')subOrders = []merchantOrder.subOrders = subOrdersclosedStatus = TruesubOrders = []j=0for shipmentDiv in shipmentDivs:shipmentDiv = shipmentDiv.divtry:trackingUrl = 'http://www.amazon.in/' + shipmentDiv.find('span', class_='track-package-button').span.a.get('href')except:trackingUrl= NonedeliverySpanTop = NoneinnerBoxes = shipmentDiv.findAll('div', recursive = False)statusDiv = innerBoxes[0]subOrderStatus = statusDiv.div.span.text.strip()try:deliverySpanTop = statusDiv.div.div.find_next_sibling('div').spanexcept:pass#if not deliverySpanTop:productDivs = innerBoxes[-1].div.div.findAll('div', recursive=False)merchantOrder.subOrders = subOrdersfor i, productDiv in enumerate(productDivs):deliverySpan = deliverySpanTopif not deliverySpanTop:if i%2==1:continuedeliverySpan = productDiv.div.div.spantry:productDiv = productDivs[i+1]except:passfor prodRow in productDiv.findAll('div', recursive=False):j += 1imgDiv = prodRow.div.divdetailDiv = imgDiv.find_next_sibling('div')detailDivs = detailDiv.findAll('div', recursive=False)arr = re.split("^(\d+) of", detailDivs[0].a.text.strip())(productTitle, quantity) = (arr[-1], (1 if len(arr)<2 else int(arr[1])) )try:unitPrice = int(float(detailDivs[2].span.text.replace('Rs. ','').replace(',','')))except:unitPrice = int(float(detailDivs[3].span.text.replace('Rs. ','').replace(',','')))amountPaid = int((unitPrice*quantity*totalPaid)/subTotal)productUrl = "http://www.amazon.in" + detailDivs[0].a.get('href')subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, amountPaid, MStore.ORDER_PLACED, quantity)subOrder.productCode = productUrl.split('/')[5]subOrder.unitPrice = unitPricesubOrder.merchantSubOrderId = str(j) + " of " + merchantOrder.merchantOrderIdestDlvyTime = datetime.now()if deliverySpan is not None:try:subOrder.estimatedDeliveryDate = deliverySpan.span.text.strip()estDate = subOrder.estimatedDeliveryDate.split("-")[0].strip()subOrder.estimatedDeliveryInt = int(time.mktime((datetime.strptime(estDate, "%A %d %B %Y")).timetuple()))estDlvyTime = datetime.strptime(estDate, "%A %d %B %Y")except:if "Delivered on" in deliverySpan.text:subOrder.deliveredOn = deliverySpan.text.split(":")[1].strip()subOrder.estimatedDeliveryDate = "Not available"createdOn = datetime.fromtimestamp(merchantOrder.createdOnInt)subOrder.trackAfter = int(time.mktime(max(estDlvyTime-timedelta(days=4),createdOn + timedelta(days=3)).timetuple()))subOrder.detailedStatus = subOrderStatusstatus=MStore.ORDER_PLACEDtry:status = self._getStatusFromDetailedStatus(subOrderStatus)except:try:dateString = subOrderStatus.split("Delivered ")[1].strip()subOrder.status = MStore.ORDER_DELIVEREDsubOrder.detailedStatus = 'Delivered'dateString = getDateStringDelivered(dateString)if dateString is not None:subOrder.deliveredOn = datetime.strftime(dateString, '%d-%b-%y')except:try:dateString = subOrderStatus.split("Arriving ")[1].split("by")[0].strip()subOrder.status = MStore.ORDER_SHIPPEDdateString = getDateStringArriving(dateString)if dateString is not None:subOrder.deliveryEstimate = datetime.strftime(dateString, '%d-%b-%y')else:subOrder.deliveryEstimate = subOrderStatus.split("Arriving ")[1].split("by")[0].strip()except:print "Unknown status Alert -", statussubOrder.deliveryCharges = shippingPriceif trackingUrl:subOrder.trackingUrl = trackingUrlsubOrder.imgUrl = imgDiv.img["src"]if isPrime:(cashbackAmount, percentage) = (0,0)else:(cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amountPaid/quantity)dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)subOrder.dealRank = dealRank.get('rank')subOrder.rankDesc = dealRank.get('description')subOrder.maxNlc = dealRank.get('maxNlc')subOrder.minNlc = dealRank.get('minNlc')subOrder.db = dealRank.get('dp')subOrder.itemStatus = dealRank.get('status')cashbackStatus = Store.CB_PENDINGif cashbackAmount <= 0:cashbackStatus = Store.CB_NAsubOrder.cashBackStatus = cashbackStatussubOrder.cashBackAmount = cashbackAmount*quantityif percentage > 0:subOrder.cashBackPercentage = percentageif hasattr(subOrder, 'deliveredOn') or subOrder.status==Store.ORDER_DELIVERED:subOrder.status = Store.ORDER_DELIVEREDsubOrder.closed = Trueif subOrder.cashBackStatus == Store.CB_PENDING:subOrder.cashBackStatus = Store.CB_APPROVEDelif closedStatus:closedStatus= FalsesubOrders.append(subOrder)merchantOrder.status='success'merchantOrder.closed = closedStatusself._updateToOrder(todict(merchantOrder))def parseCancelled(self, merchantOrder,soup):try:fonts = soup.body.findAll("table", recursive=False)[1].findAll("font")if fonts[0].text == "Important Message":if fonts[1].text=="This order has been cancelled.":merchantOrder.closed = TruemerchantOrder.status = "cancelled"merchantOrder.requireDetail = Falseself._updateToOrder(todict(merchantOrder))returnelse:raise ParseException("parseCancelled", "Found detailed status" + fonts[1].text)else:raise ParseException("parseCancelled", "Found detailed status" + fonts[1].text)except:orderDetails = soup.body.find(id="orderDetails")if orderDetails is not None and orderDetails.h4.text == "This order has been cancelled.":merchantOrder.closed = TruemerchantOrder.status = "cancelled"merchantOrder.requireDetail = Falseself._updateToOrder(todict(merchantOrder))else:raise ParseException("parseCancelled", "Found detailed status" + fonts[1].text)def getTrackingUrls(self, userId):missingOrderUrls = []missingOrders = self._getMissingOrders({'userId':userId})for missingOrder in missingOrders:missingOrderUrls.append(ORDER_REDIRECT_URL%(missingOrder['merchantOrderId']))orders = self._getActiveOrders({'userId':userId})count = len(orders)print "count", countprint "Missing Urls"print "*************"print missingOrderUrlsif count > 0:return missingOrderUrls + ['https://www.amazon.in/gp/css/order-history', 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled', 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled&startIndex=10']else:return missingOrderUrlsdef trackOrdersForUser(self, userId, url, rawHtml):rawHtml = re.sub(r'[^\x00-\x7F]+',' ', rawHtml)directory = "/AmazonTrack/User" + str(userId)if not os.path.exists(directory):os.makedirs(directory)try:searchMap = {'userId':userId}collectionMap = {'merchantOrderId':1}activeOrders = self._getActiveOrders(searchMap, collectionMap)datetimeNow = datetime.now()timestamp = int(time.mktime(datetimeNow.timetuple()))print "url----------------", urlcancelledSummary = Falseif url == 'https://www.amazon.in/gp/css/order-history' or 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled' in url:if url == 'https://www.amazon.in/gp/css/order-history':filename = directory + "/orderSummary" + datetime.strftime(datetime.now(), '%d-%m:%H:%M:%S')else:filename = directory + "/cancelledSummary" + datetime.strftime(datetime.now(), '%d-%m:%H:%M:%S')cancelledSummary = Truef = open(filename,'w')f.write(rawHtml) # python will convert \n to os.linesepf.close() # you can omit in most cases as the destructor will call ifsoup = BeautifulSoup(rawHtml,'html5lib')allOrders = soup.find(id="ordersContainer").findAll('div', {'class':'a-box-group a-spacing-base order'})bulk = self.db.merchantOrder.initialize_ordered_bulk_op()for activeOrder in activeOrders:matched=Falsefor orderEle in allOrders:deliveredOn = NonedeliveryEstimate = NoneshippingEstimate = Noneorderdiv = orderEle.find('div', {'class':'a-box a-color-offset-background order-info'}).find('div', {'class':'a-fixed-right-grid-col actions a-col-right'})merchantOrderId = orderdiv.find('span', {'class':'a-color-secondary value'}).text.strip()if merchantOrderId==activeOrder['merchantOrderId']:matched=Trueclosed = Trueif not cancelledSummary:shipments = orderEle.findAll('div',{'class':re.compile('.*?shipment.*?')}, recursive=False)else:shipments = orderEle.findAll('div',{'class':re.compile('.*?a-box.*?')}, recursive=False)shipments.pop(0)for shipment in shipments:orderStatusDesc = Noneshipdiv = shipment.find('div', {'class':'a-box-inner'})sdivs = shipment.div.div.findAll('div', recursive=False)try:orderStatus = sdivs[0].span.text.strip()status = self._getStatusFromDetailedStatus(orderStatus)except:try:dateString = orderStatus.split("Delivered ")[1].strip()status = MStore.ORDER_DELIVEREDdeliveredOn = datetime.strftime(getDateStringDelivered(dateString), '%d-%b-%y')except:try:dateString = sdivs[0].span.text.strip().split("Arriving ")[1].split("by")[0].strip()status = MStore.ORDER_SHIPPEDdeliveryEstimate = datetime.strftime(getDateStringArriving(dateString), '%d-%b-%y')except:print "Unknown status Alert -", orderStatusprint merchantOrderId, "Order Status", orderStatustry:orderStatusDesc = sdivs[0].findAll('div')[1].div.text.strip()except:try:orderStatusDesc = sdivs[0].findAll('div')[1].text.strip()except:print "Order Status Description None or empty for", merchantOrderId, "and User", userIdif orderStatusDesc:print merchantOrderId, "Order status desc", orderStatusDesctry:status = self._getStatusFromDetailedStatus(orderStatus)except:passtry:if "Dispatch estimate" in orderStatusDesc:shippingEstimate = orderStatus.split("Dispatch estimate").split("-")[0].strip()elif "Delivery estimate" in orderStatus:deliveryEstimate = orderStatus.split("Delivery estimate").split("-")[0].strip()elif "Arriving" in orderStatus:deliveryEstimate = datetime.strftime(getDateStringArriving(orderStatus.split("Arriving")[1].strip().split("by")[0].strip()), '%d-%b-%y')except:print "Could not find anything relevent for merchantOrder", merchantOrderId, "and User", userIdclosed=Falsestatus = NoneproductDivs = shipdiv.find('div', {'class':re.compile('.*?a-spacing-top-medium.*?')}).find('div', {'class':'a-row'}).findAll('div', recursive=False)trackingUrl = Nonefor buttonDiv in shipdiv.findAll('span', {'class':'a-button-inner'}):if buttonDiv.find('a').text.strip()=='Track package':trackingUrl = buttonDiv.find('a')['href'].strip()if not trackingUrl.startswith("http"):trackingUrl = "http://www.amazon.in" + trackingUrlbreakfor prodDiv in productDivs:prodDiv.find('div', {'class':'a-fixed-left-grid-inner'})productTitle = prodDiv.find('div', {'class':'a-fixed-left-grid-inner'}).find("div", {'class':'a-row'}).find('a').text.strip()imgUrl = prodDiv.find("img")["src"]for subOrder in activeOrder['subOrders']:if subOrder['closed']==True:continueif subOrder['productTitle'] in productTitle:findMap = {"orderId": activeOrder['orderId'], "subOrders.merchantSubOrderId": subOrder.get("merchantSubOrderId")}updateMap = {}closedStatus = FalseupdateMap['subOrders.$.imgUrl'] = imgUrlupdateMap['subOrders.$.lastTracked'] = timestampif status:updateMap['subOrders.$.detailedStatus'] = orderStatusupdateMap['subOrders.$.status'] = statuscashbackStatus = subOrder.get("cashBackStatus")if status==MStore.ORDER_DELIVERED:if deliveredOn:updateMap['subOrders.$.deliveredOn'] = deliveredOnclosedStatus = TrueupdateMap['subOrders.$.closed'] = Trueif cashbackStatus == Store.CB_PENDING:updateMap['subOrders.$.cashBackStatus'] = Store.CB_APPROVEDif status==MStore.ORDER_CANCELLED:closedStatus = TrueupdateMap['subOrders.$.closed'] = Trueif cashbackStatus == Store.CB_PENDING:updateMap['subOrders.$.cashBackStatus'] = Store.CB_CANCELLEDif status==MStore.ORDER_SHIPPED:if deliveryEstimate:updateMap['subOrders.$.estimatedDeliveryDate'] = deliveryEstimateif trackingUrl is not None:updateMap['subOrders.$.trackingUrl'] = trackingUrlupdateMap['subOrders.$.trackMissing'] = Falseif shippingEstimate:updateMap['subOrders.$.estimatedShippingDate'] = shippingEstimateif not closedStatus:closed = False#{"subOrders.closed":False,"subOrders.trackingUrl":{"$exists":False},"subOrders.trackAfter":{"$lt":utils.getCurrTimeStamp()}updateMap['status']='success'bulk.find(findMap).update({'$set' : updateMap})breakbulk.find({'orderId': activeOrder['orderId']}).update({"$set":{'closed':closed}})breakif not matched:updateMap = {'subOrders.$.trackMissing': True}for subOrder in activeOrder['subOrders']:if subOrder['closed']==True:continuefindMap = {"orderId": activeOrder['orderId'], "subOrders.merchantSubOrderId": subOrder.get("merchantSubOrderId"),"subOrders.trackAfter":{"$lt":utils.getCurrTimeStamp()}}bulk.find({'orderId': activeOrder['orderId']})bulk.find(findMap).update({'$set':updateMap})bulk.execute()return 'PARSED_SUCCESS'else:merchantOrderId = re.findall(r'https://www.amazon.in/gp/css/summary/edit.html\?orderID=(.*)?', url, re.IGNORECASE)[0]print "merchantOrderId", merchantOrderIdmerchantOrder = self.db.merchantOrder.find_one({"merchantOrderId":merchantOrderId})filename = directory + "/" + merchantOrderIdf = open(filename,'w')f.write(rawHtml) # python will convert \n to os.linesepf.close() # you can omit in most cases as the destructor will call ifresult = self.parseOrderRawHtml(merchantOrder['orderId'], merchantOrder['subTagId'], merchantOrder['userId'], rawHtml, url, True)['result']print "result", resulttry:order1 = session.query(OrdersRaw).filter_by(id=merchantOrder['orderId']).first()order1.status = resultorder1.rawhtml = rawHtmlorder1.order_url = urlsession.commit()except:traceback.print_exc()finally:session.close()return 'PARSED_SUCCESS'passreturn 'PARSED_SUCCESS_NO_ORDERS'except:traceback.print_exc()return 'PARSED_FAILED'def _getStatusFromDetailedStatus(self, detailedStatus):if "ordered from" in detailedStatus.lower():return MStore.ORDER_PLACEDfor key, value in self.orderStatusRegexMap.iteritems():if detailedStatus.lower() in value:return keyprint "Detailed Status need to be mapped", "Store:", self.store_id, detailedStatusraise ParseException("_getStatusFromDetailedStatus", "Found new order status" + detailedStatus)def scrapeAffiliate(self, startDate=None, endDate=None):br = getBrowserObject()br.add_password('https://assoc-datafeeds-eu.amazon.com', 'Saholic', 'Fnubyvp')url = AMAZON_AFF_URLresponse = br.open(url)#get data for past 40 days and store it to mongodt = datetime.now()dat = dt - timedelta(days=2)url = AMAZON_AFF_FILE_URL%(datetime.strftime(dat, "%Y%m%d"))response = br.open(url)page = gzip.GzipFile(fileobj=response, mode='rb').read()j=-1for row in page.split("\n"):j += 1if j== 0 or j==1:continuefields = row.split("\t")if len(fields)>1:print fieldsamazonAffiliate = AmazonAffiliateInfo(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], fields[6], fields[7], fields[8], fields[9])print amazonAffiliateself.db.amazonAffiliateInfo.insert(todict(amazonAffiliate))else:breakdef parseTrackingUrl(self, trackingUrl, orderId):print trackingUrlsubOrder = {}page = fetchResponseUsingProxy(trackingUrl)status = MStore.ORDER_SHIPPED#print pagesoup = BeautifulSoup(page)header1 = soup.find("h1")if header1:if header1.text=="Sign In" or header1.text.strip()=="Login":print "Login page is displayed for order id", orderIdself.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":True}})subOrder['login'] = Truereturn subOrdertry:print "Tracking page is displayed for order id", orderIddetailedStatus = soup.find("div", {"class":"top"}).span.text.strip()try:displayStatus = soup.find("div",{"class":"a-column a-span12 shipment-status-content"}).span.text.strip()except:displayStatus = detailedStatusprint displayStatusif detailedStatus.lower().find("delivered")>=0:print detailedStatusdisplayStatus = "Delivered"status = "Delivered"try:subOrder["deliveredOn"] = detailedStatus.split("on")[1].strip()except:passelif detailedStatus.lower() == 'returned':status = 'Cancelled'subOrder['status'] = statussubOrder['detailedStatus'] = displayStatusself.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":False}})except:self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":True}})print "failed to parse", orderIdtraceback.print_exc()return subOrderdef parseInfo(self,):from pyquery import PyQuery as pqorders = list(session.query(Orders).filter_by(store_id=self.store_id).filter_by(status='DETAIL_CREATED').group_by(Orders.user_id).all())try:for order in orders:try:doc = pq(order.rawhtml)#a1= " ".join(["" if not div.text else div.text.replace("\t","").replace("\n","").replace(" ", "") for div in pq(doc('article')[-1])('div')])lists = doc('ul.displayAddressUL li')orderInfo = All_user_addresses()orderInfo.address = lists[-3].textorderInfo.user_id = order.user_idorderInfo.source = 'order'#orderInfo.order_id = order.id#orderInfo.email = None#orderInfo.name = lists[0].text#orderInfo.mobile = NoneadSplit = lists[-2].text.split(",")match = re.match(r"([a-z ]+)([0-9]+)", adSplit[1], re.I)if match:items = match.groups()orderInfo.city = adSplit[0].strip()orderInfo.pincode = items[1].strip()orderInfo.state = items[0].strip().title()session.commit()except:session.rollback()continuefinally:session.close()def main():store = getStore(1)store.parseOrderRawHtml("444444", '123', 14, readSSh('/home/amit/amit.txt'), 'https://www.amazon.in/gp/css/summary/edit.html?orderID=402-0540293-4683515')# orders = list(session.query(OrdersRaw).filter_by(status = 'DETAIL_NOT_CREATED_UNKNOWN').filter(OrdersRaw.id > 61071).all())# session.close()# for o in orders:# try:# store.trackOrdersForUser(o.id, o.order_url, o.rawhtml)# finally:# session.close()#store.trackOrdersForUser(10466, 'https://www.amazon.in/gp/css/summary/edit.html?orderID=403-7498756-0837158', readSSh('/AmazonTrack/User10466/403-7498756-0837158'))#store.trackOrdersForUser(46195, 'https://www.amazon.in/gp/css/summary/edit.html?orderID=404-4294022-1187515', readSSh('/home/amit/amazon.html'))def getSummaryFile(directory):date1 = datetime(2015,1,1)finalFile = Nonetry:for file in os.listdir(directory):if file.startswith("orderSummary"):date2 = datetime.strptime("2015-" + file.split("orderSummary")[1].split(":")[0], "%Y-%d-%m")if date2 > date1:date1 = date2finalFile=fileexcept:print "Missing directory"return finalFiledef parseDetailNotCreated():try:store=getStore(1)orders = session.query(OrdersRaw).filter_by(status='DETAIL_NOT_CREATED_UNKNOWN').all()session.close()for order in orders:store.trackOrdersForUser(order.id, order.order_url, order.rawhtml)finally:session.close()def getDateStringDelivered(dateString='Monday'):print dateStringif dateString.lower()=='today':return date.today()if dateString.lower()=='yesterday':return date.today() - timedelta(days=1)try:return datetime.strptime(dateString, '%d-%b-%y')except:try:#get Closest Date from todaycurDate = date.today()curTime = datetime(curDate.year, curDate.month, curDate.day)curYear = curDate.yearprevYear = curYear - 1dateMax = datetime.strptime(dateString + " " + str(curYear), "%A, %d %b %Y")dateMin = datetime.strptime(dateString + " " + str(prevYear), "%A, %d %b %Y")if dateMax <= curTime:return dateMaxelse:return dateMinexcept:try:days_of_week = ['sunday','monday','tuesday','wednesday','thursday','friday','saturday']deltaDays = curDate.isoweekday() - days_of_week.index(dateString.lower())if deltaDays <= 0:deltaDays= deltaDays + 7curDate = curDate - timedelta(days=deltaDays)print datetime.strftime(curDate, '%d-%b-%y')return curDateexcept:print "could not parse"return Nonedef getDateStringArriving(dateString='Thursday'):print dateStringif dateString.lower()=='today':return date.today()if dateString.lower()=='tomorrow':return date.today() + timedelta(days=1)try:return datetime.strptime(dateString, '%d-%b-%y')except:try:#get Closest Date from todaycurDate = date.today()curTime = datetime(curDate.year, curDate.month, curDate.day)curYear = curDate.yearnextYear = curYear + 1dateMin = datetime.strptime(dateString + " " + str(curYear), "%A, %d %b %Y")dateMax = datetime.strptime(dateString + " " + str(nextYear), "%A, %d %b %Y")if dateMin >= curTime:return dateMinelse:return dateMaxexcept:try:days_of_week = ['sunday','monday','tuesday','wednesday','thursday','friday','saturday']deltaDays = days_of_week.index(dateString.lower()) - curDate.isoweekday()if deltaDays < 0:deltaDays= deltaDays + 7curDate = curDate + timedelta(days=deltaDays)return curDateexcept:print "Could not parse"return Nonedef main1():store = getStore(1)for merchantOrder in store.db.merchantOrder.find({"subOrders":{"$elemMatch":{"cashBackStatus":"Not Applicable", "cashBackPercentage":{"$gt":0}}}}):mo = obj(merchantOrder)for subOrder in mo.subOrders:subOrder.closed=Falseprint "orderId", mo.orderIdstore.populateDerivedFields(mo, False)store.db.merchantOrder.update({"orderId":mo.orderId}, {"$set":todict(mo)})breakdef parseOrderNotCreated():try:store=getStore(1)orders = session.query(OrdersRaw).filter_by(status='ORDER_NOT_CREATED_UNKNOWN').all()session.close()for order in orders:result = store.parseOrderRawHtml(order.id, order.sub_tag, order.user_id, order.rawhtml, order.order_url)['result']order1 = session.query(OrdersRaw).filter_by(id=order.id).first()order1.status = resultsession.commit()finally:session.close()if __name__ == '__main__':# readSSh("/AmazonTrack/User2942/402-2467356-7564367")# readSSh("/AmazonTrack/User10466/orderSummary18-11:21:32:36")# readSSh("/AmazonTrack/User5525/171-0333104-6169933")main()#store=getStore(1)#store.trackOrdersForUser(5525, "https://www.amazon.in/gp/css/summary/edit.html?orderID=171-0333104-6169933", readSSh("/AmazonTrack/User5525/171-0333104-6169933"))