# Rev 20472 | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''Created on Jan 15, 2015@author: amit'''from bs4 import BeautifulSoupfrom bson.binary import Binaryfrom datetime import datetime, date, timedeltafrom dtr import mainfrom dtr.api.Service import Ordersfrom dtr.dao import AffiliateInfo, Order, SubOrderfrom dtr.main import getBrowserObject, ScrapeException, getStore, ParseException, \Store as MStore, ungzipResponse, tprintfrom dtr.storage import Mongofrom dtr.storage.DataService import Order_Parse_Info, All_user_addresses, \OrdersRawfrom dtr.storage.Mongo import getImgSrc, getDealRankfrom dtr.utils import utilsfrom dtr.utils.utils import fetchResponseUsingProxy, readSShfrom elixir import *from pprint import pprintfrom pymongo import MongoClientfrom pyquery import PyQueryfrom urlparse import urlparse, parse_qsfrom xlrd import open_workbookimport csvimport jsonimport os.pathimport pymongoimport reimport timeimport tracebackimport urllibimport urllib2USERNAME='profittill2@gmail.com'PASSWORD='spice@2020'AFFILIATE_URL='http://affiliate.snapdeal.com/login/'POST_URL='https://api-p03.hasoffers.com/v3/Affiliate_Report.json'ORDER_TRACK_URL='https://m.snapdeal.com/orderSummary'CONFIG_URL='http://affiliate.snapdeal.com/publisher/js/config.php'#NEW_REPORT_URI_TEMPLATE = "http://affiliate.snapdeal.com/affiliate/reports/orders/report/?fromDate=%s&toDate=%s&dump_report=True&request_type=product&status=%s"AFF_URL_TEMPLATE = "http://affiliate-feeds.snapdeal.com/feed/api/order?startDate=%s&endDate=%s&status=%s"#"http://affiliate.snapdeal.com/affiliate/reports/orders/report/?fromDate=2015-04-01&toDate=2015-09-15&dump_report=True&request_type=product&status=cancelled#"http://affiliate.snapdeal.com/affiliate/reports/orders/report/?fromDate=07-09-2015&toDate=13-09-2015&dump_report=True&request_type=product&status=cancelledAFF_ID = "33550"AFF_TOKEN="66d526141b9d39c4b2b4ff76eadc34"headers = {'User-agent':'Mozilla/5.0 (iPhone; CPU iPhone OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5376e 
Safari/8536.25','Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8','Accept-Language' : 'en-US,en;q=0.8','Accept-Charset' : 'ISO-8859-1,utf-8;q=0.7,*;q=0.3','Connection':'keep-alive','Accept-Encoding' : 'gzip,deflate,sdch'}class Store(MStore):'''This is to map order statuses of our system to order statuses of snapdeal.And our statuses will change accordingly.'''OrderStatusMap = {MStore.ORDER_PLACED : ['in progress', 'pending for verification', 'not available', 'in process','processing', 'processed', 'under verification', 'readying for dispatch','waiting for courier to pick up', 'processing initiated','prepared for dispatch','dispatching soon','item packed. dispatching soon.','cancellation requested'],MStore.ORDER_DELIVERED : ['delivered','delivered successfully!'],MStore.ORDER_SHIPPED : ['in transit', 'dispatched', 'handed over to courier', 'undelivered. update delivery details!','out for delivery','undelivered. edit delivery details!','undelivered' ,'all delivery attempts failed','delivery attempt failed','delivery details updated'],MStore.ORDER_CANCELLED : ['closed for vendor reallocation', 'cancelled', 'product returned by courier', 'returned', 'n/a', 'courier returned','verification failed''a new order placed with a different seller', 'closed', 'cancellation in progress', 'verification failed. order cancelled','cancelled. payment refunded','closed. new order placed','cancelling','cancelling order','order cancelled','payment failed. order cancelled','returned to snapdeal','pickup sent','refund approved','refund successful!','undelivered. order cancelled','order cancelled. refunded successfully','return request registered','pickup failed. please reschedule','replacement order successfully placed','replacement request received', 'verification failed due to nbp. order cancelled','verification failed due to where customer don\'t want order. 
order cancelled']}CONF_CB_AMOUNT = MStore.CONF_CB_DISCOUNTED_PRICEdef __init__(self,store_id):super(Store, self).__init__(store_id)def getName(self):return "snapdeal"def scrapeAffiliate(self, startDate=None, endDate=None):endDate=date.today() - timedelta(days=1)if startDate is None:startDate = endDate - timedelta(days=45)endDate = endDate.strftime('%Y-%m-%d')startDate = startDate.strftime('%Y-%m-%d')statuses=['cancelled', 'approved']for status in statuses:nextUrl = AFF_URL_TEMPLATE%(startDate, endDate, status)while nextUrl:req = urllib2.Request(nextUrl)nextUrl=''req.add_header('Snapdeal-Affiliate-Id', AFF_ID)req.add_header('Snapdeal-Token-Id', AFF_TOKEN)req.add_header('Accept', 'application/json')resp = urllib2.urlopen(req)ordersDict = json.loads(resp.read())self._saveToAffiliate(ordersDict['productDetails'], status)def _setLastSaleDate(self, saleDate):self.db.lastSaleDtate.update({'storeId':self.store_id}, {'$set':{'saleDate':saleDate}})def _getLastSaleDate(self,):lastDaySaleObj = self.db.lastDaySale.find_one({"storeId":self.store_id})if lastDaySaleObj is None:return datetime.mindef _parseB(self, orderId, subTagId, userId, page, orderSuccessUrl):soup = BeautifulSoup(page)orderDetailContainerDivs = soup.body.find("div", {'class':'cardLayoutWrap'}).findAll('div', recursive=False)orderDetailDiv = orderDetailContainerDivs.pop(0)paymentDetailDiv = orderDetailContainerDivs.pop(0)subOrders = orderDetailContainerDivsplacedOn = orderDetailDiv.span.text.split(':')[1].strip()merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)merchantOrder.placedOn = placedOnmerchantOrder.merchantOrderId = parse_qs(urlparse(orderSuccessUrl).query)['order'][0]paymentDivs = paymentDetailDiv.findAll('div', recursive=False)paymentDivs.pop(0)for orderTr in paymentDivs:orderTrString = str(orderTr)if "Total Amount Paid" in orderTrString:amountPaid = orderTr.div.find('div', {'class':'detailBlock'}).text.strip()merchantOrder.paidAmount = int(re.findall(r'\d+', 
amountPaid)[0])elif "Total Amount" in orderTrString:merchantOrder.totalAmount = re.findall(r'\d+', orderTrString)[0]elif "Delivery Charges" in orderTrString:merchantOrder.deliveryCharges = re.findall(r'\d+', orderTrString)[0]elif "Discount Applied" in orderTrString:merchantOrder.discountApplied = re.findall(r'\d+', orderTrString)[0]elif "Offer Discount" in orderTrString:merchantOrder.discountApplied = re.findall(r'\d+', orderTrString)[0]merchantSubOrders = []for subOrderElement in subOrders:subOrder = self.parseSubOrderB(subOrderElement, placedOn)if subOrder is not None:dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)subOrder.dealRank = dealRank.get('rank')subOrder.rankDesc = dealRank.get('description')subOrder.maxNlc = dealRank.get('maxNlc')subOrder.minNlc = dealRank.get('minNlc')subOrder.db = dealRank.get('dp')subOrder.itemStatus = dealRank.get('status')merchantSubOrders.append(subOrder)merchantOrder.subOrders = merchantSubOrdersreturn merchantOrderdef _parse(self, orderId, subTagId, userId, page, orderSuccessUrl):#page=page.decode("utf-8")soup = BeautifulSoup(page)#orderHead = soup.find(name, attrs, recursive, text)sections = soup.findAll("section")#print sectionsorder = sections[1]orderTrs = order.findAll("tr")placedOn = str(orderTrs[0].findAll("td")[1].text)#Pop two section elementssections.pop(0)sections.pop(0)subOrders = sectionsmerchantSubOrders = []merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)merchantOrder.placedOn = placedOnmerchantOrder.merchantOrderId = re.findall(r'\d+', str(soup.find("div", {"class":"deals_heading"})))[1]for orderTr in orderTrs:orderTrString = str(orderTr)if "Total Amount" in orderTrString:merchantOrder.totalAmount = re.findall(r'\d+', orderTrString)[0]elif "Delivery Charges" in orderTrString:merchantOrder.deliveryCharges = re.findall(r'\d+', orderTrString)[0]elif "Discount Applied" in orderTrString:merchantOrder.discountApplied = re.findall(r'\d+', 
orderTrString)[0]elif "Paid Amount" in orderTrString:merchantOrder.paidAmount = re.findall(r'\d+', orderTrString)[0]for subOrderElement in subOrders:subOrders = self.parseSubOrder(subOrderElement, placedOn)merchantSubOrders.extend(subOrders)merchantOrder.subOrders = merchantSubOrdersreturn merchantOrderdef parseSubOrder(self, subOrderElement, placedOn):subOrders = []productUrl = str(subOrderElement.find("a")['href'])subTable = subOrderElement.find("table", {"class":"lrPad"})subTrs = subTable.findAll("tr")unitPrice=NoneofferDiscount = 0deliveryCharges = NoneamountPaid = Noneamount = 0sdCash = 0unitPrice = 0for subTr in subTrs:subTrString = str(subTr)if "Unit Price" in subTrString:unitPrice = int(re.findall(r'\d+', subTrString)[0])if "Quantity" in subTrString:qty = int(re.findall(r'\d+', subTrString)[0])elif "Offer Discount" in subTrString:offerDiscount += int(re.findall(r'\d+', subTrString)[0])elif "SD Cash" in subTrString:sdCash = int(re.findall(r'\d+', subTrString)[0])elif "Delivery Charges" in subTrString:deliveryCharges = int(re.findall(r'\d+', subTrString)[0])elif "Subtotal" in subTrString:if int(qty) > 0:amountPaid = int(re.findall(r'\d+', subTrString)[0])/qtyelse:amountPaid = 0if qty>0:amount = unitPrice - offerDiscount - sdCashamount = 0 if amount < 0 else amountdiv1 = subOrderElement.find("div", {"class": "blk lrPad subordrs"})if div1 is None:raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")for strDiv in str(div1).split("<div class=\"seperator\"></div>"):div = BeautifulSoup(strDiv)productTitle = str(subOrderElement.find("a").text)productUrl = "http://m.snapdeal.com/" + productUrlsubOrder = SubOrder(productTitle, productUrl, placedOn, amountPaid)subOrder.amountPaid = amountPaidsubOrder.deliveryCharges = deliveryChargessubOrder.offerDiscount = offerDiscountsubOrder.unitPrice = int(unitPrice)subOrder.productCode = re.findall(r'\d+$', productUrl)[0]subOrder.imgUrl = Mongo.getImgSrc(subOrder.productCode, 
self.store_id).get('thumbnail')cashbackStatus = Store.CB_NAcashbackAmount = 0percentage = 0if amount > 0:(cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amount)if cashbackAmount > 0:cashbackStatus = Store.CB_PENDINGsubOrder.cashBackStatus = cashbackStatussubOrder.cashBackAmount = cashbackAmountsubOrder.cashBackPercentage = percentagetrackAnchor = div.find("a")if trackAnchor is not None:subOrder.tracingkUrl = str(trackAnchor['href'])divStr = str(div)divStr = divStr.replace("\n","").replace("\t", "")for line in divStr.split("<br />"):if "Suborder ID" in line:subOrder.merchantSubOrderId = re.findall(r'\d+', line)[0]elif "Status" in line:print linesubOrder.detailedStatus = re.findall('>(.*?)</span>', line, re.IGNORECASE)[0]elif "Est. Shipping Date" in line:subOrder.estimatedShippingDate = line.split(":")[1].strip()elif "Est. Delivery Date" in line:subOrder.estimatedDeliveryDate = line.split(":")[1].strip()elif "Courier Name" in line:subOrder.courierName = line.split(":")[1].strip()elif "Tracking No" in line:subOrder.trackingNumber = line.split(":")[1].strip()subOrders.append(subOrder)return subOrdersdef parseSubOrderB(self, subOrderElement, placedOn):subOrders = []prodDivs = subOrderElement.findAll('div', recursive=False)prodDetailDiv = prodDivs[1].findAll('div', recursive=False)offerDiscount = 0deliveryCharges = NoneamountPaid = 0sdCash = 0unitPrice = 0paymentDivs = prodDivs[2].findAll('div', recursive=False)for paymentDiv in paymentDivs:strPaymentDiv = str(paymentDiv)if "Unit Price" in strPaymentDiv:try:unitPrice = int(re.findall(r'\d+', strPaymentDiv)[0])except:return Noneelif "Offer Discount" in strPaymentDiv:offerDiscount += int(re.findall(r'\d+', strPaymentDiv)[0])elif "Discount" in strPaymentDiv:offerDiscount += int(re.findall(r'\d+', strPaymentDiv)[0])elif "SD Cash" in strPaymentDiv:sdCash = int(re.findall(r'\d+', strPaymentDiv)[0])elif "Delivery Charges" in strPaymentDiv:deliveryCharges = int(re.findall(r'\d+', 
strPaymentDiv)[0])elif "Subtotal" in strPaymentDiv:amountPaid = int(re.findall(r'\d+', paymentDiv.find('div', {'class':'itemPriceDetail'}).text)[0])amount = unitPrice - offerDiscount - sdCashimgDiv = prodDetailDiv[0]otherDiv = prodDetailDiv[1]productTitle = otherDiv.find('div',{'class':'orderName'}).text.strip()productUrl = imgDiv.a['href']subOrder = SubOrder(productTitle, productUrl, placedOn, amountPaid)subOrder.merchantSubOrderId = prodDivs[0].text.split(':')[1].strip()subOrder.detailedStatus = otherDiv.find('div',{'class':'orderStatus'}).span.text.strip()if subOrder.detailedStatus.lower() == "processing":processingDetailedStatus = subOrderElement.find('div', {'class':'trackingMessage'}).text.strip()if processingDetailedStatus.lower() == 'a new order placed with a different seller':subOrder.detailedStatus = processingDetailedStatusdeliveryStatus = otherDiv.find('div',{'class':'orderDelivery'})if deliveryStatus is not None:delString = deliveryStatus.text.strip()arr = delString.split(':')if "On" in arr[0]:subOrder.deliveredOn = arr[1].strip()elif "Exp. Delivery by" in arr[0]:subOrder.estimatedDeliveryDate = arr[1].strip()elif "Est. delivery between" in arr[0]:subOrder.estimatedDeliveryDate = arr[0].split("between")[1].strip()elif "Est. 
shipping between" in arr[0]:subOrder.estimatedShippingDate = arr[0].split("between")[1].strip()else:subOrder.estimatedShippingDate = arr[1].strip()subOrder.imgUrl = imgDiv.a.img['src']subOrder.productCode = re.findall(r'\d+$', productUrl)[0]subOrder.deliveryCharges = deliveryChargessubOrder.offerDiscount = offerDiscountsubOrder.unitPrice = int(unitPrice)cashbackStatus = Store.CB_NAcashbackAmount = 0percentage = 0if amountPaid > 0:(cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amount)if cashbackAmount > 0:cashbackStatus = Store.CB_PENDINGsubOrder.cashBackStatus = cashbackStatussubOrder.cashBackAmount = cashbackAmountsubOrder.cashBackPercentage = percentagecourierDet = subOrderElement.find('div', {'class':'courierDetail'})if courierDet is not None:subOrder.courierName = courierDet.span.text.strip()trackingDet = subOrderElement.find('div', {'class':'trackingNo'})if trackingDet is not None:subOrder.trackingUrl = trackingDet.span.a['href']subOrder.trackingNumber = trackingDet.span.a.text.strip()subOrders.append(subOrder)return subOrderdef getOrderJSON(self, pq, supcMap):#print rawHtml# replace_with = {# '<': '>',# '>': '<',# '&': '&',# '"': '"', # should be escaped in attributes# "'": ''' # should be escaped in attributes# }secondryIdentiferSupcMap = {}for scriptTag in pq.items("script"):if "var reqData1 =" in scriptTag.text():match = re.search("(\[.*?\])",scriptTag.text(), re.DOTALL)a = match.group(1)if a:for mapElement in json.loads(a):secondryIdentiferSupcMap[mapElement["pog_id"]] = mapElement["supc"]breakjsonValue = pq("#orderJSON").attr("value")jsonValue.replace(""", '"')jsonValue.replace("&", '&')jsonValue.replace(">", '>')jsonValue.replace("<", '<')jsonValue.replace("'", "'")allSupcElements = pq('div.mdt-layout')('div.mdt-card')('div.order-item')for supcElement in allSupcElements:try:supcElement = pq(supcElement)title = supcElement('div.order-heading').text().strip()productUrl = supcElement.attr("data-href")imgUrl = 
supcElement.find('img').attr('src')secondaryIdentifier = productUrl.split("/")[-1]if secondryIdentiferSupcMap.has_key(secondaryIdentifier):supc = secondryIdentiferSupcMap[secondaryIdentifier]else:supc = self.catalogdb.MasterData.find_one({"secondaryIdentifier": secondaryIdentifier, "source_id":self.store_id})if supc:supc = supc['identifier']supcMap[supc] = {'title':title, 'imgUrl':imgUrl, 'productUrl':productUrl}except:passreturn json.loads(jsonValue)def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):#print merchantOrderresp = {}orderPart = re.findall('.*(\?.*?)$', orderSuccessUrl,re.IGNORECASE)[0]url = ORDER_TRACK_URL + orderPartmoId = orderPart.split("order=")[-1].split("&")[0]if self.db.merchantOrder.find_one({"merchantOrderId":moId}):resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'return respsupcMap = {}try:pq = PyQuery(rawHtml)try:if pq("title").text()=="Webpage not available":raiseorderJSON = self.getOrderJSON(pq, supcMap)except:traceback.print_exc()resp['result'] = 'ORDER_NOT_CREATED_KNOWN'return resp'''page =fetchResponseUsingProxy(url)try:merchantOrder = self._parseB(orderId, subTagId, userId, page, orderSuccessUrl)except:traceback.print_exc()merchantOrder = self._parse(orderId, subTagId, userId, page, orderSuccessUrl)'''merchantOrder = self._parseC(orderId, subTagId, userId, supcMap, orderJSON, orderSuccessUrl)merchantOrder.orderTrackingUrl = urlif self._saveToOrder(todict(merchantOrder)):resp['result'] = 'ORDER_CREATED'else:resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'print "=================", resp, orderId, "=============="return respexcept:print "Error occurred"traceback.print_exc()resp['result'] = 'ORDER_NOT_CREATED'print "=================", resp, orderId, "=============="return resp#soup = BeautifulSoup(rawHtml,convertEntities=BeautifulSoup.HTML_ENTITIES)#soup.find(name, attrs, recursive, text)def _parseC(self, orderId, subTagId, userId, supcMap, orderJSON, orderSuccessUrl):print json.dumps(orderJSON)merchantOrder = 
Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)placedOn = datetime.strftime(utils.fromTimeStamp(orderJSON['created']/1000), "%a, %d %b, %Y")merchantOrder.placedOn = placedOnmerchantOrder.merchantOrderId = orderJSON['code']merchantOrder.paidAmount = orderJSON['paidAmount']merchantOrder.deliveryCharges = orderJSON['shippingCharges']merchantOrder.closed= FalsemerchantSubOrders = []for s in orderJSON['suborders']:map1 = supcMap[s['supcCode']]amountPaid = s['paidAmount']productTitle = map1['title']productUrl = map1['productUrl']subOrder = SubOrder(productTitle, productUrl, placedOn, amountPaid)if(s.get('deliveryDate') is not None):print "Delivered On",subOrder.deliveredOn = datetime.strftime(utils.fromTimeStamp(s.get('deliveryDate')/1000),'%d %b, %Y')subOrder.status = MStore.ORDER_DELIVEREDsubOrder.detailedStatus = MStore.ORDER_DELIVEREDelif s['suborderStatus'].get('macroDescription')== 'Closed':if s['suborderStatus'].get('value')== 'Close for vendor reallocation':subOrder.detailedStatus = 'Close for vendor reallocation'subOrder.status = MStore.ORDER_CANCELLEDtry:subOrder.detailedStatus = s['suborderStatus']['macroDescription']subOrder.status = self._getStatusFromDetailedStatus(subOrder.detailedStatus)except:print "----------------", s['suborderStatus']subOrder.merchantSubOrderId = s['code']subOrder.deliveryCharges = s['shippingCharges']subOrder.productCode = re.findall(r'\d+$', productUrl)[0]dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)subOrder.dealRank = dealRank.get('rank')subOrder.rankDesc = dealRank.get('description')subOrder.maxNlc = dealRank.get('maxNlc')subOrder.minNlc = dealRank.get('minNlc')subOrder.db = dealRank.get('dp')subOrder.itemStatus = dealRank.get('status')subOrder.imgUrl = map1['imgUrl']subOrder.unitPrice = s['offerPrice'] -s['internalCashbackValue'] - s['externalCashbackValue']subOrder.amount = subOrder.unitPrice - s['offerDiscount'] - s['sdCash']try:try:if 
s['shipDateRange']['start']==s['shipDateRange']['end']:subOrder.estimatedShippingDate = datetime.strftime(utils.fromTimeStamp(s['shipDateRange']['start']/1000),'%d %b, %Y')else:subOrder.estimatedShippingDate = datetime.strftime(utils.fromTimeStamp(s['shipDateRange']['start']/1000),'%d %b, %Y') + " - " + datetime.strftime(utils.fromTimeStamp(s['shipDateRange']['end']/1000),'%d %b, %Y')except:if s['deliveryDateRange']['start']==s['deliveryDateRange']['end']:subOrder.estimatedDeliveryDate = datetime.strftime(utils.fromTimeStamp(s['deliveryDateRange']['start']/1000),'%d %b, %Y')else:subOrder.estimatedDeliveryDate = datetime.strftime(utils.fromTimeStamp(s['deliveryDateRange']['start']/1000),'%d %b, %Y') + " - " + datetime.strftime(utils.fromTimeStamp(s['deliveryDateRange']['end']/1000),'%d %b, %Y')except:passsubOrder.offerDiscount = s['offerDiscount']subOrder.unitPrice = s['offerPrice']merchantSubOrders.append(subOrder)merchantOrder.subOrders = merchantSubOrdersself.populateDerivedFields(merchantOrder)return merchantOrderdef _getStatusFromDetailedStatus(self, detailedStatus):for key, value in Store.OrderStatusMap.iteritems():if detailedStatus.lower() in value:return keyelif 'order cancelled' in detailedStatus.lower():return keyprint "Detailed Status need to be mapped", detailedStatus, self.store_idreturn Nonedef scrapeStoreOrders(self,):#collectionMap = {'palcedOn':1}orders = self._getActiveOrders()for order in orders:order = obj(order)print "Order", self.store_name, order.orderIdtry:url = order.orderTrackingUrlpage = fetchResponseUsingProxy(url, headers=headers)#page=page.decode("utf-8")try:pq = PyQuery(page)subOrderStatusMap={}for el in pq('div.cardLayout.pad-10.mb-10'):elpq = PyQuery(el)try:subOrderId = elpq("div.subOrderId").text().split(":")[1].strip()subOrderStatusMap[subOrderId] = elpqexcept:passclosedForReco = {}for suborder in order.subOrders:if suborder.closed:if suborder.merchantSubOrderId in subOrderStatusMap:del 
subOrderStatusMap[suborder.merchantSubOrderId]continueif subOrderStatusMap.has_key(suborder.merchantSubOrderId):elpq = subOrderStatusMap.get(suborder.merchantSubOrderId)del subOrderStatusMap[suborder.merchantSubOrderId]if elpq("#trackLink").attr("href"):suborder.trackingUrl = elpq("#trackLink").attr("href")if elpq('span.subOrdStatusText').text():suborder.estimatedDeliveryDate=elpq('span.subOrdStatusText').text().strip()suborder.detailedStatus = elpq("div.orderStatus span").text().strip()if suborder.detailedStatus in ['Closed. New Order Placed','Closed. Placing New Order'] :closedForReco[suborder.merchantSubOrderId] = subordersuborder.status = self._getStatusFromDetailedStatus(suborder.detailedStatus)if len(closedForReco) == len(subOrderStatusMap) and len(closedForReco)>0:productCode = ''allProductsSame = Truefor subOrderId, subo in closedForReco.iteritems():if productCode == '':productCode = subo.productCodecontinueif subo.productCode != productCode:allProductsSame = FalsebreaksubOrderStatusMapif allProductsSame:print "singlereco", order.orderIdfor key, elpq in subOrderStatusMap.iteritems():suborderNew = obj(todict(suborder))suborderNew.merchantSubOrderId = keyif elpq("#trackLink").attr("href"):suborderNew.trackingUrl = elpq("#trackLink").attr("href")if elpq('span.subOrdStatusText').text():suborderNew.estimatedDeliveryDate=elpq('span.subOrdStatusText').text().strip()suborderNew.detailedStatus = elpq("div.orderStatus span").text().strip()suborderNew.status = self._getStatusFromDetailedStatus(suborderNew.detailedStatus)order.subOrders.append(suborderNew)else:print "All products not same referring transaction url", order.orderIdelse:pass#Lookout for ordersummary page for exact mappingself.populateDerivedFields(order, update=True)self._updateToOrder(todict(order))except:traceback.print_exc()soup = BeautifulSoup(page)try:self.tryBParsing(order, soup)except:traceback.print_exc()sections = soup.findAll("section")orderEl = sections[1]orderTrs = 
orderEl.findAll("tr")placedOn = str(orderTrs[0].findAll("td")[1].text)sections.pop(0)sections.pop(0)subOrders = sectionsbulk = self.db.merchantOrder.initialize_ordered_bulk_op()closed = Truefor subOrderElement in subOrders:div1 = subOrderElement.findAll("div", {"class": "blk lrPad subordrs"})if len(div1)<=0:raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")subOrder = NonebreakFlag = Falsefor strDiv in str(div1).split("<div class=\"seperator\"></div>"):div = BeautifulSoup(strDiv)divStr = str(div)divStr = divStr.replace("\n","").replace("\t", "")updateMap = {}for line in divStr.split("<br />"):if "Suborder ID" in line:merchantSubOrderId = re.findall(r'\d+', line)[0]#break if suborder is inactivesubOrder = self._isSubOrderActive(order, merchantSubOrderId)if subOrder is None:subOrders = self.parseSubOrder(subOrderElement, placedOn)self.db.merchantOrder.update({"orderId":order['orderId']},{'$push':{"subOrders":{"$each":todict(subOrders)}}})print "Added new suborders to Order id - ", order['orderId']closed = FalsebreakFlag = Truebreakelif subOrder['closed']:breakFlag = Truebreakelse:findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": merchantSubOrderId}elif "Status :" in line:detailedStatus = re.findall('>(.*?)</span>', line, re.IGNORECASE)[0]updateMap["subOrders.$.detailedStatus"] = detailedStatusstatus = self._getStatusFromDetailedStatus(detailedStatus)closedStatus = status in [Store.ORDER_DELIVERED, Store.ORDER_CANCELLED]if status is not None:updateMap["subOrders.$.status"] = statusif detailedStatus == 'Closed For Vendor Reallocation':#if it is more than 6hours mark closed.closeAt = subOrder.get("closeAt")if closeAt is None:closeAt = datetime.now() + timedelta(hours=6)updateMap["subOrders.$.closeAt"] = datetime.strftime(closeAt,"%Y-%m-%d %H:%M:%S")else:closeAt = datetime.strptime(closeAt,"%Y-%m-%d %H:%M:%S")if datetime.now() > closeAt:closedStatus = True#Close if not applicable suborders are not closedif 
utils.fromTimeStamp(order['createdOnInt'] + 35*86400*1000) < datetime.now() and subOrder['cashBackStatus']==utils.CB_NA:closedStatus=Trueif closedStatus:#if status is closed then change the paybackStatus accordinglyupdateMap["subOrders.$.closed"] = Trueif status == Store.ORDER_DELIVERED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVEDelif status == Store.ORDER_CANCELLED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_CANCELLEDelse:closed = Falseelif "Est. Shipping Date" in line:estimatedShippingDate = line.split(":")[1].strip()updateMap["subOrders.$.estimatedShippingDate"] = estimatedShippingDateelif "Est. Delivery Date" in line:estimatedDeliveryDate = line.split(":")[1].strip()updateMap["subOrders.$.estimatedDeliveryDate"] = estimatedDeliveryDateelif "Courier Name" in line:courierName = line.split(":")[1].strip()updateMap["subOrders.$.courierName"] = courierNameelif "Tracking No" in line:trackingNumber = line.split(":")[1].strip()updateMap["subOrders.$.trackingNumber"] = trackingNumberif breakFlag:continuebulk.find(findMap).update({'$set' : updateMap})bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': closed,"parseError":False}})result = bulk.execute()tprint(result)except:traceback.print_exc()tprint("Could not update " + str(order.orderId) + "For store " + self.getName())self.db.merchantOrder.update({"orderId":order.orderId}, {"$set":{"parseError":True}})def tryBParsing(self, order, soup):orderDetailContainerDivs = soup.body.find("div", {'class':'cardLayoutWrap'}).findAll('div', recursive=False)orderDetailDiv = orderDetailContainerDivs.pop(0)placedOn = orderDetailDiv.span.text.split(':')[1].strip()orderDetailContainerDivs.pop(0)subOrders = orderDetailContainerDivsbulk = self.db.merchantOrder.initialize_ordered_bulk_op()closed = Truefor subOrderElement in subOrders:prodDivs = subOrderElement.findAll('div', 
recursive=False)merchantSubOrderId = prodDivs[0].text.split(':')[1].strip()subOrder = NonesubOrder = self._isSubOrderActive(order, merchantSubOrderId)if subOrder is None:try:subOrder = self.parseSubOrderB(subOrderElement, placedOn)if subOrder is None:continueself.db.merchantOrder.update({"orderId":order['orderId']},{'$push':{"subOrders":{"$each":todict([subOrder])}}})print "Added new suborders to Order id - ", order['orderId']closed = Falseexcept:passcontinueelif subOrder['closed']:continueelse:prodDetailDiv = prodDivs[1].findAll('div', recursive=False)otherDiv = prodDetailDiv[1]trackBlock = subOrderElement.find('div',{'class':'trackingDetailsBlock'})findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": merchantSubOrderId}updateMap = {}detailedStatus = otherDiv.find('div',{'class':'orderStatus'}).span.text.strip()if 'A new order placed with a different seller' in str(trackBlock):#if it is more than 6hours mark closed.closeAt = subOrder.get("closeAt")if closeAt is None:closeAt = datetime.now() + timedelta(hours=6)updateMap["subOrders.$.closeAt"] = datetime.strftime(closeAt,"%Y-%m-%d %H:%M:%S")bulk.find(findMap).update({'$set' : updateMap})closed=Falsecontinueelse:closeAt = datetime.strptime(closeAt,"%Y-%m-%d %H:%M:%S")if datetime.now() > closeAt:detailedStatus = 'A new order placed with a different seller'status = self._getStatusFromDetailedStatus(detailedStatus)closedStatus = status in [Store.ORDER_DELIVERED, Store.ORDER_CANCELLED]updateMap["subOrders.$.detailedStatus"] = detailedStatusif status is not None:updateMap["subOrders.$.status"] = statusif closedStatus:#if status is closed then change the paybackStatus accordinglyupdateMap["subOrders.$.closed"] = Trueif status == Store.ORDER_DELIVERED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVEDelif status == Store.ORDER_CANCELLED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = 
Store.CB_CANCELLEDelse:closed = FalsedeliveryStatus = otherDiv.find('div',{'class':'orderDelivery'})if deliveryStatus is not None:delString = deliveryStatus.text.strip()arr = delString.split(':')if "On" in arr[0]:updateMap['subOrders.$.deliveredOn'] = arr[1].strip()elif "Exp. Delivery by" in arr[0]:updateMap['subOrders.$.estimatedDeliveryDate'] = arr[1].strip()elif "Est. delivery between" in arr[0]:updateMap['subOrders.$.estimatedDeliveryDate'] = delString.split("between")[1].strip()elif "Est. shipping between" in arr[0]:updateMap['subOrders.$.estimatedShippingDate'] = delString.split("between")[1].strip()else:updateMap['subOrders.$.estimatedShippingDate'] = arr[1].strip()courierDet = subOrderElement.find('div', {'class':'courierDetail'})if courierDet is not None:updateMap['subOrders.$.courierName'] = courierDet.span.text.strip()trackingDet = subOrderElement.find('div', {'class':'trackingNo'})if trackingDet is not None:updateMap['subOrders.$.trackingUrl'] = trackingDet.span.a['href']updateMap['subOrders.$.trackingNumber'] = trackingDet.span.a.text.strip()bulk.find(findMap).update({'$set' : updateMap})bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': closed,"parseError":False}})result = bulk.execute()tprint(result)def _saveToAffiliate(self, offers, status):collection = self.db.snapdealOrderAffiliateInfo1#mcollection = self.db.merchantOrderfor offer in offers:offer = self.covertToObj(offer)if offer.orderId:dict1 = todict(offer)dict1["_id"] = dict1["orderId"] + "-" + dict1["productCode"]dict1['status'] = statuscollection.save(dict1)# def _saveToAffiliate(self, offers):# collection = self.db.snapdealOrderAffiliateInfo# mcollection = self.db.merchantOrder# for offer in offers:# offer = self.covertToObj(offer)# collection.update({"adId":offer.adId, "saleAmount":offer.saleAmount, "payOut":offer.payOut},{"$set":todict(offer)}, upsert=True)# mcollection.update({"subTagId":offer.subTagId, "storeId":self.store_id, "subOrders.missingAff":True}, 
{"$set":{"subOrders.$.missingAff":False}})def _getAllOffers(self, br, token):allOffers = []nextPage = 1while True:data = getPostData(token, nextPage)response = br.open(POST_URL, data)rmap = json.loads(ungzipResponse(response))if rmap is not None:rmap = rmap['response']print rmapif rmap is not None and len(rmap['errors'])==0:allOffers += rmap['data']['data']nextPage += 1if rmap['data']['pageCount']<nextPage:breakreturn allOffers# def covertToObj(self,offer):# offerData = offer['Stat']# offer1 = AffiliateInfo(offerData['affiliate_info1'], self.store_id, offerData['conversion_status'], offerData['ad_id'],# offerData['datetime'], int(float(offerData['payout'])), offer['Offer']['name'], offerData['ip'], int(float(offerData['conversion_sale_amount'])))# offer1.saleTime = int(time.mktime(datetime.strptime(offer1.saleDate, "%Y-%m-%d %H:%M:%S").timetuple()))# return offer1def parseInfo(self,):from pyquery import PyQuery as pqorders = list(session.query(Orders).filter_by(store_id=self.store_id).filter_by(status='ORDER_CREATED').group_by(Orders.user_id).all())try:for order in orders:try:doc = pq(order.rawhtml)a1= " ".join(["" if not div.text else div.text.replace("\t","").replace("\n","").strip() for div in pq(doc('article')[-1])('div')]).strip()a2 = ",".join(["" if not div.text else div.text.replace("\t","").replace("\n","").replace(" ", "") for div in pq(doc('article')[-2])('div')]).strip()user_address = All_user_addresses()user_address.address = a1all = a2.split(",")user_address.source = 'order'user_address.user_id = order.user_id#user_address. = all[3].split(":")[1]#user_address. 
= all[2].split(":")[1]#orderInfo.mobile = all[-1].split(":")[1]adSplit = a1.split(",")user_address.city = adSplit[-2].strip()user_address.pincode = adSplit[-1].strip().split(" ")[0]user_address.state = adSplit[-1].strip().split(" ")[1]session.commit()except:session.rollback()continuefinally:session.close()def covertToObj(self,offer):offer1 = AffiliateInfo(offer["affiliateSubId1"], 3, None, None, utils.toTimeStamp(datetime.strptime(offer["dateTime"], "%m/%d/%Y %H:%M:%S")),offer["commissionEarned"], None, None, offer["sale"])offer1.orderId = offer.get('orderCode') if offer.get('orderCode') else Noneoffer1.productCode = offer["product"]offer1.unitPrice = offer["price"]offer1.quantity = offer["quantity"]offer1.saleTime = offer["dateTime"]return offer1def getPostData(token, page = 1, limit= 20, startDate=None, endDate=None):endDate=date.today() + timedelta(days=1)startDate=endDate - timedelta(days=31)parameters = (("page",str(page)),("limit",str(limit)),("fields[]","Stat.offer_id"),("fields[]","Stat.datetime"),("fields[]","Offer.name"),("fields[]","Stat.conversion_status"),("fields[]","Stat.conversion_sale_amount"),("fields[]","Stat.payout"),("fields[]","Stat.ip"),("fields[]","Stat.ad_id"),("fields[]","Stat.affiliate_info1"),("sort[Stat.datetime]","desc"),("filters[Stat.date][conditional]","BETWEEN"),("filters[Stat.date][values][]",startDate.strftime('%Y-%m-%d')),("filters[Stat.date][values][]",endDate.strftime('%Y-%m-%d')),("data_start",startDate.strftime('%Y-%m-%d')),("data_end",endDate.strftime('%Y-%m-%d')),("Method","getConversions"),("NetworkId","jasper"),("SessionToken",token),)#Encode the parametersreturn urllib.urlencode(parameters)def main():#print todict([1,2,"3"])store = getStore(3)#store.scrapeStoreOrders()store.parseOrderRawHtml(332222, "3232311", 2, readSSh("/home/amit/sample.html"), 
"https://m.snapdeal.com/purchaseMobileComplete?code=1a011639e72588db39169df568654620&order=17772748329&sdInstant=false")#store.scrapeAffiliate()#https://m.snapdeal.com/purchaseMobileComplete?code=3fbc8a02a1c4d3c4e906f46886de0464&order=5808451506#https://m.snapdeal.com/purchaseMobileComplete?code=9f4dfa49ff08a16d04c5e4bf519506fc&order=9611672826# orders = list(session.query(OrdersRaw).filter_by(store_id=3).filter_by(status='ORDER_NOT_CREATED').all())# for o in orders:# result = store.parseOrderRawHtml(o.id, o.sub_tag, o.user_id, o.rawhtml, o.order_url)['result']# o.status = result# session.commit()# session.close()# store.scrapeStoreOrders()#store._isSubOrderActive(8, "5970688907")#store.scrapeAffiliate(datetime(2015,4,1))#store.scrapeStoreOrders()#store.parseInfo()class obj(object):def __init__(self, d):for a, b in d.items():if isinstance(b, (list, tuple)):setattr(self, a, [obj(x) if isinstance(x, dict) else x for x in b])else:setattr(self, a, obj(b) if isinstance(b, dict) else b)def todict(obj, classkey=None):if isinstance(obj, dict):data = {}for (k, v) in obj.items():data[k] = todict(v, classkey)return dataelif hasattr(obj, "_ast"):return todict(obj._ast())elif hasattr(obj, "__iter__"):return [todict(v, classkey) for v in obj]elif hasattr(obj, "__dict__"):data = dict([(key, todict(value, classkey))for key, value in obj.__dict__.iteritems()if not callable(value) and not key.startswith('_')])if classkey is not None and hasattr(obj, "__class__"):data[classkey] = obj.__class__.__name__return dataelse:return objif __name__ == '__main__':main()