# Rev 17462 | Rev 18037 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''Created on Jan 15, 2015@author: amit'''from bs4 import BeautifulSoupfrom bson.binary import Binaryfrom datetime import datetime, date, timedeltafrom dtr import mainfrom dtr.api.Service import Ordersfrom dtr.dao import AffiliateInfo, Order, SubOrderfrom dtr.main import getBrowserObject, ScrapeException, getStore, ParseException, \Store as MStore, ungzipResponse, tprintfrom dtr.storage import Mongofrom dtr.storage.DataService import Order_Parse_Info, All_user_addresses, \OrdersRawfrom dtr.storage.Mongo import getImgSrc, getDealRankfrom dtr.utils import utilsfrom dtr.utils.utils import fetchResponseUsingProxy, readSShfrom elixir import *from pprint import pprintfrom pymongo import MongoClientfrom pyquery import PyQueryfrom urlparse import urlparse, parse_qsfrom xlrd import open_workbookimport csvimport jsonimport os.pathimport pymongoimport reimport timeimport tracebackimport urllibimport urllib2USERNAME='profittill2@gmail.com'PASSWORD='spice@2020'AFFILIATE_URL='http://affiliate.snapdeal.com/login/'POST_URL='https://api-p03.hasoffers.com/v3/Affiliate_Report.json'ORDER_TRACK_URL='https://m.snapdeal.com/orderSummary'CONFIG_URL='http://affiliate.snapdeal.com/publisher/js/config.php'#NEW_REPORT_URI_TEMPLATE = "http://affiliate.snapdeal.com/affiliate/reports/orders/report/?fromDate=%s&toDate=%s&dump_report=True&request_type=product&status=%s"AFF_URL_TEMPLATE = "http://affiliate-feeds.snapdeal.com/feed/api/order?startDate=%s&endDate=%s&status=%s"#"http://affiliate.snapdeal.com/affiliate/reports/orders/report/?fromDate=2015-04-01&toDate=2015-09-15&dump_report=True&request_type=product&status=cancelled#"http://affiliate.snapdeal.com/affiliate/reports/orders/report/?fromDate=07-09-2015&toDate=13-09-2015&dump_report=True&request_type=product&status=cancelledAFF_ID = "33550"AFF_TOKEN="66d526141b9d39c4b2b4ff76eadc34"class Store(MStore):'''This is to map order statuses of our system to order statuses of snapdeal.And our statuses will change accordingly.'''OrderStatusMap = 
{MStore.ORDER_PLACED : ['in progress', 'pending for verification', 'not available', 'in process','processing', 'processed', 'under verification', 'readying for dispatch','waiting for courier to pick up', ''],MStore.ORDER_DELIVERED : ['delivered','delivered successfully!'],MStore.ORDER_SHIPPED : ['in transit', 'dispatched', 'handed over to courier', 'undelivered. update delivery details!'],MStore.ORDER_CANCELLED : ['closed for vendor reallocation', 'cancelled', 'product returned by courier', 'returned', 'n/a', 'courier returned','a new order placed with a different seller', 'closed', 'cancellation in progress', 'verification failed. order cancelled','cancelled. payment refunded','closed. new order placed']}CONF_CB_AMOUNT = MStore.CONF_CB_DISCOUNTED_PRICEdef __init__(self,store_id):super(Store, self).__init__(store_id)def getName(self):return "snapdeal"def scrapeAffiliate(self, startDate=None, endDate=None):endDate=date.today() - timedelta(days=1)if startDate is None:startDate = endDate - timedelta(days=45)endDate = endDate.strftime('%Y-%m-%d')startDate = startDate.strftime('%Y-%m-%d')statuses=['cancelled', 'approved']for status in statuses:nextUrl = AFF_URL_TEMPLATE%(startDate, endDate, status)while nextUrl:req = urllib2.Request(nextUrl)nextUrl=''req.add_header('Snapdeal-Affiliate-Id', AFF_ID)req.add_header('Snapdeal-Token-Id', AFF_TOKEN)req.add_header('Accept', 'application/json')resp = urllib2.urlopen(req)ordersDict = json.loads(resp.read())self._saveToAffiliate(ordersDict['productDetails'], status)def _setLastSaleDate(self, saleDate):self.db.lastSaleDtate.update({'storeId':self.store_id}, {'$set':{'saleDate':saleDate}})def _getLastSaleDate(self,):lastDaySaleObj = self.db.lastDaySale.find_one({"storeId":self.store_id})if lastDaySaleObj is None:return datetime.mindef _parseB(self, orderId, subTagId, userId, page, orderSuccessUrl):soup = BeautifulSoup(page)orderDetailContainerDivs = soup.body.find("div", {'class':'cardLayoutWrap'}).findAll('div', 
recursive=False)orderDetailDiv = orderDetailContainerDivs.pop(0)paymentDetailDiv = orderDetailContainerDivs.pop(0)subOrders = orderDetailContainerDivsplacedOn = orderDetailDiv.span.text.split(':')[1].strip()merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)merchantOrder.placedOn = placedOnmerchantOrder.merchantOrderId = parse_qs(urlparse(orderSuccessUrl).query)['order'][0]paymentDivs = paymentDetailDiv.findAll('div', recursive=False)paymentDivs.pop(0)for orderTr in paymentDivs:orderTrString = str(orderTr)if "Total Amount Paid" in orderTrString:amountPaid = orderTr.div.find('div', {'class':'detailBlock'}).text.strip()merchantOrder.paidAmount = int(re.findall(r'\d+', amountPaid)[0])elif "Total Amount" in orderTrString:merchantOrder.totalAmount = re.findall(r'\d+', orderTrString)[0]elif "Delivery Charges" in orderTrString:merchantOrder.deliveryCharges = re.findall(r'\d+', orderTrString)[0]elif "Discount Applied" in orderTrString:merchantOrder.discountApplied = re.findall(r'\d+', orderTrString)[0]elif "Offer Discount" in orderTrString:merchantOrder.discountApplied = re.findall(r'\d+', orderTrString)[0]merchantSubOrders = []for subOrderElement in subOrders:subOrder = self.parseSubOrderB(subOrderElement, placedOn)if subOrder is not None:dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)subOrder.dealRank = dealRank.get('rank')subOrder.rankDesc = dealRank.get('description')subOrder.maxNlc = dealRank.get('maxNlc')subOrder.minNlc = dealRank.get('minNlc')subOrder.db = dealRank.get('dp')subOrder.itemStatus = dealRank.get('status')merchantSubOrders.append(subOrder)merchantOrder.subOrders = merchantSubOrdersreturn merchantOrderdef _parse(self, orderId, subTagId, userId, page, orderSuccessUrl):#page=page.decode("utf-8")soup = BeautifulSoup(page)#orderHead = soup.find(name, attrs, recursive, text)sections = soup.findAll("section")#print sectionsorder = sections[1]orderTrs = order.findAll("tr")placedOn = 
str(orderTrs[0].findAll("td")[1].text)#Pop two section elementssections.pop(0)sections.pop(0)subOrders = sectionsmerchantSubOrders = []merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)merchantOrder.placedOn = placedOnmerchantOrder.merchantOrderId = re.findall(r'\d+', str(soup.find("div", {"class":"deals_heading"})))[1]for orderTr in orderTrs:orderTrString = str(orderTr)if "Total Amount" in orderTrString:merchantOrder.totalAmount = re.findall(r'\d+', orderTrString)[0]elif "Delivery Charges" in orderTrString:merchantOrder.deliveryCharges = re.findall(r'\d+', orderTrString)[0]elif "Discount Applied" in orderTrString:merchantOrder.discountApplied = re.findall(r'\d+', orderTrString)[0]elif "Paid Amount" in orderTrString:merchantOrder.paidAmount = re.findall(r'\d+', orderTrString)[0]for subOrderElement in subOrders:subOrders = self.parseSubOrder(subOrderElement, placedOn)merchantSubOrders.extend(subOrders)merchantOrder.subOrders = merchantSubOrdersreturn merchantOrderdef parseSubOrder(self, subOrderElement, placedOn):subOrders = []productUrl = str(subOrderElement.find("a")['href'])subTable = subOrderElement.find("table", {"class":"lrPad"})subTrs = subTable.findAll("tr")unitPrice=NoneofferDiscount = 0deliveryCharges = NoneamountPaid = Noneamount = 0sdCash = 0unitPrice = 0for subTr in subTrs:subTrString = str(subTr)if "Unit Price" in subTrString:unitPrice = int(re.findall(r'\d+', subTrString)[0])if "Quantity" in subTrString:qty = int(re.findall(r'\d+', subTrString)[0])elif "Offer Discount" in subTrString:offerDiscount += int(re.findall(r'\d+', subTrString)[0])elif "SD Cash" in subTrString:sdCash = int(re.findall(r'\d+', subTrString)[0])elif "Delivery Charges" in subTrString:deliveryCharges = int(re.findall(r'\d+', subTrString)[0])elif "Subtotal" in subTrString:if int(qty) > 0:amountPaid = int(re.findall(r'\d+', subTrString)[0])/qtyelse:amountPaid = 0if qty>0:amount = unitPrice - offerDiscount - sdCashamount = 0 if amount < 0 else amountdiv1 = 
subOrderElement.find("div", {"class": "blk lrPad subordrs"})if div1 is None:raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")for strDiv in str(div1).split("<div class=\"seperator\"></div>"):div = BeautifulSoup(strDiv)productTitle = str(subOrderElement.find("a").text)productUrl = "http://m.snapdeal.com/" + productUrlsubOrder = SubOrder(productTitle, productUrl, placedOn, amountPaid)subOrder.amountPaid = amountPaidsubOrder.deliveryCharges = deliveryChargessubOrder.offerDiscount = offerDiscountsubOrder.unitPrice = int(unitPrice)subOrder.productCode = re.findall(r'\d+$', productUrl)[0]subOrder.imgUrl = Mongo.getImgSrc(subOrder.productCode, self.store_id).get('thumbnail')cashbackStatus = Store.CB_NAcashbackAmount = 0percentage = 0if amount > 0:(cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amount)if cashbackAmount > 0:cashbackStatus = Store.CB_PENDINGsubOrder.cashBackStatus = cashbackStatussubOrder.cashBackAmount = cashbackAmountsubOrder.cashBackPercentage = percentagetrackAnchor = div.find("a")if trackAnchor is not None:subOrder.tracingkUrl = str(trackAnchor['href'])divStr = str(div)divStr = divStr.replace("\n","").replace("\t", "")for line in divStr.split("<br />"):if "Suborder ID" in line:subOrder.merchantSubOrderId = re.findall(r'\d+', line)[0]elif "Status" in line:print linesubOrder.detailedStatus = re.findall('>(.*?)</span>', line, re.IGNORECASE)[0]elif "Est. Shipping Date" in line:subOrder.estimatedShippingDate = line.split(":")[1].strip()elif "Est. 
Delivery Date" in line:subOrder.estimatedDeliveryDate = line.split(":")[1].strip()elif "Courier Name" in line:subOrder.courierName = line.split(":")[1].strip()elif "Tracking No" in line:subOrder.trackingNumber = line.split(":")[1].strip()subOrders.append(subOrder)return subOrdersdef parseSubOrderB(self, subOrderElement, placedOn):subOrders = []prodDivs = subOrderElement.findAll('div', recursive=False)prodDetailDiv = prodDivs[1].findAll('div', recursive=False)offerDiscount = 0deliveryCharges = NoneamountPaid = 0sdCash = 0unitPrice = 0paymentDivs = prodDivs[2].findAll('div', recursive=False)for paymentDiv in paymentDivs:strPaymentDiv = str(paymentDiv)if "Unit Price" in strPaymentDiv:try:unitPrice = int(re.findall(r'\d+', strPaymentDiv)[0])except:return Noneelif "Offer Discount" in strPaymentDiv:offerDiscount += int(re.findall(r'\d+', strPaymentDiv)[0])elif "Discount" in strPaymentDiv:offerDiscount += int(re.findall(r'\d+', strPaymentDiv)[0])elif "SD Cash" in strPaymentDiv:sdCash = int(re.findall(r'\d+', strPaymentDiv)[0])elif "Delivery Charges" in strPaymentDiv:deliveryCharges = int(re.findall(r'\d+', strPaymentDiv)[0])elif "Subtotal" in strPaymentDiv:amountPaid = int(re.findall(r'\d+', paymentDiv.find('div', {'class':'itemPriceDetail'}).text)[0])amount = unitPrice - offerDiscount - sdCashimgDiv = prodDetailDiv[0]otherDiv = prodDetailDiv[1]productTitle = otherDiv.find('div',{'class':'orderName'}).text.strip()productUrl = imgDiv.a['href']subOrder = SubOrder(productTitle, productUrl, placedOn, amountPaid)subOrder.merchantSubOrderId = prodDivs[0].text.split(':')[1].strip()subOrder.detailedStatus = otherDiv.find('div',{'class':'orderStatus'}).span.text.strip()if subOrder.detailedStatus.lower() == "processing":processingDetailedStatus = subOrderElement.find('div', {'class':'trackingMessage'}).text.strip()if processingDetailedStatus.lower() == 'a new order placed with a different seller':subOrder.detailedStatus = processingDetailedStatusdeliveryStatus = 
otherDiv.find('div',{'class':'orderDelivery'})if deliveryStatus is not None:delString = deliveryStatus.text.strip()arr = delString.split(':')if "On" in arr[0]:subOrder.deliveredOn = arr[1].strip()elif "Exp. Delivery by" in arr[0]:subOrder.estimatedDeliveryDate = arr[1].strip()elif "Est. delivery between" in arr[0]:subOrder.estimatedDeliveryDate = arr[0].split("between")[1].strip()elif "Est. shipping between" in arr[0]:subOrder.estimatedShippingDate = arr[0].split("between")[1].strip()else:subOrder.estimatedShippingDate = arr[1].strip()subOrder.imgUrl = imgDiv.a.img['src']subOrder.productCode = re.findall(r'\d+$', productUrl)[0]subOrder.deliveryCharges = deliveryChargessubOrder.offerDiscount = offerDiscountsubOrder.unitPrice = int(unitPrice)cashbackStatus = Store.CB_NAcashbackAmount = 0percentage = 0if amountPaid > 0:(cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amount)if cashbackAmount > 0:cashbackStatus = Store.CB_PENDINGsubOrder.cashBackStatus = cashbackStatussubOrder.cashBackAmount = cashbackAmountsubOrder.cashBackPercentage = percentagecourierDet = subOrderElement.find('div', {'class':'courierDetail'})if courierDet is not None:subOrder.courierName = courierDet.span.text.strip()trackingDet = subOrderElement.find('div', {'class':'trackingNo'})if trackingDet is not None:subOrder.trackingUrl = trackingDet.span.a['href']subOrder.trackingNumber = trackingDet.span.a.text.strip()subOrders.append(subOrder)return subOrderdef getOrderJSON(self, rawHtml, supcMap):#print rawHtml# replace_with = {# '<': '>',# '>': '<',# '&': '&',# '"': '"', # should be escaped in attributes# "'": ''' # should be escaped in attributes# }pq = PyQuery(rawHtml)jsonValue = pq("#orderJSON").attr("value")jsonValue.replace(""", '"')jsonValue.replace("&", '&')jsonValue.replace(">", '>')jsonValue.replace("<", '<')jsonValue.replace("'", "'")allSupcElements = pq('div.mdt-layout')('div.mdt-card')('div.order-item')for supcElement in allSupcElements:try:supcElement = 
pq(supcElement)title = supcElement('div.order-heading').text().strip()productUrl = supcElement.attr("data-href")imgUrl = supcElement.find('img').attr('src')supc = imgUrl.split('-')[-3]supcMap[supc] = {'title':title, 'imgUrl':imgUrl, 'productUrl':productUrl}except:passreturn json.loads(jsonValue)def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):#print merchantOrderresp = {}url = ORDER_TRACK_URL + re.findall('.*(\?.*?)$', orderSuccessUrl,re.IGNORECASE)[0]supcMap = {}try:try:orderJSON = self.getOrderJSON(rawHtml, supcMap)except:traceback.print_exc()try:page = fetchResponseUsingProxy(orderSuccessUrl)orderJSON = self.getOrderJSON(page,supcMap)except:traceback.print_exc()orderJSON = Noneif orderJSON is None:page =fetchResponseUsingProxy(url)page = ungzipResponse(page)try:merchantOrder = self._parseB(orderId, subTagId, userId, page, orderSuccessUrl)except:traceback.print_exc()merchantOrder = self._parse(orderId, subTagId, userId, page, orderSuccessUrl)else:merchantOrder = self._parseC(orderId, subTagId, userId, supcMap, orderJSON, orderSuccessUrl)merchantOrder.orderTrackingUrl = urlif self._saveToOrder(todict(merchantOrder)):resp['result'] = 'ORDER_CREATED'else:resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'print "=================", resp, orderId, "=============="return respexcept:print "Error occurred"traceback.print_exc()resp['result'] = 'ORDER_NOT_CREATED'print "=================", resp, orderId, "=============="return resp#soup = BeautifulSoup(rawHtml,convertEntities=BeautifulSoup.HTML_ENTITIES)#soup.find(name, attrs, recursive, text)def _parseC(self, orderId, subTagId, userId, supcMap, orderJSON, orderSuccessUrl):print orderJSONmerchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)placedOn = datetime.strftime(utils.fromTimeStamp(orderJSON['created']/1000), "%a, %d %b, %Y")merchantOrder.placedOn = placedOnmerchantOrder.merchantOrderId = orderJSON['code']merchantOrder.paidAmount = 
orderJSON['paidAmount']merchantOrder.deliveryCharges = orderJSON['shippingCharges']merchantOrder.closed= FalsemerchantSubOrders = []for s in orderJSON['suborders']:print sif not supcMap.has_key(s['supcCode']):skuData = Mongo.get_mongo_connection().Catalog.MasterData.find_one({'identifier':s['supcCode'], 'source_id':self.store_id})if skuData is None:url = "http://www.snapdeal.com/search/autoSuggestion?q=%s&catId=0&ver=3"%s['supcCode']html = utils.fetchResponseUsingProxy(url)html = html.replace(" ", "")ul = PyQuery(html)('ul.top-products')title = PyQuery(ul('div.product-text')[0]).text().strip()productUrl = ul('a').attr("href").split("?")[0]imgUrl = ul('img').attr('src')else:title = skuData['product_name']productUrl = skuData['marketPlaceUrl']imgUrl = skuData['thumbnail']supcMap[s['supcCode']] = {'title':title, 'imgUrl':imgUrl, 'productUrl':productUrl}map1 = supcMap[s['supcCode']]amountPaid = s['paidAmount']productTitle = map1['title']productUrl = map1['productUrl']subOrder = SubOrder(productTitle, productUrl, placedOn, amountPaid)if(s.get('deliveryDate') is not None):print "Delivered On",subOrder.deliveredOn = datetime.strftime(utils.fromTimeStamp(s.get('deliveryDate')/1000),'%d %b, %Y')subOrder.status = MStore.ORDER_DELIVEREDsubOrder.detailedStatus = MStore.ORDER_DELIVEREDelif s['suborderStatus'].get('macroDescription')== 'Closed':if s['suborderStatus'].get('value')== 'Close for vendor reallocation':subOrder.detailedStatus = 'Close for vendor reallocation'subOrder.status = MStore.ORDER_CANCELLEDtry:subOrder.detailedStatus = s['suborderStatus']['macroDescription']subOrder.status = self._getStatusFromDetailedStatus(subOrder.detailedStatus)except:print "----------------", s['suborderStatus']subOrder.merchantSubOrderId = s['code']subOrder.deliveryCharges = s['shippingCharges']subOrder.productCode = re.findall(r'\d+$', productUrl)[0]subOrder.imgUrl = map1['imgUrl']subOrder.unitPrice = s['offerPrice'] -s['internalCashbackValue'] - 
s['externalCashbackValue']subOrder.amount = subOrder.unitPrice - s['offerDiscount'] - s['sdCash']try:try:if s['shipDateRange']['start']==s['shipDateRange']['end']:subOrder.estimatedShippingDate = datetime.strftime(utils.fromTimeStamp(s['shipDateRange']['start']/1000),'%d %b, %Y')else:subOrder.estimatedShippingDate = datetime.strftime(utils.fromTimeStamp(s['shipDateRange']['start']/1000),'%d %b, %Y') + " - " + datetime.strftime(utils.fromTimeStamp(s['shipDateRange']['end']/1000),'%d %b, %Y')except:if s['deliveryDateRange']['start']==s['deliveryDateRange']['end']:subOrder.estimatedDeliveryDate = datetime.strftime(utils.fromTimeStamp(s['deliveryDateRange']['start']/1000),'%d %b, %Y')else:subOrder.estimatedDeliveryDate = datetime.strftime(utils.fromTimeStamp(s['deliveryDateRange']['start']/1000),'%d %b, %Y') + " - " + datetime.strftime(utils.fromTimeStamp(s['deliveryDateRange']['end']/1000),'%d %b, %Y')except:passsubOrder.offerDiscount = s['offerDiscount']subOrder.unitPrice = s['offerPrice']merchantSubOrders.append(subOrder)merchantOrder.subOrders = merchantSubOrdersself.populateDerivedFields(merchantOrder)return merchantOrderdef _getStatusFromDetailedStatus(self, detailedStatus):for key, value in Store.OrderStatusMap.iteritems():if detailedStatus.lower() in value:return keyprint "Detailed Status need to be mapped", detailedStatus, self.store_idreturn Nonedef scrapeStoreOrders(self,):#collectionMap = {'palcedOn':1}orders = self._getActiveOrders()for order in orders:order = obj(order)print "Order", self.store_name, order.orderIdtry:url = order.orderTrackingUrlpage = fetchResponseUsingProxy(url)#page=page.decode("utf-8")try:pq = PyQuery(page)subOrderStatusMap={}for el in pq('div.cardLayout.pad-10.mb-10'):elpq = PyQuery(el)try:subOrderId = elpq("div.subOrderId").text().split(":")[1].strip()subOrderStatusMap[subOrderId] = elpqexcept:passclosedForReco = {}for suborder in order.subOrders:if suborder.closed:if suborder.merchantSubOrderId in subOrderStatusMap:del 
subOrderStatusMap[suborder.merchantSubOrderId]continueif subOrderStatusMap.has_key(suborder.merchantSubOrderId):elpq = subOrderStatusMap.get(suborder.merchantSubOrderId)del subOrderStatusMap[suborder.merchantSubOrderId]if elpq("#trackLink").attr("href"):suborder.trackingUrl = elpq("#trackLink").attr("href")if elpq('span.subOrdStatusText').text():suborder.estimatedDeliveryDate=elpq('span.subOrdStatusText').text().strip()suborder.detailedStatus = elpq("div.orderStatus span").text().strip()if suborder.detailedStatus == 'Closed. New Order Placed':closedForReco[suborder.merchantSubOrderId] = subordersuborder.status = self._getStatusFromDetailedStatus(suborder.detailedStatus)if len(closedForReco) == len(subOrderStatusMap) and len(closedForReco)>0:productCode = ''allProductsSame = Truefor subOrderId, subo in closedForReco.iteritems():if productCode == '':productCode = subo.productCodecontinueif subo.productCode != productCode:allProductsSame = FalsebreaksubOrderStatusMapif allProductsSame:print "singlereco", order.orderIdsuborderNew = obj(todict(suborder))for key, elpq in subOrderStatusMap.iteritems():suborderNew.merchantSubOrderId = keyif elpq("#trackLink").attr("href"):suborderNew.trackingUrl = elpq("#trackLink").attr("href")if elpq('span.subOrdStatusText').text():suborderNew.estimatedDeliveryDate=elpq('span.subOrdStatusText').text().strip()suborderNew.detailedStatus = elpq("div.orderStatus span").text().strip()suborderNew.status = self._getStatusFromDetailedStatus(suborderNew.detailedStatus)order.subOrders.append(suborderNew)else:print "All products not same referring transaction url", order.orderIdelse:pass#Lookout for ordersummary page for exact mappingself.populateDerivedFields(order, update=True)self._updateToOrder(todict(order))except:traceback.print_exc()soup = BeautifulSoup(page)try:self.tryBParsing(order, soup)except:traceback.print_exc()sections = soup.findAll("section")orderEl = sections[1]orderTrs = orderEl.findAll("tr")placedOn = 
str(orderTrs[0].findAll("td")[1].text)sections.pop(0)sections.pop(0)subOrders = sectionsbulk = self.db.merchantOrder.initialize_ordered_bulk_op()closed = Truefor subOrderElement in subOrders:div1 = subOrderElement.findAll("div", {"class": "blk lrPad subordrs"})if len(div1)<=0:raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")subOrder = NonebreakFlag = Falsefor strDiv in str(div1).split("<div class=\"seperator\"></div>"):div = BeautifulSoup(strDiv)divStr = str(div)divStr = divStr.replace("\n","").replace("\t", "")updateMap = {}for line in divStr.split("<br />"):if "Suborder ID" in line:merchantSubOrderId = re.findall(r'\d+', line)[0]#break if suborder is inactivesubOrder = self._isSubOrderActive(order, merchantSubOrderId)if subOrder is None:subOrders = self.parseSubOrder(subOrderElement, placedOn)self.db.merchantOrder.update({"orderId":order['orderId']},{'$push':{"subOrders":{"$each":todict(subOrders)}}})print "Added new suborders to Order id - ", order['orderId']closed = FalsebreakFlag = Truebreakelif subOrder['closed']:breakFlag = Truebreakelse:findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": merchantSubOrderId}elif "Status :" in line:detailedStatus = re.findall('>(.*?)</span>', line, re.IGNORECASE)[0]updateMap["subOrders.$.detailedStatus"] = detailedStatusstatus = self._getStatusFromDetailedStatus(detailedStatus)closedStatus = status in [Store.ORDER_DELIVERED, Store.ORDER_CANCELLED]if status is not None:updateMap["subOrders.$.status"] = statusif detailedStatus == 'Closed For Vendor Reallocation':#if it is more than 6hours mark closed.closeAt = subOrder.get("closeAt")if closeAt is None:closeAt = datetime.now() + timedelta(hours=6)updateMap["subOrders.$.closeAt"] = datetime.strftime(closeAt,"%Y-%m-%d %H:%M:%S")else:closeAt = datetime.strptime(closeAt,"%Y-%m-%d %H:%M:%S")if datetime.now() > closeAt:closedStatus = True#Close if not applicable suborders are not closedif utils.fromTimeStamp(order['createdOnInt'] + 
35*86400*1000) < datetime.now() and subOrder['cashBackStatus']==utils.CB_NA:closedStatus=Trueif closedStatus:#if status is closed then change the paybackStatus accordinglyupdateMap["subOrders.$.closed"] = Trueif status == Store.ORDER_DELIVERED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVEDelif status == Store.ORDER_CANCELLED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_CANCELLEDelse:closed = Falseelif "Est. Shipping Date" in line:estimatedShippingDate = line.split(":")[1].strip()updateMap["subOrders.$.estimatedShippingDate"] = estimatedShippingDateelif "Est. Delivery Date" in line:estimatedDeliveryDate = line.split(":")[1].strip()updateMap["subOrders.$.estimatedDeliveryDate"] = estimatedDeliveryDateelif "Courier Name" in line:courierName = line.split(":")[1].strip()updateMap["subOrders.$.courierName"] = courierNameelif "Tracking No" in line:trackingNumber = line.split(":")[1].strip()updateMap["subOrders.$.trackingNumber"] = trackingNumberif breakFlag:continuebulk.find(findMap).update({'$set' : updateMap})bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': closed,"parseError":False}})result = bulk.execute()tprint(result)except:traceback.print_exc()tprint("Could not update " + str(order.orderId) + "For store " + self.getName())self.db.merchantOrder.update({"orderId":order.orderId}, {"$set":{"parseError":True}})def tryBParsing(self, order, soup):orderDetailContainerDivs = soup.body.find("div", {'class':'cardLayoutWrap'}).findAll('div', recursive=False)orderDetailDiv = orderDetailContainerDivs.pop(0)placedOn = orderDetailDiv.span.text.split(':')[1].strip()orderDetailContainerDivs.pop(0)subOrders = orderDetailContainerDivsbulk = self.db.merchantOrder.initialize_ordered_bulk_op()closed = Truefor subOrderElement in subOrders:prodDivs = subOrderElement.findAll('div', recursive=False)merchantSubOrderId = 
prodDivs[0].text.split(':')[1].strip()subOrder = NonesubOrder = self._isSubOrderActive(order, merchantSubOrderId)if subOrder is None:try:subOrder = self.parseSubOrderB(subOrderElement, placedOn)if subOrder is None:continueself.db.merchantOrder.update({"orderId":order['orderId']},{'$push':{"subOrders":{"$each":todict([subOrder])}}})print "Added new suborders to Order id - ", order['orderId']closed = Falseexcept:passcontinueelif subOrder['closed']:continueelse:prodDetailDiv = prodDivs[1].findAll('div', recursive=False)otherDiv = prodDetailDiv[1]trackBlock = subOrderElement.find('div',{'class':'trackingDetailsBlock'})findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": merchantSubOrderId}updateMap = {}detailedStatus = otherDiv.find('div',{'class':'orderStatus'}).span.text.strip()if 'A new order placed with a different seller' in str(trackBlock):#if it is more than 6hours mark closed.closeAt = subOrder.get("closeAt")if closeAt is None:closeAt = datetime.now() + timedelta(hours=6)updateMap["subOrders.$.closeAt"] = datetime.strftime(closeAt,"%Y-%m-%d %H:%M:%S")bulk.find(findMap).update({'$set' : updateMap})closed=Falsecontinueelse:closeAt = datetime.strptime(closeAt,"%Y-%m-%d %H:%M:%S")if datetime.now() > closeAt:detailedStatus = 'A new order placed with a different seller'status = self._getStatusFromDetailedStatus(detailedStatus)closedStatus = status in [Store.ORDER_DELIVERED, Store.ORDER_CANCELLED]updateMap["subOrders.$.detailedStatus"] = detailedStatusif status is not None:updateMap["subOrders.$.status"] = statusif closedStatus:#if status is closed then change the paybackStatus accordinglyupdateMap["subOrders.$.closed"] = Trueif status == Store.ORDER_DELIVERED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVEDelif status == Store.ORDER_CANCELLED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_CANCELLEDelse:closed = 
FalsedeliveryStatus = otherDiv.find('div',{'class':'orderDelivery'})if deliveryStatus is not None:delString = deliveryStatus.text.strip()arr = delString.split(':')if "On" in arr[0]:updateMap['subOrders.$.deliveredOn'] = arr[1].strip()elif "Exp. Delivery by" in arr[0]:updateMap['subOrders.$.estimatedDeliveryDate'] = arr[1].strip()elif "Est. delivery between" in arr[0]:updateMap['subOrders.$.estimatedDeliveryDate'] = delString.split("between")[1].strip()elif "Est. shipping between" in arr[0]:updateMap['subOrders.$.estimatedShippingDate'] = delString.split("between")[1].strip()else:updateMap['subOrders.$.estimatedShippingDate'] = arr[1].strip()courierDet = subOrderElement.find('div', {'class':'courierDetail'})if courierDet is not None:updateMap['subOrders.$.courierName'] = courierDet.span.text.strip()trackingDet = subOrderElement.find('div', {'class':'trackingNo'})if trackingDet is not None:updateMap['subOrders.$.trackingUrl'] = trackingDet.span.a['href']updateMap['subOrders.$.trackingNumber'] = trackingDet.span.a.text.strip()bulk.find(findMap).update({'$set' : updateMap})bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': closed,"parseError":False}})result = bulk.execute()tprint(result)def _saveToAffiliate(self, offers, status):collection = self.db.snapdealOrderAffiliateInfo1#mcollection = self.db.merchantOrderfor offer in offers:offer = self.covertToObj(offer)if offer.orderId:dict1 = todict(offer)dict1["_id"] = dict1["orderId"] + "-" + dict1["productCode"]dict1['status'] = statuscollection.save(dict1)# def _saveToAffiliate(self, offers):# collection = self.db.snapdealOrderAffiliateInfo# mcollection = self.db.merchantOrder# for offer in offers:# offer = self.covertToObj(offer)# collection.update({"adId":offer.adId, "saleAmount":offer.saleAmount, "payOut":offer.payOut},{"$set":todict(offer)}, upsert=True)# mcollection.update({"subTagId":offer.subTagId, "storeId":self.store_id, "subOrders.missingAff":True}, {"$set":{"subOrders.$.missingAff":False}})def 
_getAllOffers(self, br, token):allOffers = []nextPage = 1while True:data = getPostData(token, nextPage)response = br.open(POST_URL, data)rmap = json.loads(ungzipResponse(response))if rmap is not None:rmap = rmap['response']print rmapif rmap is not None and len(rmap['errors'])==0:allOffers += rmap['data']['data']nextPage += 1if rmap['data']['pageCount']<nextPage:breakreturn allOffers# def covertToObj(self,offer):# offerData = offer['Stat']# offer1 = AffiliateInfo(offerData['affiliate_info1'], self.store_id, offerData['conversion_status'], offerData['ad_id'],# offerData['datetime'], int(float(offerData['payout'])), offer['Offer']['name'], offerData['ip'], int(float(offerData['conversion_sale_amount'])))# offer1.saleTime = int(time.mktime(datetime.strptime(offer1.saleDate, "%Y-%m-%d %H:%M:%S").timetuple()))# return offer1def parseInfo(self,):from pyquery import PyQuery as pqorders = list(session.query(Orders).filter_by(store_id=self.store_id).filter_by(status='ORDER_CREATED').group_by(Orders.user_id).all())try:for order in orders:try:doc = pq(order.rawhtml)a1= " ".join(["" if not div.text else div.text.replace("\t","").replace("\n","").strip() for div in pq(doc('article')[-1])('div')]).strip()a2 = ",".join(["" if not div.text else div.text.replace("\t","").replace("\n","").replace(" ", "") for div in pq(doc('article')[-2])('div')]).strip()user_address = All_user_addresses()user_address.address = a1all = a2.split(",")user_address.source = 'order'user_address.user_id = order.user_id#user_address. = all[3].split(":")[1]#user_address. 
= all[2].split(":")[1]#orderInfo.mobile = all[-1].split(":")[1]adSplit = a1.split(",")user_address.city = adSplit[-2].strip()user_address.pincode = adSplit[-1].strip().split(" ")[0]user_address.state = adSplit[-1].strip().split(" ")[1]session.commit()except:session.rollback()continuefinally:session.close()def covertToObj(self,offer):offer1 = AffiliateInfo(offer["affiliateSubId1"], 3, None, None, utils.toTimeStamp(datetime.strptime(offer["dateTime"], "%m/%d/%Y %H:%M:%S")),offer["commissionEarned"], None, None, offer["sale"])offer1.orderId = offer.get('orderCode') if offer.get('orderCode') else Noneoffer1.productCode = offer["product"]offer1.unitPrice = offer["price"]offer1.quantity = offer["quantity"]offer1.saleTime = offer["dateTime"]return offer1def getPostData(token, page = 1, limit= 20, startDate=None, endDate=None):endDate=date.today() + timedelta(days=1)startDate=endDate - timedelta(days=31)parameters = (("page",str(page)),("limit",str(limit)),("fields[]","Stat.offer_id"),("fields[]","Stat.datetime"),("fields[]","Offer.name"),("fields[]","Stat.conversion_status"),("fields[]","Stat.conversion_sale_amount"),("fields[]","Stat.payout"),("fields[]","Stat.ip"),("fields[]","Stat.ad_id"),("fields[]","Stat.affiliate_info1"),("sort[Stat.datetime]","desc"),("filters[Stat.date][conditional]","BETWEEN"),("filters[Stat.date][values][]",startDate.strftime('%Y-%m-%d')),("filters[Stat.date][values][]",endDate.strftime('%Y-%m-%d')),("data_start",startDate.strftime('%Y-%m-%d')),("data_end",endDate.strftime('%Y-%m-%d')),("Method","getConversions"),("NetworkId","jasper"),("SessionToken",token),)#Encode the parametersreturn urllib.urlencode(parameters)def main():#print todict([1,2,"3"])store = getStore(3)#store.parseOrderRawHtml(332221, "3232311", 2, readSSh("/home/amit/snapdeal.html"), 
"https://m.snapdeal.com/purchaseMobileComplete?code=b92753bd7236bb3efbd6e8a0df46b962&order=9627657388")store.scrapeAffiliate()#https://m.snapdeal.com/purchaseMobileComplete?code=3fbc8a02a1c4d3c4e906f46886de0464&order=5808451506#https://m.snapdeal.com/purchaseMobileComplete?code=9f4dfa49ff08a16d04c5e4bf519506fc&order=9611672826# orders = list(session.query(OrdersRaw).filter_by(store_id=3).filter_by(status='ORDER_NOT_CREATED').all())# for o in orders:# result = store.parseOrderRawHtml(o.id, o.sub_tag, o.user_id, o.rawhtml, o.order_url)['result']# o.status = result# session.commit()# session.close()#store.scrapeStoreOrders()#store._isSubOrderActive(8, "5970688907")#store.scrapeAffiliate(datetime(2015,4,1))#store.scrapeStoreOrders()#store.parseInfo()class obj(object):def __init__(self, d):for a, b in d.items():if isinstance(b, (list, tuple)):setattr(self, a, [obj(x) if isinstance(x, dict) else x for x in b])else:setattr(self, a, obj(b) if isinstance(b, dict) else b)def todict(obj, classkey=None):if isinstance(obj, dict):data = {}for (k, v) in obj.items():data[k] = todict(v, classkey)return dataelif hasattr(obj, "_ast"):return todict(obj._ast())elif hasattr(obj, "__iter__"):return [todict(v, classkey) for v in obj]elif hasattr(obj, "__dict__"):data = dict([(key, todict(value, classkey))for key, value in obj.__dict__.iteritems()if not callable(value) and not key.startswith('_')])if classkey is not None and hasattr(obj, "__class__"):data[classkey] = obj.__class__.__name__return dataelse:return objif __name__ == '__main__':main()