# Rev 13809 | Rev 14239 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on Jan 15, 2015

@author: amit

Snapdeal store integration: pulls affiliate conversions from the HasOffers
reporting endpoint (POST_URL) and tracks order / sub-order state by scraping
the mobile order-summary page (ORDER_TRACK_URL).

NOTE: this module targets Python 2 (BeautifulSoup 3, urllib.urlencode).
'''
from BeautifulSoup import BeautifulSoup
from bson.binary import Binary
from datetime import datetime, date, timedelta
from dtr import main
from dtr.dao import AffiliateInfo, Order, SubOrder
from dtr.main import getBrowserObject, ScrapeException, getStore, ParseException, Store as MStore,\
    ungzipResponse
from pprint import pprint
from pymongo import MongoClient
import json
import pymongo
import re
import traceback
import urllib

# NOTE(review): credentials are hard-coded in source; they should be moved to
# configuration / environment and rotated. Left unchanged here on purpose.
USERNAME='profittill2@gmail.com'
PASSWORD='spice@2020'
AFFILIATE_URL='http://affiliate.snapdeal.com'
POST_URL='https://api-p03.hasoffers.com/v3/Affiliate_Report.json'
ORDER_TRACK_URL='https://m.snapdeal.com/orderSummary'
CONFIG_URL='http://affiliate.snapdeal.com/publisher/js/config.php'

# Timestamp layout used for affiliate sale dates and the stored "closeAt" value.
_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"

# CSS class of the per-sub-order <div> blocks inside a sub-order <section>.
_SUBORDER_DIV_CLASS = "blk lrPad subordrs"

# (row label, Order attribute) pairs read from the order summary table.
_ORDER_ROW_FIELDS = (
    ("Total Amount", "totalAmount"),
    ("Delivery Charges", "deliveryCharges"),
    ("Discount Applied", "discountApplied"),
    ("Paid Amount", "paidAmount"),
)

# (line label, field name) pairs whose value is the text after the first ":".
_SUBORDER_TEXT_FIELDS = (
    ("Est. Shipping Date", "estimatedShippingDate"),
    ("Est. Delivery Date", "estimatedDeliveryDate"),
    ("Courier Name", "courierName"),
    ("Tracking No", "trackingNumber"),
)


def _firstNumber(text):
    '''Return the first run of digits in text (as a string); IndexError if none.'''
    return re.findall(r'\d+', text)[0]


def _orderTrackUrl(orderSuccessUrl):
    '''
    Build the order-summary URL by appending the query string (everything
    from the last "?") of orderSuccessUrl to ORDER_TRACK_URL.
    Raises IndexError when orderSuccessUrl has no query string.
    '''
    return ORDER_TRACK_URL + re.findall(r'.*(\?.*?)$', orderSuccessUrl, re.IGNORECASE)[0]


def _iterSubOrderFields(div):
    '''
    Lazily yield (fieldName, value) pairs parsed from one sub-order <div>.

    The div markup is split on "<br />"; each fragment is matched against the
    known labels in a fixed priority order. Being a generator, a consumer that
    stops early never parses the remaining fragments.
    '''
    divStr = str(div).replace("\n","").replace("\t", "")
    for line in divStr.split("<br />"):
        if "Suborder ID" in line:
            yield "merchantSubOrderId", _firstNumber(line)
        elif "Status" in line:
            yield "detailedStatus", re.findall('>(.*?)</span>', line, re.IGNORECASE)[0]
        else:
            for label, fieldName in _SUBORDER_TEXT_FIELDS:
                if label in line:
                    yield fieldName, line.split(":")[1].strip()
                    break


class Store(MStore):
    '''
    This is to map order statuses of our system to order statuses of snapdeal.
    And our statuses will change accordingly.
    '''
    OrderStatusMap = {
        MStore.ORDER_PLACED : ['In Progress','N/A'],
        MStore.ORDER_DELIVERED : ['Delivered'],
        MStore.ORDER_SHIPPED : ['In Transit'],
        MStore.ORDER_CANCELLED : ['Closed For Vendor Reallocation', 'Cancelled', 'Product returned by courier', 'Returned']
    }
    CONF_CB_AMOUNT = MStore.CONF_CB_DISCOUNTED_PRICE

    def __init__(self,store_id):
        super(Store, self).__init__(store_id)

    def getName(self):
        '''Return the store's identifier string.'''
        return "snapdeal"

    def scrapeAffiliate(self, startDate=None, endDate=None):
        '''
        Log in to the affiliate panel, download conversions and persist them.

        startDate / endDate (date or datetime, optional) bound the report
        window; when omitted getPostData's default window is used.
        Offers newer than the stored last-sale watermark trigger
        _updateOrdersPayBackStatus; the watermark is then advanced.
        '''
        br = getBrowserObject()
        br.open(AFFILIATE_URL)
        br.select_form(nr=0)
        br.form['data[User][password]'] = PASSWORD
        br.form['data[User][email]'] = USERNAME
        br.submit()
        response = br.open(CONFIG_URL)
        token = re.findall('"session_token":"(.*?)"', ungzipResponse(response), re.IGNORECASE)[0]

        allOffers = self._getAllOffers(br, token, startDate, endDate)
        allPyOffers = []
        maxSaleDate = self._getLastSaleDate()
        newMaxSaleDate = maxSaleDate
        for offer in allOffers:
            pyOffer = self.covertToObj(offer).__dict__
            allPyOffers.append(pyOffer)
            saleDate = datetime.strptime(pyOffer['saleDate'], _DATETIME_FORMAT)
            if maxSaleDate < saleDate:
                self._updateOrdersPayBackStatus({'subTagId':pyOffer['subTagId'],
                                                 'saleDate':pyOffer['saleDate']}, {})
            if newMaxSaleDate < saleDate:
                newMaxSaleDate = saleDate
        # Only write when the watermark actually moved (also avoids persisting
        # the datetime.min placeholder when nothing has ever been seen).
        if newMaxSaleDate > maxSaleDate:
            self._setLastSaleDate(newMaxSaleDate)
        self._saveToAffiliate(allPyOffers)

    def _setLastSaleDate(self, saleDate):
        '''Persist the last-sale watermark for this store.'''
        # Same collection _getLastSaleDate reads from; upsert so the very
        # first run creates the record.
        self.db.lastDaySale.update({'storeId':self.store_id},
                                   {'$set':{'saleDate':saleDate}}, upsert=True)

    def _getLastSaleDate(self,):
        '''Return the stored last-sale watermark, or datetime.min when none exists.'''
        lastDaySaleObj = self.db.lastDaySale.find_one({"storeId":self.store_id})
        if lastDaySaleObj is None:
            return datetime.min
        saleDate = lastDaySaleObj.get('saleDate')
        if saleDate is None:
            return datetime.min
        return saleDate

    def _splitOrderPage(self, page):
        '''
        Parse an order-summary page.

        Returns (soup, orderTrs, placedOn, subOrderSections): the parsed
        document, the <tr> rows of the second <section>, the text of the
        second cell of its first row, and every <section> after the first two.
        Raises ParseException when the page has fewer than two sections.
        '''
        soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)
        sections = soup.findAll("section")
        if len(sections) < 2:
            raise ParseException("order", "Could not Parse order summary for Snapdeal")
        orderTrs = sections[1].findAll("tr")
        placedOn = str(orderTrs[0].findAll("td")[1].text)
        return soup, orderTrs, placedOn, sections[2:]

    def _parse(self, orderId, subTagId, userId, page, orderSuccessUrl):
        '''Build an Order (with its SubOrders) from an order-summary page.'''
        soup, orderTrs, placedOn, subOrderSections = self._splitOrderPage(page)

        merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)
        # The merchant order id is the second number found in the heading markup.
        merchantOrder.merchantOrderId = re.findall(r'\d+', str(soup.find("div", {"class":"deals_heading"})))[1]
        for orderTr in orderTrs:
            row = str(orderTr)
            for label, attrName in _ORDER_ROW_FIELDS:
                if label in row:
                    setattr(merchantOrder, attrName, _firstNumber(row))
                    break

        merchantSubOrders = []
        for subOrderElement in subOrderSections:
            merchantSubOrders.extend(self.parseSubOrder(subOrderElement, placedOn))
        merchantOrder.subOrders = merchantSubOrders
        return merchantOrder

    def parseSubOrder(self, subOrderElement, placedOn):
        '''
        Parse one sub-order <section> into a list of SubOrder objects, one
        per "blk lrPad subordrs" div it contains.

        Raises ParseException when the section has no sub-order divs, or when
        the unit price / quantity needed for the amounts cannot be found.
        '''
        anchor = subOrderElement.find("a")
        productTitle = str(anchor.text)
        # Built once: prefixing inside the per-div loop would repeat the host
        # for every sub-order after the first.
        productUrl = "http://m.snapdeal.com/" + str(anchor['href'])

        subTable = subOrderElement.find("table", {"class":"lrPad"})
        unitPrice = None
        qty = None
        offerDiscount = None
        deliveryCharges = None
        amountPaid = None
        for subTr in subTable.findAll("tr"):
            row = str(subTr)
            if "Unit Price" in row:
                unitPrice = _firstNumber(row)
            if "Quantity" in row:
                qty = _firstNumber(row)
            elif "Offer Discount" in row:
                offerDiscount = _firstNumber(row)
            elif "Delivery Charges" in row:
                deliveryCharges = _firstNumber(row)
            elif "Subtotal" in row:
                if qty is None:
                    raise ParseException("subOrder", "Could not Parse quantity for Snapdeal suborder")
                if int(qty) > 0:
                    # Per-unit amount; integer division as in Python 2 "/".
                    amountPaid = str(int(_firstNumber(row)) // int(qty))
                else:
                    amountPaid = "0"
        if unitPrice is None:
            raise ParseException("subOrder", "Could not Parse unit price for Snapdeal suborder")

        # Cashback base: selling price, or selling price minus offer discount.
        if self.CONF_CB_AMOUNT == MStore.CONF_CB_SELLING_PRICE or offerDiscount is None:
            amount = int(unitPrice)
        else:
            amount = int(unitPrice) - int(offerDiscount)

        divs = subOrderElement.findAll("div", {"class": _SUBORDER_DIV_CLASS})
        if len(divs)<=0:
            raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")

        subOrders = []
        for div in divs:
            subOrder = SubOrder(productTitle, productUrl, placedOn, amountPaid)
            subOrder.amountPaid = amountPaid
            subOrder.deliveryCharges = deliveryCharges
            subOrder.offerDiscount = offerDiscount
            subOrder.unitPrice = int(unitPrice)
            subOrder.productCode = re.findall(r'\d+$', productUrl)[0]
            (cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amount)
            cashbackStatus = Store.CB_PENDING
            if cashbackAmount <= 0:
                cashbackStatus = Store.CB_NA
            subOrder.cashBackStatus = cashbackStatus
            subOrder.cashBackAmount = cashbackAmount
            if percentage > 0:
                subOrder.cashBackPercentage = percentage
            trackAnchor = div.find("a")
            if trackAnchor is not None:
                # NOTE(review): attribute name is spelled "tracingkUrl" in the
                # original; kept because other code may read it - confirm.
                subOrder.tracingkUrl = str(trackAnchor['href'])
            for fieldName, value in _iterSubOrderFields(div):
                setattr(subOrder, fieldName, value)
            subOrders.append(subOrder)
        return subOrders

    def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):
        '''
        Fetch the order-summary page for orderSuccessUrl, parse it and save
        the order. rawHtml is accepted but not used.

        Returns {'result': 'ORDER_CREATED'} on success and
        {'result': 'PARSE_ERROR'} on any failure (the traceback is printed).
        '''
        resp = {}
        try:
            br = getBrowserObject()
            page = ungzipResponse(br.open(_orderTrackUrl(orderSuccessUrl)))
            merchantOrder = self._parse(orderId, subTagId, userId, page, orderSuccessUrl)
            self._saveToOrder(todict(merchantOrder))
            resp['result'] = 'ORDER_CREATED'
        except (KeyboardInterrupt, SystemExit):
            # Never swallow interpreter-exit requests.
            raise
        except:
            # Deliberately broad: every scrape/parse failure is reported to
            # the caller as PARSE_ERROR rather than propagated.
            print("Error occurred")
            traceback.print_exc()
            resp['result'] = 'PARSE_ERROR'
        return resp

    def _getStatusFromDetailedStatus(self, detailedStatus):
        '''
        Map a Snapdeal status string to our status via OrderStatusMap.
        Raises ParseException for a status that is not in the map.
        '''
        for key, value in Store.OrderStatusMap.items():
            if detailedStatus in value:
                return key
        print("Detailed Status need to be mapped")
        raise ParseException("_getStatusFromDetailedStatus", "Found new order status" + detailedStatus)

    def _addNewSubOrder(self, order, subOrderElement, placedOn, merchantSubOrderId):
        '''Parse the section and append the sub-order with the given id to the stored order.'''
        for parsed in self.parseSubOrder(subOrderElement, placedOn):
            # parseSubOrder returns every sub-order in the section; push only
            # the one that is missing, as a single array element.
            if getattr(parsed, 'merchantSubOrderId', None) == merchantSubOrderId:
                self.db.merchantOrder.update({"orderId":order['orderId']},
                                             {'$push':{"subOrders":todict(parsed)}})
                print("Added new suborder with subOrder Id: %s" % merchantSubOrderId)

    def _applyStatus(self, subOrder, detailedStatus, updateMap):
        '''
        Record status changes for one stored sub-order in updateMap.
        Returns True when the sub-order is still open, False when it closed.
        '''
        updateMap["subOrders.$.detailedStatus"] = detailedStatus
        status = self._getStatusFromDetailedStatus(detailedStatus)
        updateMap["subOrders.$.status"] = status
        closedStatus = status in [Store.ORDER_DELIVERED, Store.ORDER_CANCELLED]

        if detailedStatus == 'Closed For Vendor Reallocation':
            # This status maps to cancelled, but it is only treated as final
            # once it has persisted for 6 hours; until then keep it open.
            closedStatus = False
            closeAt = subOrder.get("closeAt")
            if closeAt is None:
                closeAt = datetime.now() + timedelta(hours=6)
                updateMap["subOrders.$.closeAt"] = closeAt.strftime(_DATETIME_FORMAT)
            elif datetime.now() > datetime.strptime(closeAt, _DATETIME_FORMAT):
                closedStatus = True

        if not closedStatus:
            return True

        # Closed: settle a still-pending cashback according to the outcome.
        updateMap["subOrders.$.closed"] = True
        if subOrder.get("cashBackStatus") == Store.CB_PENDING:
            if status == Store.ORDER_DELIVERED:
                updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVED
            elif status == Store.ORDER_CANCELLED:
                updateMap["subOrders.$.cashBackStatus"] = Store.CB_CANCELLED
        return False

    def _collectSubOrderUpdate(self, order, subOrderElement, placedOn, div):
        '''
        Work out the update for the sub-order shown in one div.

        Returns (findMap, updateMap, stillOpen). findMap is None when nothing
        should be queued (sub-order newly added, already closed, or the div
        carries no sub-order id).
        '''
        subOrder = None
        findMap = None
        updateMap = {}
        stillOpen = False
        for fieldName, value in _iterSubOrderFields(div):
            if fieldName == "merchantSubOrderId":
                subOrder = self._isSubOrderActive(order, value)
                if subOrder is None:
                    # Not stored yet: add it and keep the order open.
                    self._addNewSubOrder(order, subOrderElement, placedOn, value)
                    return None, {}, True
                if subOrder['closed']:
                    # Already final; skip just this div.
                    return None, {}, False
                findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": value}
            elif fieldName == "detailedStatus":
                if subOrder is None:
                    raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")
                stillOpen = self._applyStatus(subOrder, value, updateMap)
            else:
                updateMap["subOrders.$." + fieldName] = value
        return findMap, updateMap, stillOpen

    def scrapeStoreOrders(self,):
        '''
        Refresh every active order from its order-summary page.

        For each order, all sub-order updates plus the order-level "closed"
        flag are sent as one ordered bulk operation. The order is marked
        closed only when none of its sub-orders is still open.
        Raises ParseException when a page cannot be interpreted.
        '''
        orders = self._getActiveOrders()
        print("Found orders %s" % (orders,))
        br = getBrowserObject()
        for order in orders:
            page = ungzipResponse(br.open(_orderTrackUrl(order['orderSuccessUrl'])))
            _soup, _orderTrs, placedOn, subOrderSections = self._splitOrderPage(page)

            bulk = self.db.merchantOrder.initialize_ordered_bulk_op()
            closed = True
            for subOrderElement in subOrderSections:
                divs = subOrderElement.findAll("div", {"class": _SUBORDER_DIV_CLASS})
                if len(divs)<=0:
                    raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")
                for div in divs:
                    findMap, updateMap, stillOpen = self._collectSubOrderUpdate(
                        order, subOrderElement, placedOn, div)
                    if stillOpen:
                        closed = False
                    # An empty $set is rejected by the server, so skip it.
                    if findMap is not None and updateMap:
                        bulk.find(findMap).update({'$set' : updateMap})
            bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': closed}})
            result = bulk.execute()
            pprint(result)

    def _saveToAffiliate(self, offers):
        '''Insert affiliate offers; duplicate-key errors are printed, not raised.'''
        if not offers:
            print("no affiliate have been pushed")
            return
        collection = self.db.snapdealOrderAffiliateInfo
        try:
            collection.insert(offers,continue_on_error=True)
        except pymongo.errors.DuplicateKeyError as e:
            print(e.details)

    def _getAllOffers(self, br, token, startDate=None, endDate=None):
        '''
        Page through the conversions report and return all rows as a list.

        startDate / endDate are forwarded to getPostData.
        Raises ParseException when the API response is missing or reports
        errors (rather than re-requesting the same page indefinitely).
        '''
        allOffers = []
        nextPage = 1
        while True:
            data = getPostData(token, nextPage, startDate=startDate, endDate=endDate)
            response = br.open(POST_URL, data)
            rmap = json.loads(ungzipResponse(response))
            if rmap is not None:
                rmap = rmap['response']
            if rmap is None or len(rmap['errors']) != 0:
                raise ParseException("_getAllOffers", "Affiliate report request failed for page " + str(nextPage))
            allOffers += rmap['data']['data']
            nextPage += 1
            if rmap['data']['pageCount']<nextPage:
                break
        return allOffers

    def covertToObj(self,offer):
        '''Convert one raw report row (with 'Stat' and 'Offer' parts) to an AffiliateInfo.'''
        offerData = offer['Stat']
        offer1 = AffiliateInfo(offerData['affiliate_info1'], self.store_id, offerData['conversion_status'], offerData['ad_id'],
                               offerData['datetime'], offerData['payout'], offer['Offer']['name'], offerData['ip'], offerData['conversion_sale_amount'])
        return offer1


def getPostData(token, page = 1, limit= 20, startDate=None, endDate=None):
    '''
    Return the url-encoded POST body for one page of the conversions report.

    endDate defaults to tomorrow and startDate to 31 days before endDate;
    explicit values (date or datetime) are honoured.
    '''
    if endDate is None:
        endDate = date.today() + timedelta(days=1)
    if startDate is None:
        startDate = endDate - timedelta(days=31)
    startStr = startDate.strftime('%Y-%m-%d')
    endStr = endDate.strftime('%Y-%m-%d')

    # A tuple of pairs (not a dict) because "fields[]" and the date-values
    # key are repeated.
    parameters = (
        ("page",str(page)),
        ("limit",str(limit)),
        ("fields[]","Stat.offer_id"),
        ("fields[]","Stat.datetime"),
        ("fields[]","Offer.name"),
        ("fields[]","Stat.conversion_status"),
        ("fields[]","Stat.conversion_sale_amount"),
        ("fields[]","Stat.payout"),
        ("fields[]","Stat.ip"),
        ("fields[]","Stat.ad_id"),
        ("fields[]","Stat.affiliate_info1"),
        ("sort[Stat.datetime]","desc"),
        ("filters[Stat.date][conditional]","BETWEEN"),
        ("filters[Stat.date][values][]",startStr),
        ("filters[Stat.date][values][]",endStr),
        ("data_start",startStr),
        ("data_end",endStr),
        ("Method","getConversions"),
        ("NetworkId","jasper"),
        ("SessionToken",token),
    )
    #Encode the parameters
    return urllib.urlencode(parameters)


def main():
    '''Script entry point: refresh the orders of store id 3.'''
    store = getStore(3)
    store.scrapeStoreOrders()


def todict(obj, classkey=None):
    '''
    Recursively convert obj into plain dicts / lists / scalars.

    Dicts and iterables are converted element-wise; objects exposing _ast()
    are converted through it; other objects contribute their non-callable,
    non-underscore attributes. When classkey is given, each converted object
    also records its class name under that key.
    '''
    if isinstance(obj, dict):
        return dict((k, todict(v, classkey)) for (k, v) in obj.items())
    elif hasattr(obj, "_ast"):
        return todict(obj._ast())
    elif hasattr(obj, "__iter__"):
        # Relies on Python 2 strings not defining __iter__.
        return [todict(v, classkey) for v in obj]
    elif hasattr(obj, "__dict__"):
        data = dict((key, todict(value, classkey))
                    for key, value in obj.__dict__.items()
                    if not callable(value) and not key.startswith('_'))
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    else:
        return obj


# Kept at the very end so every module-level name (notably todict) exists
# before main() runs.
if __name__ == '__main__':
    main()