# Rev 15632 | Rev 16210 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''Created on Jan 15, 2015@author: amit'''from BeautifulSoup import BeautifulSoupfrom datetime import datetime, date, timedeltafrom dtr import mainfrom dtr.dao import AffiliateInfo, Order, SubOrder, FlipkartAffiliateInfofrom dtr.main import getBrowserObject, getStore, ParseException, ungzipResponse, \Store as MStore, sourceMap, tprintfrom dtr.reports.affiliatereco import getSkuDatafrom dtr.storage.DataService import Clicks, Users, FlipkartOrdersfrom dtr.utils.utils import fetchResponseUsingProxyfrom elixir import *from pprint import pprintfrom pymongo.mongo_client import MongoClientimport hashlibimport importlibimport jsonimport mechanizeimport pymongoimport reimport tracebackimport urllibUSERNAME='saholic1@gmail.com'PASSWORD='spice@2020'ORDER_TRACK_URL='https://m.flipkart.com/order_details'AFFILIATE_URL='https://www.flipkart.com/affiliate/login'AFFILIATE_LOGIN_URL='https://www.flipkart.com/affiliate/a_login'AFF_REPORT_URL='http://www.flipkart.com/affiliate/reports/ordersReport?order_status_filter=%s&startdate=%s&enddate=%s&page=%s'AFF_STATUS_CANCELLED='cancelled'AFF_STATUS_APPROVED='approved'AFF_STATUS_DISAPPROVED='disapproved'AFF_STATUS_PENDING='pending'categoryMap = {3:"Mobiles", 5:"Tablets"}class Store(MStore):OrderStatusMap = {main.Store.ORDER_PLACED : ['approval', 'processing', 'shipping'],main.Store.ORDER_DELIVERED : ['your item has been delivered'],main.Store.ORDER_SHIPPED : ['in transit', 'shipment yet to be delivered'],main.Store.ORDER_CANCELLED : ['shipment is returned', 'your item has been returned', 'your shipment has been cancelled', 'your shipment has been cancelled.']}def __init__(self,store_id):client = MongoClient('mongodb://localhost:27017/')self.db = client.dtrsuper(Store, self).__init__(store_id)def getName(self):return "flipkart"def scrapeStoreOrders(self,):orders = self._getActiveOrders()for order in orders:print "Order", self.store_name, order['orderId']try:closed = Trueurl = ORDER_TRACK_URL + re.findall('.*(\?.*?)$', 
order['orderSuccessUrl'],re.IGNORECASE)[0]page = fetchResponseUsingProxy(url)soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)sections = soup.findAll("div", {"class":"ui-app-card-body"})sections.pop(1)mainOrder = soup.find("ul",{"class":"m-bottom p-cart"})fkSubOrders = mainOrder.findAll("li")#remove unwanted listfkSubOrders.pop(-1)bulk = self.db.merchantOrder.initialize_ordered_bulk_op()#fetching suborders detailsfor subOrder in fkSubOrders:updateMap = {}y = subOrder.find("header").findAll("span")for suborderId in y:if "value emp" in str(suborderId):merchantSubOrderId = suborderId.textbreakul = subOrder.find("ul")if ul is None:ul = subOrder.findAll("div", recursive=False)[0].div.divorderItems = ul.findAll("div", recursive=False)i=0for orderItem in orderItems:closedStatus = FalsemerchantSubOrderId = merchantSubOrderId + str(i)subOrder = self._isSubOrderActive(order, merchantSubOrderId)if subOrder is None:breakelif subOrder['closed']:breakfindMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": merchantSubOrderId}divs = orderItem.findAll('div', recursive=False)orderTracking = divs[2]orderTrackingDetDiv = divs[3].find('div',{'class':'c-tabs-content m-top active'})orderTrackingDet = orderTrackingDetDiv.find('div',{'class':re.compile('tracking-remark')}).textupdateMap["subOrders.$.detailedStatus"] = orderTrackingDetstatus = self._getStatusFromDetailedStatus(orderTrackingDet)tr = orderTracking.findAll("div",{"class":"tap-bullet-area c-tab-trigger"})cashbackStatus = subOrder.get("cashBackStatus")if "approveDetails-complete" in str(tr):if "processingDetails-complete" in str(tr):if "shippingDetails-complete" in str(tr):if "delivery-complete" in str(tr):status = MStore.ORDER_DELIVEREDif cashbackStatus == Store.CB_PENDING:cashbackStatus = Store.CB_APPROVEDclosedStatus = Trueelse:status = MStore.ORDER_SHIPPEDcourierTrackAnchor = orderTrackingDetDiv.find('a')trackingUrl = courierTrackAnchor("href")trackingText = 
courierTrackAnchor.textcourierArr = trackingText.split(' : ')updateMap["subOrders.$.trackingUrl"] = trackingUrlupdateMap["subOrders.$.trackingNumber"] = courierArr[1]updateMap["subOrders.$.courierName"] = courierArr[0]else:status = MStore.ORDER_CANCELLEDclosedStatus = Trueif cashbackStatus == Store.CB_PENDING:cashbackStatus = Store.CB_CANCELLEDelse:status = MStore.ORDER_PLACEDelif str(tr) in ["approveDetails-ongoing"]:status=MStore.ORDER_PLACEDif "dead" in str(tr) or status==MStore.ORDER_CANCELLED:status = MStore.ORDER_CANCELLEDclosedStatus = Trueif cashbackStatus == Store.CB_PENDING:cashbackStatus = Store.CB_CANCELLEDupdateMap["subOrders.$.cashBackStatus"] = cashbackStatusupdateMap["subOrders.$.status"] = statusupdateMap["subOrders.$.closed"] = closedStatusif closed:closed = closedStatusbulk.find(findMap).update({'$set' : updateMap})bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': closed, 'parseError':False}})bulk.execute()except:self.db.merchantOrder.update({"orderId":order['orderId']}, {"$set":{"parseError":True}})tprint("Could not update " + str(order['orderId']) + ' for store ' + self.getName())traceback.print_exc()def scrapeAffiliate(self, startDate=None, endDate=None):#get all yesterday's affiliate in pending or cancelled state and update to systemoffers = []br = getBrowserObject()br.open(AFFILIATE_URL)response = br.response() # copytoken = re.findall('window.__FK = "(.*?)"', ungzipResponse(response), re.IGNORECASE)[0]data = {'__FK':token,'email':'saholic1@gmail.com','password':'e8aacf6fc1e3998186a4a8e56e428f66'}br.open(AFFILIATE_LOGIN_URL, urllib.urlencode(data))for delta in range(1,2):yester5date = date.today() - timedelta(delta)syester5date = yester5date.strftime('%Y-%m-%d')for status in [AFF_STATUS_PENDING, AFF_STATUS_CANCELLED, AFF_STATUS_DISAPPROVED]:hasPagination=Truepagination = 0while hasPagination:pagination = pagination + 1print "pagination", paginationtry:br.open(AFF_REPORT_URL % (status, syester5date, syester5date, 
pagination))except:tprint("Could not fetch data for Status %s and date %s"%(status, syester5date))page = ungzipResponse(br.response())soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)soup.tabletry:tableElement = soup.findAll('table', {'class':'report-table fixtable'})[1]except:breaktrElements = tableElement.findAll('tr')trElements.pop(0)for trElement in trElements:tdElements = trElement.findAll('td')if tdElements[0].text.strip()=='No Products to display!':hasPagination = FalsecontinueproductCode = re.findall(r'pid=(.*)$', tdElements[0].find('a')['href'])[0]quantity =int(tdElements[3].text)price = int(float(tdElements[2].text.strip().replace(",","")))payOut = int(float(tdElements[6].text.strip().replace(",","")))saleAmount = int(float(tdElements[4].text.strip().replace(",","")))subTagId = tdElements[7].text.strip()category = tdElements[1].text.strip()affiliateInfo = FlipkartAffiliateInfo(subTagId, syester5date, productCode, price, quantity, saleAmount, payOut, status, category)affiliateInfo.productTitle = tdElements[0].find('a').text#updateMap['subOrders.$.unitPrice'] = price#updateMap['subOrders.$.cashBackAmount'], updateMap['subOrders.$.cashBacPercentage'] = self.getCashbackAmount(productCode, price)offers.append(affiliateInfo)self._saveToAffiliate(offers)yester5date = date.today() - timedelta(1)syester5date = yester5date.strftime('%Y-%m-%d')self.db.flipkartOrderAffiliateInfo.find({"saleDate":syester5date})for order in self.db.flipkartOrderAffiliateInfo.find({"saleDate":syester5date}):userId = NonesubTagId = Noneemail = NonesubTagId = order.get("subTagId")if subTagId:click = session.query(Clicks).filter_by(tag = subTagId).first()if click is not None:userId= click.user_iduser = session.query(Users.email).filter_by(id = userId).first()if user is not None:email = user.emailflipkartOrder = FlipkartOrders()flipkartOrder.user_id = userIdflipkartOrder.identifier = order.get("identifier")flipkartOrder.email = emailflipkartOrder.subtagId = 
order.get("subTagId")flipkartOrder.created = datetime.strptime(order.get("saleDate"), "%Y-%m-%d")flipkartOrder.status = order.get("conversionStatus")flipkartOrder.title = order.get("productTitle")flipkartOrder.price = order.get("price")flipkartOrder.quantity = order.get("quantity")flipkartOrder.productCode = order.get("productCode")skuData = getSkuData(2, order.get("productCode"))if skuData is not None:flipkartOrder.catalogId = skuData.get("skuBundleId")flipkartOrder.brand = skuData.get("brand")flipkartOrder.model = skuData.get("model_name")flipkartOrder.category = categoryMap.get(skuData.get("category_id"))flipkartOrder.title =skuData.get("source_product_name")session.commit()def _saveToAffiliate(self, offers):collection = self.db.flipkartOrderAffiliateInfomcollection = self.db.merchantOrderfor offer in offers:result1 = collection.update({"subTagId":offer.subTagId, "price":offer.price, "quantity":offer.quantity, "saleDate":offer.saleDate, "productCode":offer.productCode},{"$set":todict(offer)}, upsert=True)if result1.get("upserted") is not None:result2 = mcollection.update({"subTagId":offer.subTagId, "storeId":self.store_id, "subOrders.missingAff":True, "subOrders.productCode":offer.productCode}, {"$set":{"subOrders.$.missingAff":False}}, multi=True)if result2.get("updatedExisting") is True:mcollection.update({"_id":result1.get("upserted")}, {"missingOrders":False})def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):resp= {}try:br = getBrowserObject()url = ORDER_TRACK_URL + re.findall('.*(\?.*?)$', orderSuccessUrl,re.IGNORECASE)[0]response = br.open(url)page = ungzipResponse(response)merchantOrderId = re.findall('reference_id=(.*?)&', orderSuccessUrl,re.IGNORECASE)[0]print merchantOrderIdsoup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)sections = soup.findAll("div", {"class":"ui-app-card-body"})sections.pop(1)merchantOrder = Order(orderId, userId, subTagId, self.store_id, 
orderSuccessUrl)merchantOrder.orderTrackingUrl = urlfor data in sections:name = data.findAll("span")i=0while i< len(name):if "key" in str(name[i]):if "Grand Total" in str(name[i]):#Total AmountmerchantOrder.paidAmount = int(re.findall(r'\d+', name[i+1].text)[0])elif "Order Date" in str(name[i]):merchantOrder.placedOn = name[i+1].texti=i+1merchantOrder.merchantOrderId = merchantOrderIdmainOrder = soup.find("ul",{"class":"m-bottom p-cart"})fkSubOrders = mainOrder.findAll("li")#remove unwanted listfkSubOrders.pop(-1)subOrders = []merchantOrder.subOrders = subOrders#fetching suborders detailsfor subOrder in fkSubOrders:y = subOrder.find("header").findAll("span")for suborderId in y:if "value emp" in str(suborderId):merchantSubOrderId = suborderId.textbreakorderItems = subOrder.find("ul").findAll("div", recursive=False)i=0for orderItem in orderItems:merchantOrder.placedOnmerchantSubOrderId = merchantSubOrderId + str(i)divs = orderItem.findAll('div', recursive=False)content = divs[0]orderTracking = divs[2]orderTrackingDet = divs[3]imgUrl = content.find('img')['src']lineDet = content.find("div",{"class":"product-info"})productTitle = lineDet.find("a",{"class":"product-title"}).textproductUrl = str(lineDet.find("a")['href'])start='pid='s=str(lineDet.find("a")['href'])productCode = re.findall(re.escape(start)+"(.*)",s)[0].strip()mname = lineDet.findAll("span")k=0while k<len(mname):if "note" in str(mname[k]):if "Color:" in str(mname[k]):productTitle = productTitle + " " + mname[k+1].textelif "Qty:" in str(mname[k]):quantity = int(mname[k+1].text)elif "Subtotal:" in str(mname[k]):amountPaid = int(re.findall(r'\d+', mname[k+1].text)[0])elif "Delivery:" in str(mname[k]):#Delivery Date for sub orderprint "Delivery Date " +mname[k+1].textk=k+1merchantsubOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, amountPaid,MStore.ORDER_PLACED, quantity)merchantsubOrder.imgUrl = imgUrlmerchantsubOrder.productCode = productCodemerchantsubOrder.merchantSubOrderId = 
merchantSubOrderIdprint "productCode", productCodeprint "amountPaid", amountPaidcashbackAmount, cashbackPercent = self.getCashbackAmount(productCode, amountPaid)cashbackStatus = Store.CB_PENDINGif cashbackAmount <= 0:cashbackStatus = Store.CB_NAmerchantsubOrder.cashBackAmount = cashbackAmountmerchantsubOrder.cashBackStatus = cashbackStatusmerchantsubOrder.cashBackPercentage = cashbackPercentsubOrders.append(merchantsubOrder)#To track shipping detailsstatus=-1tr = orderTracking.findAll("div",{"class":"tap-bullet-area c-tab-trigger"})if "approveDetails-complete" in str(tr):if "processingDetails-complete" in str(tr):if "shippingDetails-complete" in str(tr):if "delivery-complete" in str(tr):status = MStore.ORDER_DELIVEREDelse:status = MStore.ORDER_SHIPPEDorderTracking.find('div', {'class':'tracking-remark active'})else:status = MStore.ORDER_PLACEDelse:status = MStore.ORDER_PLACEDelif str(tr) in ["approveDetails-ongoing"]:status=MStore.ORDER_PLACEDif "dead" in str(tr) or "shippingDetails-returnOngoing" in str(tr) or "shippingDetails-return" in str(tr):status = MStore.ORDER_CANCELLEDprint "Sub Order Status " + str(status)trackingDetailsActive = orderTrackingDet.find("div",{"class":"c-tabs-content m-top active"})print "Sub order tracking description " + trackingDetailsActive.textif self._saveToOrder(todict(merchantOrder)):resp['result'] = 'ORDER_CREATED'else:resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'return respexcept:traceback.print_exc()resp['result'] = 'ORDER_NOT_CREATED'return respdef _getStatusFromDetailedStatus(self, detailedStatus):for key, value in Store.OrderStatusMap.iteritems():if detailedStatus.lower() in value:return keyprint "Detailed Status need to be mapped", detailedStatus, "Store:", self.store_nameraise ParseException("_getStatusFromDetailedStatus", "Found new order status" + detailedStatus)def flipkartOrderTracking(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):br = getBrowserObject()url = ORDER_TRACK_URL + re.findall('.*(\?.*?)$', 
orderSuccessUrl,re.IGNORECASE)[0]response = br.open(url)page = ungzipResponse(response, br)soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)mainOrder = soup.findAll("ul",{"class":"m-bottom p-cart"})#fetching suborders detailsfor subOrder in mainOrder:subOrd = subOrder.findAll("li")subOrd.pop(len(subOrd)-1)for productInfo in subOrd:status=-1#To track shipping detailsn = productInfo.findAll("div",{"class":"tracking-progress grid-row cf"})for trackingStatus in n:tr = trackingStatus.findAll("div",{"class":"tap-bullet-area c-tab-trigger"})if "approveDetails-complete" in str(tr):print "Here"if "processingDetails-complete" in str(tr):print "There"if "shippingDetails-complete" in str(tr):print "Share"if "delivery-complete" in str(tr):print "Last"status = 3else:status = 2else:status = 1else:status = 0if "dead" in str(tr):status = 4print statusdef _saveToOrderFlipkart(self, order):collection = self.db.merchantOrderorder = collection.insert(order)def hex_md5(password):m = hashlib.md5()print(m.digest())def main():store = getStore(2)store.scrapeAffiliate()#store.scrapeStoreOrders()#store.parseOrderRawHtml(12346, "subtagId", 122324, "html", 'https://m.flipkart.com/orderresponse?reference_id=OD3016502908102575&token=0db4c692bacbfbfc158b52358ac9e91e&src=or&pr=1')#store.parseOrderRawHtml(12346, "subtagId", 122324, "html", 'https://m.flipkart.com/orderresponse?reference_id=OD3018701137253850&token=f7402ddcf2b63b37cc6bc528cc115d2f&src=or&pr=1')#store.parseOrderRawHtml(12346, "subtagId", 122324, "html", 'https://m.flipkart.com/orderresponse?reference_id=OD0019279584515727&token=7d85d8c24d36b5a1efc8008634390c7e&src=or&pr=1')#store.flipkartOrderTracking(12346, "subtagId", 122324, "html", 'https://m.flipkart.com/orderresponse?reference_id=OD0019279584515727&token=7d85d8c24d36b5a1efc8008634390c7e&src=or&pr=1')#store.parseOrderRawHtml(12346, "subtagId", 122324, "html", 
'https://m.flipkart.com/orderresponse?reference_id=OD0019365336126533&token=dbce2bd4dc4023295b436a7d3c7986c9&src=or&pr=1')#store.parseOrderRawHtml(12346, "subtagId", 122324, "html", 'https://dl.flipkart.com/orderresponse?reference_id=OD1019453634552336&token=e8e04871ad65b532aa53fa82bb34b901&src=or&pr=1')#hex_md5('spice@2020')#store.getCashbackAmount('MOBDVHC6XKKPZ3GZ', 5999)if __name__ == '__main__':main()def todict(obj, classkey=None):if isinstance(obj, dict):data = {}for (k, v) in obj.items():data[k] = todict(v, classkey)return dataelif hasattr(obj, "_ast"):return todict(obj._ast())elif hasattr(obj, "__iter__"):return [todict(v, classkey) for v in obj]elif hasattr(obj, "__dict__"):data = dict([(key, todict(value, classkey))for key, value in obj.__dict__.iteritems()if not callable(value) and not key.startswith('_')])if classkey is not None and hasattr(obj, "__class__"):data[classkey] = obj.__class__.__name__return dataelse:return obj