# Repository viewer header (not part of the program): Rev 13796 | Rev 14290 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Flipkart store scraper: order tracking, affiliate report and order parsing.

Created on Jan 15, 2015

@author: amit
'''
from BeautifulSoup import BeautifulSoup
from datetime import datetime, date, timedelta
from dtr import main
from dtr.dao import AffiliateInfo, Order, SubOrder
from dtr.main import getBrowserObject, getStore, ParseException, ungzipResponse, \
    Store as MStore, sourceMap
from pprint import pprint
from pymongo.mongo_client import MongoClient
import hashlib
import importlib
import json
import mechanize
import re
import traceback
import urllib

# NOTE(review): credentials are hard-coded in source control; they should be
# moved to configuration. Values are left untouched here.
USERNAME = 'saholic1@gmail.com'
PASSWORD = 'spice@2020'
ORDER_TRACK_URL = 'https://m.flipkart.com/order_details'
AFFILIATE_URL = 'https://www.flipkart.com/affiliate/login'
AFFILIATE_LOGIN_URL = 'https://www.flipkart.com/affiliate/a_login'
AFF_REPORT_URL = 'http://www.flipkart.com/affiliate/reports/ordersReport?order_status_filter=%s&startdate=%s&enddate=%s'
AFF_STATUS_CANCELLED = 'cancelled'
AFF_STATUS_APPROVED = 'approved'
AFF_STATUS_DISAPPROVED = 'disapproved'
AFF_STATUS_PENDING = 'pending'


def _trackingUrl(orderSuccessUrl):
    """Return ORDER_TRACK_URL with the query string of orderSuccessUrl appended.

    The pattern captures everything from the last '?' to the end of the URL.
    Raises IndexError when the URL has no query string.
    """
    return ORDER_TRACK_URL + re.findall(r'.*(\?.*?)$', orderSuccessUrl, re.IGNORECASE)[0]


def _firstInt(text):
    """Return the first run of digits in text as an int.

    Commas are removed first so that an amount written with thousands
    separators ("12,499") is not truncated at the first comma.
    Raises IndexError when text contains no digits.
    """
    return int(re.findall(r'\d+', text.replace(',', ''))[0])


def _progressStatus(trackerHtml):
    """Map the tracker markup of one order item to an order status.

    trackerHtml is the string form of the "tap-bullet-area c-tab-trigger"
    divs. Returns one of the MStore.ORDER_* constants, or None when no
    progress marker is present. The "dead" (cancelled) marker is NOT handled
    here; callers check it separately because it overrides the progress.
    """
    if "approveDetails-complete" in trackerHtml:
        if "processingDetails-complete" not in trackerHtml:
            return MStore.ORDER_PLACED
        if "shippingDetails-complete" not in trackerHtml:
            return MStore.ORDER_PLACED
        if "delivery-complete" in trackerHtml:
            return MStore.ORDER_DELIVERED
        return MStore.ORDER_SHIPPED
    # The original compared the whole markup for equality with this marker,
    # which could never match; a substring test is what the sibling checks use.
    if "approveDetails-ongoing" in trackerHtml:
        return MStore.ORDER_PLACED
    return None


class Store(MStore):
    """Flipkart-specific store built on the generic dtr.main.Store."""

    # MStore is dtr.main.Store; using it directly avoids depending on the
    # module name `main`, which is rebound by the main() function below.
    OrderStatusMap = {
        MStore.ORDER_PLACED: ['Approval', 'Processing'],
        MStore.ORDER_DELIVERED: ['Shipping'],
        MStore.ORDER_SHIPPED: ['In Transit'],
        MStore.ORDER_CANCELLED: [''],
    }

    def __init__(self, store_id):
        """Open the local MongoDB `dtr` database and initialise the base store."""
        client = MongoClient('mongodb://localhost:27017/')
        self.db = client.dtr
        super(Store, self).__init__(store_id)

    def getName(self):
        """Return the store name, "flipkart"."""
        return "flipkart"

    def scrapeStoreOrders(self):
        """Refresh status, tracking and cashback state of every active order.

        For each order returned by self._getActiveOrders() the mobile order
        page is fetched and one bulk update per order item is queued against
        the merchantOrder collection, followed by an update of the order-level
        `closed` flag. An order is closed only when all its scraped items are.
        """
        orders = self._getActiveOrders()
        print("Found orders %s" % (orders,))
        br = getBrowserObject()
        for order in orders:
            closed = True
            response = br.open(_trackingUrl(order['orderSuccessUrl']))
            page = ungzipResponse(response)
            soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
            mainOrder = soup.find("ul", {"class": "m-bottom p-cart"})
            fkSubOrders = mainOrder.findAll("li")
            # remove unwanted list
            fkSubOrders.pop(-1)
            bulk = self.db.merchantOrder.initialize_ordered_bulk_op()
            # fetching suborders details
            for fkSubOrder in fkSubOrders:
                for headerSpan in fkSubOrder.find("header").findAll("span"):
                    if "value emp" in str(headerSpan):
                        merchantSubOrderId = headerSpan.text
                        break
                orderItems = fkSubOrder.find("ul").findAll("div", recursive=False)
                for orderItem in orderItems:
                    closedStatus = False
                    # NOTE(review): the original appended str(i) with i fixed
                    # at 0 and never incremented, so ids grow as "<id>0",
                    # "<id>00", ... parseOrderRawHtml builds ids the same way
                    # and stored ids may depend on it, so it is kept as-is.
                    merchantSubOrderId = merchantSubOrderId + "0"
                    activeSubOrder = self._isSubOrderActive(order, merchantSubOrderId)
                    if activeSubOrder is None:
                        break
                    findMap = {"orderId": order['orderId'],
                               "subOrders.merchantSubOrderId": merchantSubOrderId}
                    # A fresh dict per item: the bulk operation keeps a
                    # reference to it, so sharing one dict across items would
                    # let later items overwrite earlier queued updates.
                    updateMap = {}
                    divs = orderItem.findAll('div', recursive=False)
                    orderTracking = divs[2]
                    orderTrackingDetDiv = divs[3].find('div', {'class': 'c-tabs-content m-top active'})
                    updateMap["subOrders.$.detailedStatus"] = orderTrackingDetDiv.div.text.strip()
                    trackerHtml = str(orderTracking.findAll("div", {"class": "tap-bullet-area c-tab-trigger"}))
                    cashbackStatus = activeSubOrder.get("cashBackStatus")
                    status = _progressStatus(trackerHtml)
                    if status == MStore.ORDER_DELIVERED:
                        if cashbackStatus == Store.CB_INIT:
                            cashbackStatus = Store.CB_APPROVED
                        closedStatus = True
                    elif status == MStore.ORDER_SHIPPED:
                        courierTrackAnchor = orderTrackingDetDiv.find('a')
                        if courierTrackAnchor is not None:
                            # Attribute access needs [] ; calling the tag is
                            # findAll() in BeautifulSoup 3 and returned [].
                            updateMap["subOrders.$.trackingUrl"] = courierTrackAnchor["href"]
                            courierArr = courierTrackAnchor.text.split(' : ')
                            if len(courierArr) > 1:
                                updateMap["subOrders.$.trackingNumber"] = courierArr[1]
                                updateMap["subOrders.$.courierName"] = courierArr[0]
                    if "dead" in trackerHtml:
                        status = MStore.ORDER_CANCELLED
                        closedStatus = True
                        if cashbackStatus == Store.CB_INIT:
                            cashbackStatus = Store.CB_CANCELLED
                    updateMap["subOrders.$.cashBackStatus"] = cashbackStatus
                    if status is not None:
                        # Leave the stored status alone when the page shows no
                        # recognisable marker instead of failing on an unbound
                        # name or reusing the previous item's status.
                        updateMap["subOrders.$.status"] = status
                    updateMap["subOrders.$.closed"] = closedStatus
                    if closed:
                        closed = closedStatus
                    bulk.find(findMap).update({'$set': updateMap})
            bulk.find({'orderId': order['orderId']}).update({'$set': {'closed': closed}})
            bulk.execute()

    def scrapeAffiliate(self, startDate=None, endDate=None):
        """Pull yesterday's affiliate report rows and update matching orders.

        Only runs when the current local hour is 1; at any other hour it
        returns immediately. startDate and endDate are accepted for interface
        compatibility but are not used. Reports are fetched for the pending,
        cancelled and disapproved statuses.
        """
        # get all yesterday's affiliate in pending or cancelled state and update to system
        if datetime.now().hour != 1:
            return
        yesterdate = date.today() - timedelta(1)
        syesterdate = yesterdate.strftime('%Y-%m-%d')
        br = getBrowserObject()
        br.open(AFFILIATE_URL)
        response = br.response()  # copy
        token = re.findall('window.__FK = "(.*?)"', ungzipResponse(response), re.IGNORECASE)[0]
        # NOTE(review): the password value is a hard-coded 32-hex-digit string
        # (presumably an MD5 of the account password - confirm).
        data = {'__FK': token, 'email': USERNAME,
                'password': 'e8aacf6fc1e3998186a4a8e56e428f66'}
        br.open(AFFILIATE_LOGIN_URL, urllib.urlencode(data))
        for status in [AFF_STATUS_PENDING, AFF_STATUS_CANCELLED, AFF_STATUS_DISAPPROVED]:
            br.open(AFF_REPORT_URL % (status, syesterdate, syesterdate))
            page = ungzipResponse(br.response())
            soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
            try:
                tableElement = soup.findAll('table', {'class': 'report-table fixtable'})[1]
            except IndexError:
                # No report table for this status.
                continue
            # First row is skipped (treated as the header by the original code).
            for trElement in tableElement.findAll('tr')[1:]:
                tdElements = trElement.findAll('td')
                productCode = re.findall(r'pid=(.*)$', tdElements[0].find('a')['href'])[0]
                quantity = int(tdElements[3].text)
                price = _firstInt(tdElements[2].text)
                payOut = _firstInt(tdElements[6].text)
                subTagId = tdElements[7].text
                affiliateInfo = AffiliateInfo(subTagId, self.store_id, status, None,
                                              syesterdate, payOut, None, None, price * quantity)
                print(todict(affiliateInfo))
                updateMap = {}
                updateMap['subOrders.$.unitPrice'] = price
                # Key corrected from 'cashBacPercentage' so it matches the
                # cashBackPercentage field written by parseOrderRawHtml.
                updateMap['subOrders.$.cashBackAmount'], updateMap['subOrders.$.cashBackPercentage'] = \
                    self.getCashbackAmount(productCode, price)
                self._updateOrdersPayBackStatus(
                    {'subTagId': subTagId, 'subOrders.productCode': productCode}, updateMap)

    def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):
        """Build an Order with its SubOrders from the order page and save it.

        The page is fetched from the tracking URL derived from
        orderSuccessUrl; rawHtml is not used. Returns a dict whose 'result'
        is 'ORDER_CREATED' on success or 'PARSE_ERROR' when anything fails
        (the traceback is printed in that case).
        """
        resp = {}
        try:
            br = getBrowserObject()
            response = br.open(_trackingUrl(orderSuccessUrl))
            page = ungzipResponse(response)
            merchantOrderId = re.findall('reference_id=(.*?)&', orderSuccessUrl, re.IGNORECASE)[0]
            print(merchantOrderId)
            soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
            sections = soup.findAll("div", {"class": "ui-app-card-body"})
            sections.pop(1)
            merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)
            for section in sections:
                spans = section.findAll("span")
                for idx, span in enumerate(spans):
                    label = str(span)
                    if "key" not in label:
                        continue
                    # The value lives in the span that follows its label.
                    if "Grand Total" in label:
                        # Total Amount
                        merchantOrder.paidAmount = _firstInt(spans[idx + 1].text)
                    elif "Order Date" in label:
                        merchantOrder.placedOn = spans[idx + 1].text
            merchantOrder.merchantOrderId = merchantOrderId
            mainOrder = soup.find("ul", {"class": "m-bottom p-cart"})
            fkSubOrders = mainOrder.findAll("li")
            # remove unwanted list
            fkSubOrders.pop(-1)
            subOrders = []
            merchantOrder.subOrders = subOrders
            # fetching suborders details
            for fkSubOrder in fkSubOrders:
                for headerSpan in fkSubOrder.find("header").findAll("span"):
                    if "value emp" in str(headerSpan):
                        merchantSubOrderId = headerSpan.text
                        break
                orderItems = fkSubOrder.find("ul").findAll("div", recursive=False)
                for orderItem in orderItems:
                    # NOTE(review): same id scheme as scrapeStoreOrders (a "0"
                    # is appended per item); the two must stay in sync.
                    merchantSubOrderId = merchantSubOrderId + "0"
                    divs = orderItem.findAll('div', recursive=False)
                    content = divs[0]
                    orderTracking = divs[2]
                    orderTrackingDet = divs[3]
                    imgUrl = content.find('img')['src']
                    lineDet = content.find("div", {"class": "product-info"})
                    productTitle = lineDet.find("a", {"class": "product-title"}).text
                    productUrl = str(lineDet.find("a")['href'])
                    productCode = re.findall(r'pid=(.*)', productUrl)[0]
                    notes = lineDet.findAll("span")
                    for idx, note in enumerate(notes):
                        label = str(note)
                        if "note" not in label:
                            continue
                        if "Color:" in label:
                            productTitle = productTitle + " " + notes[idx + 1].text
                        elif "Qty:" in label:
                            quantity = int(notes[idx + 1].text)
                        elif "Subtotal:" in label:
                            # Stored as int (was a string) so the cashback
                            # computation gets a number, as in scrapeAffiliate.
                            amountPaid = _firstInt(notes[idx + 1].text)
                        elif "Delivery:" in label:
                            # Delivery Date for sub order
                            print("Delivery Date " + notes[idx + 1].text)
                    merchantsubOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn,
                                                amountPaid, MStore.ORDER_PLACED, quantity)
                    merchantsubOrder.imgUrl = imgUrl
                    merchantsubOrder.productCode = productCode
                    merchantsubOrder.merchantSubOrderId = merchantSubOrderId
                    # The original read `.productCode` off the BeautifulSoup
                    # <li> tag (which yields None); use the parsed code.
                    cashbackAmount, cashbackPercent = self.getCashbackAmount(productCode, amountPaid)
                    cashbackStatus = Store.CB_PENDING
                    if cashbackAmount <= 0:
                        cashbackStatus = Store.CB_NA
                    merchantsubOrder.cashBackAmount = cashbackAmount
                    merchantsubOrder.cashBackStatus = cashbackStatus
                    merchantsubOrder.cashBackPercentage = cashbackPercent
                    subOrders.append(merchantsubOrder)
                    # To track shipping details (only printed, not stored).
                    trackerHtml = str(orderTracking.findAll("div", {"class": "tap-bullet-area c-tab-trigger"}))
                    status = _progressStatus(trackerHtml)
                    if status is None:
                        status = -1
                    if "dead" in trackerHtml:
                        status = MStore.ORDER_CANCELLED
                    print("Sub Order Status " + str(status))
                    trackingDetailsActive = orderTrackingDet.find("div", {"class": "c-tabs-content m-top active"})
                    print("Sub order tracking description " + trackingDetailsActive.text)
            self._saveToOrder(todict(merchantOrder))
            resp['result'] = 'ORDER_CREATED'
            return resp
        except Exception:
            # Keep the best-effort contract (PARSE_ERROR result) but do not
            # hide the cause of the failure.
            traceback.print_exc()
            resp['result'] = 'PARSE_ERROR'
            return resp

    def flipkartOrderTracking(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):
        """Debug helper: print a numeric tracking stage for each order entry.

        Prints -1 (no marker), 0..3 (progress stages) or 4 (cancelled).
        Only orderSuccessUrl is used; nothing is stored or returned.
        """
        br = getBrowserObject()
        response = br.open(_trackingUrl(orderSuccessUrl))
        # NOTE(review): every other call site passes a single argument to
        # ungzipResponse - confirm that the two-argument form is supported.
        page = ungzipResponse(response, br)
        soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
        mainOrder = soup.findAll("ul", {"class": "m-bottom p-cart"})
        # fetching suborders details
        for subOrder in mainOrder:
            subOrd = subOrder.findAll("li")
            subOrd.pop(len(subOrd) - 1)
            for productInfo in subOrd:
                status = -1
                # To track shipping details
                trackers = productInfo.findAll("div", {"class": "tracking-progress grid-row cf"})
                for trackingStatus in trackers:
                    tr = str(trackingStatus.findAll("div", {"class": "tap-bullet-area c-tab-trigger"}))
                    if "approveDetails-complete" in tr:
                        print("Here")
                        if "processingDetails-complete" in tr:
                            print("There")
                            if "shippingDetails-complete" in tr:
                                print("Share")
                                if "delivery-complete" in tr:
                                    print("Last")
                                    status = 3
                                else:
                                    status = 2
                            else:
                                status = 1
                        else:
                            status = 0
                    if "dead" in tr:
                        status = 4
                print(status)

    def _saveToOrderFlipkart(self, order):
        """Insert the order document into the merchantOrder collection."""
        self.db.merchantOrder.insert(order)


def hex_md5(password):
    """Print and return the hexadecimal MD5 digest of password.

    The original never fed the password into the hash and printed the raw
    digest of empty input. Text input is encoded as UTF-8 before hashing.
    """
    if not isinstance(password, bytes):
        password = password.encode('utf-8')
    m = hashlib.md5()
    m.update(password)
    digest = m.hexdigest()
    print(digest)
    return digest


def main():
    """Script entry point: refresh all active orders of store id 2."""
    # Commented-out ad-hoc debugging calls (with embedded order tokens and a
    # password) were removed; use an interactive session for one-off runs.
    store = getStore(2)
    store.scrapeStoreOrders()


def todict(obj, classkey=None):
    """Recursively convert obj into plain dicts, lists and scalars.

    Dicts are converted value by value, objects exposing `_ast()` are
    converted through it, iterables become lists, and objects with a
    `__dict__` become dicts of their non-callable attributes whose names do
    not start with an underscore. When classkey is given, the class name is
    stored under that key. Anything else is returned unchanged.
    """
    # Strings are returned as-is up front: they must never be treated as
    # iterables of characters (type(u'') covers Python 2 unicode).
    if isinstance(obj, (str, bytes, type(u''))):
        return obj
    if isinstance(obj, dict):
        return {k: todict(v, classkey) for (k, v) in obj.items()}
    if hasattr(obj, "_ast"):
        return todict(obj._ast())
    if hasattr(obj, "__iter__"):
        return [todict(v, classkey) for v in obj]
    if hasattr(obj, "__dict__"):
        data = {key: todict(value, classkey)
                for key, value in obj.__dict__.items()
                if not callable(value) and not key.startswith('_')}
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    return obj


# The guard sits at the very end so that every module-level name (todict in
# particular) is defined before main() runs.
if __name__ == '__main__':
    main()