# Rev 17100 | Rev 18086 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''Created on Jan 15, 2015@author: Manish'''from bs4 import BeautifulSoupfrom bson.binary import Binaryfrom datetime import datetime, date, timedeltafrom dtr import mainfrom dtr.dao import AffiliateInfo, Order, SubOrder, ShopCluesAffiliateInfofrom dtr.main import getBrowserObject, ScrapeException, getStore, ParseException, \Store as MStore, ungzipResponse, tprintfrom dtr.storage import Mongofrom dtr.storage.Mongo import getImgSrcfrom dtr.utils.utils import fetchResponseUsingProxy, PROXY_MESH_GENERALfrom pprint import pprintfrom pymongo import MongoClientimport jsonimport pymongoimport reimport timeimport tracebackimport urllibimport urllib2from urlparse import urlparse, parse_qsimport xml.etree.ElementTree as ETfrom dtr.storage import MemCachefrom dtr.storage.Mongo import getDealRankAFFLIATE_TRASACTIONS_URL = "https://admin.optimisemedia.com/v2/reports/affiliate/leads/leadsummaryexport.aspx?Contact=796881&Country=26&Agency=95&Merchant=420562&Status=-1&Year=%d&Month=%d&Day=%d&EndYear=%d&EndMonth=%d&EndDay=%d&DateType=0&Sort=CompletionDate&Login=1347562DA5E3EFF6FB1561765C47C782&Format=XML&RestrictURL=0"ORDER_TRACK_URL='http://www.shopclues.com/index.php?dispatch=order_lookup.details'ORDER_TRACK_URL_DB='https://sm.shopclues.com/trackOrder?'BASE_URL= 'http://www.shopclues.com'BASE_MURL= 'http://m.shopclues.com'class Store(MStore):'''This is to map order statuses of our system to order statuses of snapdeal.And our statuses will change accordingly.'''OrderStatusMap = {MStore.ORDER_PLACED : ['payment successful', 'new order - cod confirmation pending', 'processing', 'quality check','on schedule', 'processing - pickup initiated', 'processing - ready to dispatch','processing - procurement delay from merchant','processing - slight procurment delay from merchant','cod order confirmed by customer'],MStore.ORDER_DELIVERED : ['delivered', 'complete'],MStore.ORDER_SHIPPED : ['in transit', 'dispatched','shipped','order handed to courier','order handed over to 
courier'],MStore.ORDER_CANCELLED : ['payment failed', 'canceled', 'payment declined', 'order on hold - cancellation requested by customer', 'courier returned', 'canceled on customer request', 'canceled by customer','order canceled by customer','canceled - address not shippable','return complete','undelivered - returning to origin', 'canceled - shipment untraceable','order declined']}OrderStatusConfirmationMap= {"P" : "Payment Successful","D" : "Order Declined","O" : "New Order - COD confirmation Pending"}CONF_CB_AMOUNT = MStore.CONF_CB_DISCOUNTED_PRICEdef __init__(self,store_id):super(Store, self).__init__(store_id)def convertToObj(self,offer):orderRef = offer['MerchantRef']if len(orderRef)>15:orderRef = orderRef[0:len(orderRef)-10]offer1 = ShopCluesAffiliateInfo(offer['UID'], offer['TransactionTime'], offer['TransactionID'], orderRef, orderRef, offer['Merchant'], offer['PID'], offer['Product'], float(str(offer['SR'])), float(str(offer['TransactionValue'])), offer['UKey'], offer['ClickTime'], offer['Status'])return offer1def _saveToAffiliate(self, offers):collection = self.db.shopcluesOrderAffiliateInfomcollection = self.db.merchantOrderfor offerObj in offers:offer = self.convertToObj(offerObj)collection.update({"transactionId":offer.transactionId, "subTagId":offer.subTagId, "payOut":offer.payOut},{"$set":todict(offer)}, upsert=True)mcollection.update({"subTagId":offer.subTagId, "storeId":self.store_id, "subOrders.missingAff":True}, {"$set":{"subOrders.$.missingAff":False}})def scrapeAffiliate(self, startDate=datetime.today() - timedelta(days=10), endDate=datetime.today()):uri = AFFLIATE_TRASACTIONS_URL%(startDate.year,startDate.month,startDate.day,endDate.year,endDate.month,endDate.day)root = ET.parse(urllib2.urlopen(uri)).getroot()if len(root)> 0 and len(root[0])> 0:offers = []for child in root[0][0]:offers.append(child.attrib)self._saveToAffiliate(offers)def _setLastSaleDate(self, saleDate):self.db.lastSaleDtate.update({'storeId':self.store_id}, 
{'$set':{'saleDate':saleDate}})def getName(self):return "shopclues"def _getLastSaleDate(self,):lastDaySaleObj = self.db.lastDaySale.find_one({"storeId":self.store_id})if lastDaySaleObj is None:return datetime.mindef _getStatusFromDetailedStatus(self, detailedStatus):for key, value in Store.OrderStatusMap.iteritems():if detailedStatus.lower() in value:return keyprint "Detailed Status need to be mapped", detailedStatus, self.store_idreturn Nonedef _getSingleSubOrderMap(self, orderId, soup, orderObj):orderStatus = self.OrderStatusConfirmationMap.get(orderObj['0']['status'])statusTime= str(to_py_date(long(orderObj['0']['timestamp'])))productDetailsMap = {}orderTables = soup.body.findAll("table", {'class':'table product-list'})orderTable = orderTables[len(orderTables)-1].findAll('tr', recursive=False)firstRow = orderTable.pop(0)totalColumns = len(firstRow.find_all('th'))jsonSubOrdersMap = {}count = 1for val in orderObj['0']['items'].values():newCount = 0counts = []for key in jsonSubOrdersMap.keys():splitKey = key.split('-')if orderObj['0']['is_parent_order'] == 'N':if str(val['order_id']) == splitKey[0]:counts.append(int(splitKey[1]))else:if val['child'] is None:if str(val['order_id']) == splitKey[0]:counts.append(int(splitKey[1]))else:if str(val['child']) == splitKey[0]:counts.append(int(splitKey[1]))if len(counts) >0:newCount = max(counts)count = newCount +1if orderObj['0']['is_parent_order'] == 'N':jsonSubOrdersMap[str(val['order_id'])+'-'+str(count)] = valelse:if val['child'] is None:jsonSubOrdersMap[str(val['order_id'])+'-'+str(count)] = valelse:jsonSubOrdersMap[str(val['child'])+'-'+str(count)] = valprint jsonSubOrdersMap.items()count = 1for orderTr in orderTable:productDetailsSubMap = NoneproductDetailsSubMap = {}cols = orderTr.find_all('td')product_details = cols[0].find_all('a')#print product_detailsproductUrl = product_details[0].get('href')productName = product_details[0].contents[0].strip()quantity = int(cols[1].text.strip())sellingPrice = 
long(cols[2].text.strip().replace("Rs.","").replace(',',''))discount = 0subtotal = 0if totalColumns == 5:if cols[3].text.strip()!='-' and 'Rs.' in cols[3].text.strip():discount = long(cols[3].text.strip().replace("Rs.","").replace(',',''))else:discount = 0subtotal = long(cols[4].text.strip().replace("Rs.","").replace(',',''))elif totalColumns == 6:if cols[3].text.strip()!='-' and 'Rs.' in cols[3].text.strip():discount = long(cols[3].text.strip().replace("Rs.","").replace(',',''))else:discount = 0subtotal = long(cols[5].text.strip().replace("Rs.","").replace(',',''))else:subtotal = long(cols[3].text.strip().replace("Rs.","").replace(',',''))productDetailsSubMap['productUrl']=BASE_MURL+productUrlproductDetailsSubMap['productName']=productNameproductDetailsSubMap['subOrderTrackingUrl']=ORDER_TRACK_URL_DB+'order_id=' +str(orderId)+'&email_id='+ orderObj['0']['email']productDetailsSubMap['sellingPrice']=sellingPriceproductDetailsSubMap['quantity']=quantityproductDetailsSubMap['discount']=discountproductDetailsSubMap['subtotal']=subtotalkey = str(orderId)+'-'+str(count)print 'SubOrder Map Key--',keyjsonSubOrderDetails = jsonSubOrdersMap.get(str(orderId)+'-'+str(count))productCode = jsonSubOrderDetails['product_code']productImgUrl = jsonSubOrderDetails['images']['image_path'][0]productDetailsSubMap['productCode']=productCodeproductDetailsSubMap['imgUrl']=productImgUrlproductDetailsSubMap['subOrderStatus']=orderStatusproductDetailsSubMap['subOrderStatusTime']=statusTimeproductDetailsMap[str(orderId)+'-'+str(count)]=productDetailsSubMapcount = count +1return productDetailsMapdef _getMultiSubOrdersMap(self, orderId, soup, orderObj):orderTables = soup.body.findAll("table", {'class':'table product-list'})orderTable = orderTables[len(orderTables)-1].findAll('tr', recursive=False)firstRow = orderTable.pop(0)#totalColumns = len(firstRow.find_all('td'))productDetailsMap = {}'''jsonSubOrdersMap = {}for val in orderObj['0']['items'].values():jsonSubOrdersMap[val['order_id']] = 
val'''existingOrders = []for orderTr in orderTable:subOrderDetailsMap = {}cols = orderTr.find_all('td')product_details = cols[0].find_all('a')#print product_detailssubOrderId= product_details[1].contents[0].strip()if subOrderId in existingOrders:continueelse:existingOrders.append(subOrderId)#productUrl = product_details[0].get('href')#productName = product_details[0].contents[0].strip()subOrderTrackingParsingUrl = product_details[1].get('href')#subOrderTrackingUrl = subOrderTrackingParsingUrl.split('order_lookup.details&')[1]'''quantity = int(cols[1].text.strip())sellingPrice = long(cols[2].text.strip().replace("Rs.",""))discount = 0subtotal = 0if totalColumns == 5:if cols[3].text.strip()!='-' or 'Rs.' in cols[3].text.strip():discount = long(cols[3].text.strip().replace("Rs.",""))else:discount = 0subtotal = long(cols[4].text.strip().replace("Rs.",""))else:subtotal = long(cols[3].text.strip().replace("Rs.",""))productDetailsSubMap['productUrl']=BASE_MURL+productUrlproductDetailsSubMap['productName']=productNameproductDetailsSubMap['subOrderTrackingUrl']=ORDER_TRACK_URL_DB+subOrderTrackingUrlproductDetailsSubMap['sellingPrice']=sellingPriceproductDetailsSubMap['quantity']=quantityproductDetailsSubMap['discount']=discountproductDetailsSubMap['subtotal']=subtotaljsonSubOrderDetails = jsonSubOrdersMap.get(subOrderId)productCode = jsonSubOrderDetails['product_code']productImgUrl = jsonSubOrderDetails['images']['image_path'][0]productDetailsSubMap['productCode']=productCodeproductDetailsSubMap['imgUrl']=productImgUrl'''orderTrackingPage = fetchResponseUsingProxy(BASE_URL+subOrderTrackingParsingUrl)orderTrackingPageSoup = BeautifulSoup(orderTrackingPage)'''subOrderStatusList = orderTrackingPageSoup.findAll(attrs={'class' : 'price ord_status'})subOrderStatus = subOrderStatusList[0].contents[0].strip()subOrderStatusTime= orderTrackingPageSoup.findAll(attrs={'class' : 'sts no_mobile'})[1].contents[2].strip()productDetailsSubMap['subOrderStatus'] = 
subOrderStatusproductDetailsSubMap['subOrderStatusTime'] = subOrderStatusTimeproductDetailsSubMap['parentOrderId']=orderId'''subOrderDetailsMap = self._getSingleSubOrderMap(subOrderId, orderTrackingPageSoup, orderObj)productDetailsMap = dict(productDetailsMap.items()+subOrderDetailsMap.items())return productDetailsMapdef updateCashbackInSubOrders(self, subOrders):for subOrder in subOrders:cashbackStatus = Store.CB_NAcashbackAmount = 0percentage = 0amount = subOrder.amountPaidif amount > 0:(cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amount)if cashbackAmount > 0:cashbackStatus = Store.CB_PENDINGsubOrder.cashBackStatus = cashbackStatussubOrder.cashBackAmount = cashbackAmountsubOrder.cashBackPercentage = percentagereturn subOrdersdef _parseSingleOrderUsingJsonWithoutItems(self, orderId, userId, subTagId, orderObj, orderSuccessUrl):subOrders=[]ordertotal = long(float(orderObj['0']['total']))ordershippingcost = long(float(orderObj['0']['shipping_cost']))subtotal = ordertotal-ordershippingcostplacedOn = str(orderObj['0']['last_update'])totalDiscount = long(float(orderObj['0']['discount']))totalOrdersAmount = (ordertotal + totalDiscount) - ordershippingcostmerchantOrderId = orderObj['0']['order_id']promotionid = orderObj['0']['promotion_ids']promotionObj = orderObj['0']['promotions'][promotionid]productCode = str(promotionObj['bonuses'][0]['value'])skuData = Mongo.getItemByMerchantIdentifier(productCode, 5)merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)merchantOrder.placedOn = placedOnmerchantOrder.merchantOrderId = str(merchantOrderId)merchantOrder.paidAmount = ordertotalif len(skuData)>0:productUrl = BASE_MURL + str(urlparse(skuData.get('marketPlaceUrl')).path)subOrder = SubOrder(skuData.get('product_name'), productUrl, placedOn, subtotal)subOrder.merchantSubOrderId = str(merchantOrderId)+'-1'subOrder.detailedStatus = Store.OrderStatusConfirmationMap.get(str(orderObj['0']['status']))subOrder.imgUrl = 
skuData.get('thumbnail')subOrder.offerDiscount = totalDiscountsubOrder.unitPrice = totalOrdersAmountsubOrder.productCode = productCodesubOrder.amountPaid = subtotalsubOrder.quantity = 1subOrder.tracingkUrl = ORDER_TRACK_URL_DB + 'order_id=' +str(merchantOrderId)+'&email_id='+ str(orderObj['0']['email'])dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)subOrder.dealRank = dealRank.get('rank')subOrder.rankDesc = dealRank.get('description')subOrders.append(subOrder)else:productSearch = fetchResponseUsingProxy(BASE_MURL+'/search?q='+productCode.strip())productSearchResultPage = BeautifulSoup(productSearch)productUrl = str(productSearchResultPage.find('a', {'class':'pd-list'})['href'])style = productSearchResultPage.find('div', {'class':'pd-image'})['style']imageurl = str(re.findall('url\(\"(.*?)\"\)', style)[0])productName = str(productSearchResultPage.find('div', {'class':'pdt-name'}).text)subOrder = SubOrder(productName, productUrl, placedOn, subtotal)subOrder.merchantSubOrderId = str(merchantOrderId)+'-1'subOrder.detailedStatus = Store.OrderStatusConfirmationMap.get(str(orderObj['0']['status']))subOrder.imgUrl = imageurlsubOrder.offerDiscount = totalDiscountsubOrder.unitPrice = totalOrdersAmountsubOrder.productCode = productCodesubOrder.amountPaid = subtotalsubOrder.quantity = 1subOrder.tracingkUrl = ORDER_TRACK_URL_DB + 'order_id=' +str(merchantOrderId)+'&email_id='+ str(orderObj['0']['email'])dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)subOrder.dealRank = 0subOrder.rankDesc = 'Not Applicable'subOrders.append(subOrder)merchantOrder.totalAmount = totalOrdersAmountmerchantOrder.discountApplied = totalDiscountmerchantOrder.deliveryCharges = ordershippingcostmerchantOrder.subOrders = self.updateCashbackInSubOrders(subOrders)return merchantOrderdef _parseMultiOrderUsingJsonWithoutItems(self, orderId, userId, subTagId, orderObj, orderSuccessUrl):pass'''subOrders=[]ordertotal = 
long(float(orderObj['0']['total']))ordershippingcost = long(float(orderObj['0']['shipping_cost']))subtotal = ordertotal-ordershippingcostplacedOn = str(orderObj['0']['last_update'])totalDiscount = long(float(orderObj['0']['discount']))totalOrdersAmount = (ordertotal + totalDiscount) - ordershippingcostmerchantOrderId = orderObj['0']['order_id']promotionids = orderObj['0']['promotion_ids'].split(',')promotions = orderObj['0']['promotions']merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)merchantOrder.placedOn = placedOnmerchantOrder.merchantOrderId = str(merchantOrderId)merchantOrder.paidAmount = ordertotalfor promotionId in promotionids:promotion = promotions[promotionId]productCode = str(promotion['bonuses'][0]['value'])skuData = Mongo.getItemByMerchantIdentifier(productCode, 5)return None'''def _parseOrders(self, orderId, mpOrderId, subTagId, userId, page, orderSuccessUrl, orderObj):soup = BeautifulSoup(page)productDetailsMap = {}paymentFields = soup.findAll(attrs={'class' : 'box_paymentcalculations_row'})orderDateList = soup.findAll(attrs={'class':'price ord_date'})placedOn= orderDateList[0].text.strip().replace("\t","").replace("\n","").replace("\r","").replace(" ","")ordersubtotal=0ordercluebucks=0ordershippingcost=0ordertotal=0for val in paymentFields:for value in val.contents:if value is not None:if 'div' in str(value).strip():if 'Subtotal' in value.text.strip():ordersubtotal = long(val.contents[3].text.strip().replace("Rs.","").replace(',',''))print 'Order SubTotal:- ',ordersubtotalif 'Shipping Cost' in value.text.strip():ordershippingcost = long(val.contents[3].text.strip().replace("Rs.","").replace(',',''))print 'Shipping Cost:- ',ordershippingcostif 'Clue' in value.text.strip():ordercluebucks = long(val.contents[3].text.strip().replace("Rs.","").replace(',',''))print 'Clue bucks:- ',ordercluebucksif 'Total' in value.text.strip():ordertotal = val.contents[3].text.strip().replace("Rs.","")ordertotal = 
ordertotal.replace(',','')print 'Order Total:- ',ordertotalif orderObj['0']['is_parent_order'] == 'N':productDetailsMap = self._getSingleSubOrderMap(mpOrderId, soup, orderObj)else:if orderObj['0']['items'].values()[0]['child'] is None:productDetailsMap = self._getSingleSubOrderMap(mpOrderId, soup, orderObj)else:productDetailsMap = self._getMultiSubOrdersMap(mpOrderId, soup, orderObj)merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)merchantOrder.placedOn = placedOnmerchantOrder.merchantOrderId = orderObj['0']['order_id']merchantOrder.paidAmount = ordertotaltotalOrdersAmount = 0totalDiscount = 0subOrders= []if len(productDetailsMap)==0:print 'Unable to get Sub Orders for Now:- ',orderObj['0']['order_id']for key in productDetailsMap:subOrderDetail = productDetailsMap.get(key)totalOrdersAmount = totalOrdersAmount + (subOrderDetail['sellingPrice'] * subOrderDetail['quantity'])totalDiscount = totalDiscount + (subOrderDetail['discount'] * subOrderDetail['quantity'])subOrder = SubOrder(subOrderDetail['productName'], subOrderDetail['productUrl'], placedOn, subOrderDetail['subtotal'])subOrder.merchantSubOrderId = keysubOrder.detailedStatus = subOrderDetail['subOrderStatus']subOrder.imgUrl = subOrderDetail['imgUrl']subOrder.offerDiscount = subOrderDetail['discount'] * subOrderDetail['quantity']subOrder.unitPrice = subOrderDetail['sellingPrice']subOrder.productCode = subOrderDetail['productCode']subOrder.amountPaid = subOrderDetail['subtotal']subOrder.quantity = subOrderDetail['quantity']subOrder.tracingkUrl = subOrderDetail['subOrderTrackingUrl']dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)subOrder.dealRank = dealRank.get('rank')subOrder.rankDesc = dealRank.get('description')subOrder.maxNlc = dealRank.get('maxNlc')subOrder.minNlc = dealRank.get('minNlc')subOrder.db = dealRank.get('dp')subOrder.itemStatus = dealRank.get('status')subOrders.append(subOrder)merchantOrder.totalAmount = 
totalOrdersAmountmerchantOrder.discountApplied = totalDiscountmerchantOrder.deliveryCharges = ordershippingcostmerchantOrder.subOrders = self.updateCashbackInSubOrders(subOrders)return merchantOrderdef parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):resp = {}try:rawHtmlSoup = BeautifulSoup(rawHtml)emailId = NoneorderObj = NonemerchantOrderId = Nonefor script in rawHtmlSoup.find_all('script'):if 'orderConfirmation' in script.text:print script.text.strip().split('\n')[0].split('orderConfirmation = ')[1].split(';')[0]orderObj = json.loads(script.text.strip().split('\n')[0].split('orderConfirmation = ')[1].split(';')[0])if merchantOrderId is None:merchantOrderId = orderObj['0']['order_id']emailId = orderObj['0']['email']if orderObj is None or merchantOrderId is None:orderIdDiv = rawHtmlSoup.body.find("div", {'class':'conf_succes'})if orderIdDiv is None:orderIdArt = rawHtmlSoup.body.find("article", {'class':'white'})if orderIdArt is None:resp['result'] = 'ORDER_NOT_CREATED_UNKNOWN'return respelse:orderIdDiv= orderIdArt.find('p', recursive=False)orderIdVal = str(orderIdDiv.text.split(' : ')[1])print "Opening Shopclues Login Page"login_url = "https://sm.shopclues.com/login"br1 = login(login_url)orderDetailPage = br1.open("https://sm.shopclues.com/orderconfirmation?order_id="+orderIdVal+"&status=P")orderDetailPage = ungzipResponse(orderDetailPage)rawHtmlSoup = BeautifulSoup(orderDetailPage)for script in rawHtmlSoup.find_all('script'):if 'orderConfirmation' in script.text:orderObj = json.loads(script.text.strip().split('\n')[0].split('orderConfirmation = ')[1].split(';')[0])if merchantOrderId is None:merchantOrderId = orderObj['0']['order_id']emailId = orderObj['0']['email']logoutpage = br1.open("https://sm.shopclues.com/logout")print br1.geturl()else:orderIdDiv= orderIdDiv.find('div', recursive=False)orderIdVal = str(orderIdDiv.span.text)print "Opening Shopclues Login Page"login_url = "https://sm.shopclues.com/login"br1 = 
login(login_url)orderDetailPage = br1.open("https://sm.shopclues.com/orderconfirmation?order_id="+orderIdVal+"&status=P")orderDetailPage = ungzipResponse(orderDetailPage)rawHtmlSoup = BeautifulSoup(orderDetailPage)for script in rawHtmlSoup.find_all('script'):if 'orderConfirmation' in script.text:orderObj = json.loads(script.text.strip().split('\n')[0].split('orderConfirmation = ')[1].split(';')[0])if merchantOrderId is None:merchantOrderId = orderObj['0']['order_id']emailId = orderObj['0']['email']logoutpage = br1.open("https://sm.shopclues.com/logout")print br1.geturl()if type(orderObj['0']['items']) is list:print "Opening Shopclues Login Page"login_url = "https://sm.shopclues.com/login"br1 = login(login_url)orderDetailPage = br1.open("https://sm.shopclues.com/orderconfirmation?order_id="+str(merchantOrderId)+"&status=P")orderDetailPage = ungzipResponse(orderDetailPage)rawHtmlSoup = BeautifulSoup(orderDetailPage)for script in rawHtmlSoup.find_all('script'):if 'orderConfirmation' in script.text:orderObj = json.loads(script.text.strip().split('\n')[0].split('orderConfirmation = ')[1].split(';')[0])if merchantOrderId is None:merchantOrderId = orderObj['0']['order_id']emailId = orderObj['0']['email']logoutpage = br1.open("https://sm.shopclues.com/logout")print br1.geturl()br = getBrowserObject()url = ORDER_TRACK_URL +'&order_id=' +str(merchantOrderId)+'&email_id='+ emailIdpage = br.open(url)print 'Track Order Url ', br.geturl()if type(orderObj['0']['items']) is list:if len(orderObj['0']['promotions'])==1:merchantOrder = self._parseSingleOrderUsingJsonWithoutItems(orderId, userId, subTagId, orderObj, orderSuccessUrl)merchantOrder.orderTrackingUrl = ORDER_TRACK_URL_DB + 'order_id=' +str(merchantOrderId)+'&email_id='+ emailIdif self._saveToOrder(todict(merchantOrder)):resp['result'] = 'ORDER_CREATED'return respelse:resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'return resp'''else:resp['result'] = 'ORDER_NOT_CREATED_KNOWN'return respelif 
len(orderObj['0']['promotions'])>1:merchantOrder = self._parseMultiOrderUsingJsonWithoutItems(orderId, userId, subTagId, orderObj, orderSuccessUrl)'''headers = str(page.info()).split('\n')page = ungzipResponse(page)jsonResponse = Nonefor header in headers:header = header.split(':')if header[0] == 'Content-Type' and 'json' in header[1]:jsonResponse = json.loads(page)if jsonResponse is not None:page = jsonResponse['text']merchantOrder = self._parseOrders(orderId, merchantOrderId, subTagId, userId, page, orderSuccessUrl, orderObj)merchantOrder.orderTrackingUrl = ORDER_TRACK_URL_DB + 'order_id=' +str(merchantOrderId)+'&email_id='+ emailIdif self._saveToOrder(todict(merchantOrder)):resp['result'] = 'ORDER_CREATED'else:resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'return respexcept:print "Error occurred"traceback.print_exc()resp['result'] = 'ORDER_NOT_CREATED'return respdef parseSingleSubOrder(self, soup, emailId, subOrderId):orderStatusList = soup.findAll(attrs={'class' : 'price ord_status'})subOrderStatus = orderStatusList[0].contents[0].strip()orderDateList = soup.findAll(attrs={'class':'price ord_date'})placedOn= orderDateList[0].text.strip().replace("\t","").replace("\n","").replace("\r","").replace(" ","")orderTables = soup.body.findAll("table", {'class':'table product-list'})orderTable = orderTables[len(orderTables)-1].findAll('tr', recursive=False)firstRow = orderTable.pop(0)totalColumns = len(firstRow.find_all('th'))subOrders =[]count =1for orderTr in orderTable:cols = orderTr.find_all('td')product_details = cols[0].find_all('a')#print product_detailsproductUrl = product_details[0].get('href')productName = product_details[0].contents[0].strip()quantity = int(cols[1].text.strip())sellingPrice = long(cols[2].text.strip().replace("Rs.","").replace(',',''))discount = 0subtotal = 0if totalColumns == 5:if cols[3].text.strip()!='-' and 'Rs.' 
in cols[3].text.strip():discount = long(cols[3].text.strip().replace("Rs.","").replace(',',''))else:discount = 0subtotal = long(cols[4].text.strip().replace("Rs.","").replace(',',''))elif totalColumns == 6:if cols[3].text.strip()!='-' and 'Rs.' in cols[3].text.strip():discount = long(cols[3].text.strip().replace("Rs.","").replace(',',''))else:discount = 0subtotal = long(cols[5].text.strip().replace("Rs.","").replace(',',''))else:subtotal = long(cols[3].text.strip().replace("Rs.","").replace(',',''))productPage = fetchResponseUsingProxy(BASE_MURL+productUrl)productPageSoup = BeautifulSoup(productPage)productCode = productPageSoup.find('input', {'type':'hidden'})['value']allproductImageTags = productPageSoup.findAll(attrs={'class' : 'pd-image'})productImgUrl = ''if allproductImageTags is not None and len(allproductImageTags)>0:productImgUrl= allproductImageTags[0].get('style').split("background:url('")[1].split("')no-repeat center")[0].strip()subOrder = SubOrder(productName, productUrl, placedOn, subtotal)subOrder.merchantSubOrderId = str(subOrderId)+'-'+str(count)subOrder.detailedStatus = subOrderStatussubOrder.imgUrl = productImgUrlsubOrder.offerDiscount = discount*quantitysubOrder.unitPrice = sellingPricesubOrder.productCode = productCodesubOrder.amountPaid = subtotalsubOrder.quantity = quantitysubOrder.tracingkUrl = ORDER_TRACK_URL_DB + 'order_id=' +subOrderId+'&email_id='+ emailIdsubOrders.append(subOrder)count = count +1subOrders = self.updateCashbackInSubOrders(subOrders)return subOrdersdef parseMultiSubOrders(self, soup, emailId):orderTables = soup.body.findAll("table", {'class':'table product-list'})orderTable = orderTables[len(orderTables)-1].findAll('tr', recursive=False)firstRow = orderTable.pop(0)'''totalColumns = len(firstRow.find_all('td'))orderDateList = soup.findAll(attrs={'class':'price ord_date'})placedOn= orderDateList[0].text.strip().replace("\t","").replace("\n","").replace(" ","")'''subOrders = []existingOrders = []for orderTr in 
orderTable:cols = orderTr.find_all('td')product_details = cols[0].find_all('a')#print product_detailssubOrderId= product_details[1].contents[0].strip()if subOrderId in existingOrders:continueelse:existingOrders.append(subOrderId)'''productUrl = product_details[0].get('href')productName = product_details[0].contents[0].strip()'''subOrderTrackingParsingUrl = product_details[1].get('href')#subOrderTrackingUrl = subOrderTrackingParsingUrl.split('order_lookup.details&')[1]'''quantity = int(cols[1].text.strip())sellingPrice = long(cols[2].text.strip().replace("Rs.",""))discount = 0subtotal = 0if totalColumns == 5:if cols[3].text.strip()!='-' or 'Rs.' in cols[3].text.strip():discount = long(cols[3].text.strip().replace("Rs.",""))else:discount = 0subtotal = long(cols[4].text.strip().replace("Rs.",""))else:subtotal = long(cols[3].text.strip().replace("Rs.",""))br = getBrowserObject()productPage = br.open(BASE_MURL+productUrl)productPageHeaders = str(productPage.info()).split('\n')productPage = ungzipResponse(productPage)jsonProductResponse = Nonefor header in productPageHeaders:header = header.split(':')if header[0] == 'Content-Type' and 'json' in header[1]:jsonProductResponse = json.loads(productPage)productPageSoup= Noneif jsonProductResponse is not None:productPageSoup = BeautifulSoup(jsonProductResponse['text'])else:productPageSoup = BeautifulSoup(productPage)productCode = productPageSoup.find('input', {'type':'hidden'})['value']allproductImageTags = productPageSoup.findAll(attrs={'class' : 'pd-image'})productImgUrl = ''if allproductImageTags is not None and len(allproductImageTags)>0:productImgUrl= allproductImageTags[0].get('style').split("background:url('")[1].split("')no-repeat center")[0].strip()productDetailsSubMap['productCode']=productCodeproductDetailsSubMap['imgUrl']=productImgUrl'''orderTrackingPage = fetchResponseUsingProxy(BASE_URL+subOrderTrackingParsingUrl)orderTrackingPageSoup = BeautifulSoup(orderTrackingPage)subOrdersDetails = 
self.parseSingleSubOrder(orderTrackingPageSoup, emailId, subOrderId)subOrders = list(set(subOrders + subOrdersDetails))return subOrdersdef scrapeStoreOrders(self,):#collectionMap = {'palcedOn':1}searchMap = {}collectionMap = {"orderTrackingUrl":1}orders = self._getActiveOrders(searchMap,collectionMap)for order in orders:print "Order", self.store_name, order['orderId'], order['orderTrackingUrl']url = ORDER_TRACK_URL +'&'+ order['orderTrackingUrl'].split('trackOrder?')[1]print urlpage = Noneretry = 1while retry <=3:try:page = fetchResponseUsingProxy(url)breakexcept:traceback.print_exc()retry = retry + 1soup = BeautifulSoup(page)bulk = self.db.merchantOrder.initialize_ordered_bulk_op()closed = TrueorderIdSpan = soup.body.find("span", {'class':'price ord_no'})if orderIdSpan is not None:orderStatusList = soup.findAll(attrs={'class' : 'price ord_status'})if orderStatusList is not None and len(orderStatusList)>0:subOrderId = soup.findAll(attrs={'class':'price ord_no'})[0].text.strip()orderStatus = orderStatusList[0].contents[0].strip()orderTables = soup.body.findAll("table", {'class':'table product-list'})orderTable = orderTables[len(orderTables)-1].findAll('tr', recursive=False)orderTable.pop(0)count = 1while count <= len(orderTable):subbulk = self.db.merchantOrder.initialize_ordered_bulk_op()print 'Sub Order Id', str(subOrderId)+'-'+str(count)subOrder = self._isSubOrderActive(order, str(subOrderId)+'-'+str(count))if subOrder is None:try:print 'Email Id:- '+ str(order['orderTrackingUrl'].split('email_id=')[1])+' and Order Id:- ' + str(subOrderId)subOrders = self.parseSingleSubOrder(soup, order['orderTrackingUrl'].split('email_id=')[1], subOrderId)for subOrder in subOrders:if subOrder is None:continueself.db.merchantOrder.update({"orderId":order['orderId']},{'$push':{"subOrders":{"$each":todict([subOrder])}}})print "Added new suborders to Order id - ", order['orderId']closed = Falseexcept:traceback.print_exc()passcount = count+1continueelif subOrder['closed']:count = 
count+1continueelse:findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": str(subOrderId)+'-'+str(count)}updateMap = {}updateMap["subOrders.$.detailedStatus"] = orderStatusstatus = self._getStatusFromDetailedStatus(orderStatus)closedStatus = status in [Store.ORDER_DELIVERED, Store.ORDER_CANCELLED]if status is not None:updateMap["subOrders.$.status"] = statusif closedStatus:#if status is closed then change the paybackStatus accordinglyprint 'Order Closed'updateMap["subOrders.$.closed"] = Trueif status == Store.ORDER_DELIVERED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVEDelif status == Store.ORDER_CANCELLED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_CANCELLEDelse:closed = Falseprint 'Order not Closed'subbulk.find(findMap).update({'$set' : updateMap})subresult = subbulk.execute()tprint(subresult)count = count +1bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': closed,"parseError":False}})result = bulk.execute()tprint(result)else:subOrdersList = []try:subOrdersList = self.parseMultiSubOrders(soup, order['orderTrackingUrl'].split('email_id=')[1])except:print 'Unable to parse', order['orderId'], order['orderTrackingUrl']continuefor subOrderObj in subOrdersList:subbulk1 = self.db.merchantOrder.initialize_ordered_bulk_op()subOrder = self._isSubOrderActive(order, subOrderObj.merchantSubOrderId)if subOrder is None:self.db.merchantOrder.update({"orderId":order['orderId']},{'$push':{"subOrders":{"$each":todict([subOrderObj])}}})print "Added new suborders to Order id - ", order['orderId']closed = Falsecontinueelif subOrder['closed']:continueelse:findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": subOrderId}updateMap = {}updateMap["subOrders.$.detailedStatus"] = subOrderObj.detailedStatusstatus = self._getStatusFromDetailedStatus(orderStatus)closedStatus = status in [Store.ORDER_DELIVERED, 
Store.ORDER_CANCELLED]if status is not None:updateMap["subOrders.$.status"] = statusif closedStatus:#if status is closed then change the paybackStatus accordinglyupdateMap["subOrders.$.closed"] = Trueif status == Store.ORDER_DELIVERED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVEDelif status == Store.ORDER_CANCELLED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_CANCELLEDelse:closed = Falsesubbulk1.find(findMap).update({'$set' : updateMap})subresult1 = subbulk1.execute()tprint(subresult1)bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': closed,"parseError":False}})result = bulk.execute()tprint(result)else:print 'Soup Object not found for this Order', order['orderId'], order['orderTrackingUrl']continue'''def getTrackingUrls(self, userId):missingOrderUrls = ['https://sm.shopclues.com/myaccount','https://sm.shopclues.com/myorders']return missingOrderUrlsdef parseMyProfileForEmailId(self, userId, url, rawhtml):profileSoup = BeautifulSoup(rawhtml)if profileSoup.find('input', {'name':'user_email'}) is not None:emailId = profileSoup.find('input', {'name':'user_email'})['value']if emailId is not None and emailId.strip() !='':mc = MemCache()mc.set(str(userId), emailId, 600)return 'EMAIL_SET_SUCCESS'else:return 'EMAIL_NOT_FOUND'else:return 'EMAIL_NOT_FOUND'def parseMyOrdersForEmailId(self, userId, url, rawhtml):myOrdersPageSoup = BeautifulSoup(rawhtml)if myOrdersPageSoup.find('a', {'class':'detail'}) is not None:emailId = myOrdersPageSoup.find('a', {'class':'detail'})['href'].split('&')[1].split('=')[1]mc = MemCache()mc.set(str(userId), emailId, 600)return 'EMAIL_SET_SUCCESS'else:return 'EMAIL_NOT_FOUND''''def to_py_date(java_timestamp):date = datetime.fromtimestamp(java_timestamp)return datedef todict(obj, classkey=None):if isinstance(obj, dict):data = {}for (k, v) in obj.items():data[k] = todict(v, classkey)return dataelif 
hasattr(obj, "_ast"):return todict(obj._ast())elif hasattr(obj, "__iter__"):return [todict(v, classkey) for v in obj]elif hasattr(obj, "__dict__"):data = dict([(key, todict(value, classkey))for key, value in obj.__dict__.iteritems()if not callable(value) and not key.startswith('_')])if classkey is not None and hasattr(obj, "__class__"):data[classkey] = obj.__class__.__name__return dataelse:return objdef login(url):br = getBrowserObject()br.set_proxies({"http": PROXY_MESH_GENERAL})br.open(url)response = br.open(url)ungzipResponseBr(response, br)#html = response.read()#print htmlbr.select_form(nr=0)br.form['user'] = "imanthetester@gmail.com"br.form['password'] = "$Dl8uk"response = br.submit()print "********************"print "Attempting to Login"print "********************"#ungzipResponse(response, br)return brdef ungzipResponseBr(r,b):headers = r.info()if headers['Content-Encoding']=='gzip':import gzipprint "********************"print "Deflating gzip response"print "********************"gz = gzip.GzipFile(fileobj=r, mode='rb')html = gz.read()gz.close()headers["Content-type"] = "text/html; charset=utf-8"r.set_data( html )b.set_response(r)