Rev 15509 | Rev 15940 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''Created on Jan 15, 2015@author: Manish'''from bs4 import BeautifulSoupfrom bson.binary import Binaryfrom datetime import datetime, date, timedeltafrom dtr import mainfrom dtr.dao import AffiliateInfo, Order, SubOrderfrom dtr.main import getBrowserObject, ScrapeException, getStore, ParseException, \Store as MStore, ungzipResponse, tprintfrom dtr.storage import Mongofrom dtr.storage.Mongo import getImgSrcfrom dtr.utils.utils import fetchResponseUsingProxyfrom pprint import pprintfrom pymongo import MongoClientimport jsonimport pymongoimport reimport timeimport tracebackimport urllibfrom urlparse import urlparse, parse_qsUSERNAME='profittill2@gmail.com'PASSWORD='spice@2020'AFFILIATE_URL='http://affiliate.snapdeal.com'POST_URL='https://api-p03.hasoffers.com/v3/Affiliate_Report.json'ORDER_TRACK_URL='http://www.shopclues.com/index.php?dispatch=order_lookup.details'CONFIG_URL='http://affiliate.snapdeal.com/publisher/js/config.php'BASE_URL= 'http://www.shopclues.com'BASE_MURL= 'http://m.shopclues.com'class Store(MStore):'''This is to map order statuses of our system to order statuses of snapdeal.And our statuses will change accordingly.'''OrderStatusMap = {MStore.ORDER_PLACED : ['payment successful', 'new order - cod confirmation pending', 'processing', 'quality check','on schedule', 'processing - pickup-initiated', 'processing - ready to dispatch'],MStore.ORDER_DELIVERED : ['delivered', 'complete'],MStore.ORDER_SHIPPED : ['in transit', 'dispatched','shipped','order handed to courier'],MStore.ORDER_CANCELLED : ['payment failed', 'canceled', 'payment declined', 'order on hold - cancellation requested by customer', 'courier returned']}OrderStatusConfirmationMap= {"Payment Successful" : "P","Order Declined" : "D","New Order - COD confirmation Pending" : "O"}CONF_CB_AMOUNT = MStore.CONF_CB_DISCOUNTED_PRICE'''def scrapeAffiliate(self, startDate=None, endDate=None):br = getBrowserObject()br.open(AFFILIATE_URL)br.select_form(nr=0)br.form['data[User][password]'] = PASSWORDbr.form['data[User][email]'] = USERNAMEbr.submit()response = br.open(CONFIG_URL)token = re.findall('"session_token":"(.*?)"', ungzipResponse(response), re.IGNORECASE)[0]print tokenallOffers = self._getAllOffers(br, token)self._saveToAffiliate(allOffers)'''def _setLastSaleDate(self, saleDate):self.db.lastSaleDtate.update({'storeId':self.store_id}, {'$set':{'saleDate':saleDate}})def getName(self):return "shopclues"def _getLastSaleDate(self,):lastDaySaleObj = self.db.lastDaySale.find_one({"storeId":self.store_id})if lastDaySaleObj is None:return datetime.mindef _getStatusFromDetailedStatus(self, detailedStatus):for key, value in Store.OrderStatusMap.iteritems():if detailedStatus.lower() in value:return keyprint "Detailed Status need to be mapped", detailedStatus, self.store_idreturn Nonedef _getSingleSubOrderMap(self, orderId, orderStatus, statusTime, soup, subTagId):productDetailsMap = {}orderTable = soup.body.find("table", {'class':'table product-list'}).findAll('tr', recursive=False)orderTable.pop(0)productDetailsSubMap = {}for orderTr in orderTable:cols = orderTr.find_all('td')product_details = cols[0].find_all('a')#print product_detailsproductUrl = product_details[0].get('href')productName = product_details[0].contents[0].strip()quantity = int(cols[1].text.strip())sellingPrice = long(cols[2].text.strip().replace("Rs.",""))discount = long(cols[3].text.strip().replace("Rs.",""))subtotal = long(cols[4].text.strip().replace("Rs.",""))productDetailsSubMap['productUrl']=BASE_MURL+productUrlproductDetailsSubMap['productName']=productNameproductDetailsSubMap['subOrderTrackingUrl']=ORDER_TRACK_URL+'&order_id=' +str(orderId)+'&email_id='+ subTagId.split('$')[1]productDetailsSubMap['sellingPrice']=sellingPriceproductDetailsSubMap['quantity']=quantityproductDetailsSubMap['discount']=discountproductDetailsSubMap['subtotal']=subtotalproductDetailsSubMap['parentOrderId']=orderIdbr = getBrowserObject()productPage = br.open(BASE_MURL+productUrl)productPageHeaders = str(productPage.info()).split('\n')productPage = ungzipResponse(productPage)jsonProductResponse = Nonefor header in productPageHeaders:header = header.split(':')if header[0] == 'Content-Type' and 'json' in header[1]:jsonProductResponse = json.loads(productPage)productPageSoup= Noneif jsonProductResponse is not None:productPageSoup = BeautifulSoup(jsonProductResponse['text'])else:productPageSoup = BeautifulSoup(productPage)productCodeArray = str(productPageSoup.find("form", {'class':'buy-form'}).findAll('input', recursive=False)[0]).split('"')lengthProductCodeArr= len(productCodeArray)productCode = productCodeArray[lengthProductCodeArr-2]allproductImageTags = productPageSoup.findAll(attrs={'class' : 'pd-image'})productImgUrl = ''if allproductImageTags is not None and len(allproductImageTags)>0:productImgUrl= allproductImageTags[0].get('style').split("background:url('")[1].split("')no-repeat center")[0].strip()productDetailsSubMap['productCode']=productCodeproductDetailsSubMap['imgUrl']=productImgUrlproductDetailsSubMap['subOrderStatus']=orderStatusproductDetailsSubMap['subOrderStatusTime']=statusTimeproductDetailsMap[orderId]=productDetailsSubMapreturn productDetailsMapdef _getMultiSubOrdersMap(self, orderId, soup):orderTable = soup.body.find("table", {'class':'table product-list'}).findAll('tr', recursive=False)orderTable.pop(0)productDetailsMap = {}for orderTr in orderTable:productDetailsSubMap = {}cols = orderTr.find_all('td')product_details = cols[0].find_all('a')#print product_detailssubOrderId= product_details[1].contents[0].strip()productUrl = product_details[0].get('href')productName = product_details[0].contents[0].strip()subOrderTrackingUrl = product_details[1].get('href')quantity = int(cols[1].text.strip())sellingPrice = long(cols[2].text.strip().replace("Rs.",""))discount = long(cols[3].text.strip().replace("Rs.",""))subtotal = long(cols[4].text.strip().replace("Rs.",""))productDetailsSubMap['productUrl']=BASE_MURL+productUrlproductDetailsSubMap['productName']=productNameproductDetailsSubMap['subOrderTrackingUrl']=BASE_URL+subOrderTrackingUrlproductDetailsSubMap['sellingPrice']=sellingPriceproductDetailsSubMap['quantity']=quantityproductDetailsSubMap['discount']=discountproductDetailsSubMap['subtotal']=subtotalbr = getBrowserObject()productPage = br.open(BASE_MURL+productUrl)productPageHeaders = str(productPage.info()).split('\n')productPage = ungzipResponse(productPage)jsonProductResponse = Nonefor header in productPageHeaders:header = header.split(':')if header[0] == 'Content-Type' and 'json' in header[1]:jsonProductResponse = json.loads(productPage)productPageSoup= Noneif jsonProductResponse is not None:productPageSoup = BeautifulSoup(jsonProductResponse['text'])else:productPageSoup = BeautifulSoup(productPage)productCodeArray = str(productPageSoup.find("form", {'class':'buy-form'}).findAll('input', recursive=False)[0]).split('"')lengthProductCodeArr= len(productCodeArray)productCode = productCodeArray[lengthProductCodeArr-2]allproductImageTags = productPageSoup.findAll(attrs={'class' : 'pd-image'})productImgUrl = ''if allproductImageTags is not None and len(allproductImageTags)>0:productImgUrl= allproductImageTags[0].get('style').split("background:url('")[1].split("')no-repeat center")[0].strip()productDetailsSubMap['productCode']=productCodeproductDetailsSubMap['imgUrl']=productImgUrlbr1 = getBrowserObject()orderTrackingPage = br1.open(BASE_URL+subOrderTrackingUrl)headers = str(orderTrackingPage.info()).split('\n')orderTrackingPage= ungzipResponse(orderTrackingPage)jsonResponse = Nonefor header in headers:header = header.split(':')if header[0] == 'Content-Type' and 'json' in header[1]:jsonResponse = json.loads(orderTrackingPage)orderTrackingPageSoup = Noneif jsonResponse is not None:orderTrackingPageSoup = BeautifulSoup(str(jsonResponse['text']))else:orderTrackingPageSoup = BeautifulSoup(orderTrackingPage)subOrderStatusList = orderTrackingPageSoup.findAll(attrs={'class' : 'price ord_status'})subOrderStatus = subOrderStatusList[0].contents[0].strip()subOrderStatusTime= orderTrackingPageSoup.findAll(attrs={'class' : 'sts no_mobile'})[1].contents[2].strip()productDetailsSubMap['subOrderStatus'] = subOrderStatusproductDetailsSubMap['subOrderStatusTime'] = subOrderStatusTimeproductDetailsSubMap['parentOrderId']=orderIdproductDetailsMap[subOrderId]=productDetailsSubMapreturn productDetailsMapdef updateCashbackInSubOrders(self, subOrders):for subOrder in subOrders:cashbackStatus = Store.CB_NAcashbackAmount = 0percentage = 0amount = subOrder.amountPaidif amount > 0:(cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amount)if cashbackAmount > 0:cashbackStatus = Store.CB_PENDINGsubOrder.cashBackStatus = cashbackStatussubOrder.cashBackAmount = cashbackAmountsubOrder.cashBackPercentage = percentagereturn subOrdersdef _parseOrders(self, orderId, subTagId, userId, page, orderSuccessUrl):soup = BeautifulSoup(page)productDetailsMap = {}orderStatusList = soup.findAll(attrs={'class' : 'price ord_status'})paymentFields = soup.findAll(attrs={'class' : 'box_paymentcalculations_row'})orderDateList = soup.findAll(attrs={'class':'price ord_date'})placedOn= orderDateList[0].text.strip().replace("\t","").replace("\n","").replace(" ","")ordersubtotal=0ordercluebucks=0ordershippingcost=0ordertotal=0for val in paymentFields:for value in val.contents:if value is not None:if 'div' in str(value).strip():if 'Subtotal' in value.text.strip():ordersubtotal = long(val.contents[3].text.strip().replace("Rs.",""))print ordersubtotalif 'Shipping Cost' in value.text.strip():ordershippingcost = long(val.contents[3].text.strip().replace("Rs.",""))print ordershippingcostif 'Clue' in value.text.strip():ordercluebucks = long(val.contents[3].text.strip().replace("Rs.",""))print ordercluebucksif 'Total' in value.text.strip():ordertotal = long(val.contents[3].text.strip().replace("Rs.",""))print ordertotalif orderStatusList is not None and len(orderStatusList)>0:orderStatus = orderStatusList[0].contents[0].strip()statusTime= soup.findAll(attrs={'class' : 'sts no_mobile'})[1].contents[2].strip()productDetailsMap = self._getSingleSubOrderMap(orderId, orderStatus, statusTime, soup, subTagId)else:productDetailsMap = self._getMultiSubOrdersMap(orderId, soup)merchantOrder = Order(orderId, userId, subTagId.split('$')[0], self.store_id, orderSuccessUrl)merchantOrder.placedOn = placedOnmerchantOrder.merchantOrderId = orderIdmerchantOrder.paidAmount = ordertotaltotalOrdersAmount = 0totalDiscount = 0subOrders= []for key in productDetailsMap:subOrderDetail = productDetailsMap.get(key)totalOrdersAmount = totalOrdersAmount + (subOrderDetail['sellingPrice'] * subOrderDetail['quantity'])totalDiscount = totalDiscount + (subOrderDetail['discount'] * subOrderDetail['quantity'])subOrder = SubOrder(subOrderDetail['productName'], subOrderDetail['productUrl'], placedOn, subOrderDetail['subtotal'])subOrder.merchantSubOrderId = keysubOrder.detailedStatus = subOrderDetail['subOrderStatus']subOrder.imgUrl = subOrderDetail['imgUrl']subOrder.offerDiscount = subOrderDetail['discount'] * subOrderDetail['quantity']subOrder.unitPrice = subOrderDetail['sellingPrice']subOrder.productCode = subOrderDetail['productCode']subOrder.amountPaid = subOrderDetail['subtotal']subOrder.quantity = subOrderDetail['quantity']subOrders.append(subOrder)print totalOrdersAmount, totalDiscountmerchantOrder.totalAmount = totalOrdersAmountmerchantOrder.discountApplied = totalDiscountmerchantOrder.deliveryCharges = ordershippingcostmerchantOrder.subOrders = self.updateCashbackInSubOrders(subOrders)return merchantOrderdef parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):#print merchantOrderresp = {}try:br = getBrowserObject()url = ORDER_TRACK_URL +'&order_id=' +str(orderId)+'&email_id='+ subTagId.split('$')[1]page = br.open(url)headers = str(page.info()).split('\n')page = ungzipResponse(page)jsonResponse = Nonefor header in headers:header = header.split(':')if header[0] == 'Content-Type' and 'json' in header[1]:jsonResponse = json.loads(page)if jsonResponse is not None:page = jsonResponse['text']merchantOrder = self._parseOrders(orderId, subTagId, userId, page, orderSuccessUrl)merchantOrder.orderTrackingUrl = urlif self._saveToOrder(todict(merchantOrder)):resp['result'] = 'ORDER_CREATED'else:resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'return respexcept:print "Error occurred"traceback.print_exc()resp['result'] = 'ORDER_NOT_CREATED'def parseSingleSubOrder(self, soup, emailId, subOrderId, subOrderStatus):orderDateList = soup.findAll(attrs={'class':'price ord_date'})placedOn= orderDateList[0].text.strip().replace("\t","").replace("\n","").replace(" ","")orderTable = soup.body.find("table", {'class':'table product-list'}).findAll('tr', recursive=False)orderTable.pop(0)subOrders =[]for orderTr in orderTable:cols = orderTr.find_all('td')product_details = cols[0].find_all('a')#print product_detailsproductUrl = product_details[0].get('href')productName = product_details[0].contents[0].strip()quantity = int(cols[1].text.strip())sellingPrice = long(cols[2].text.strip().replace("Rs.",""))discount = long(cols[3].text.strip().replace("Rs.",""))subtotal = long(cols[4].text.strip().replace("Rs.",""))br = getBrowserObject()productPage = br.open(BASE_MURL+productUrl)productPageHeaders = str(productPage.info()).split('\n')productPage = ungzipResponse(productPage)jsonProductResponse = Nonefor header in productPageHeaders:header = header.split(':')if header[0] == 'Content-Type' and 'json' in header[1]:jsonProductResponse = json.loads(productPage)productPageSoup= Noneif jsonProductResponse is not None:productPageSoup = BeautifulSoup(jsonProductResponse['text'])else:productPageSoup = BeautifulSoup(productPage)productCodeArray = str(productPageSoup.find("form", {'class':'buy-form'}).findAll('input', recursive=False)[0]).split('"')lengthProductCodeArr= len(productCodeArray)productCode = productCodeArray[lengthProductCodeArr-2]allproductImageTags = productPageSoup.findAll(attrs={'class' : 'pd-image'})productImgUrl = ''if allproductImageTags is not None and len(allproductImageTags)>0:productImgUrl= allproductImageTags[0].get('style').split("background:url('")[1].split("')no-repeat center")[0].strip()subOrder = SubOrder(productName, productUrl, placedOn, subtotal)subOrder.merchantSubOrderId = subOrderIdsubOrder.detailedStatus = subOrderStatussubOrder.imgUrl = productImgUrlsubOrder.offerDiscount = discount*quantitysubOrder.unitPrice = sellingPricesubOrder.productCode = productCodesubOrder.amountPaid = subtotalsubOrder.quantity = quantitysubOrders.append(subOrder)subOrders = self.updateCashbackInSubOrders(subOrders)return subOrders[0]def parseMultiSubOrders(self, soup):orderTable = soup.body.find("table", {'class':'table product-list'}).findAll('tr', recursive=False)orderTable.pop(0)orderDateList = soup.findAll(attrs={'class':'price ord_date'})placedOn= orderDateList[0].text.strip().replace("\t","").replace("\n","").replace(" ","")subOrders = []for orderTr in orderTable:productDetailsSubMap = {}cols = orderTr.find_all('td')product_details = cols[0].find_all('a')#print product_detailssubOrderId= product_details[1].contents[0].strip()productUrl = product_details[0].get('href')productName = product_details[0].contents[0].strip()subOrderTrackingUrl = product_details[1].get('href')quantity = int(cols[1].text.strip())sellingPrice = long(cols[2].text.strip().replace("Rs.",""))discount = long(cols[3].text.strip().replace("Rs.",""))subtotal = long(cols[4].text.strip().replace("Rs.",""))br = getBrowserObject()productPage = br.open(BASE_MURL+productUrl)productPageHeaders = str(productPage.info()).split('\n')productPage = ungzipResponse(productPage)jsonProductResponse = Nonefor header in productPageHeaders:header = header.split(':')if header[0] == 'Content-Type' and 'json' in header[1]:jsonProductResponse = json.loads(productPage)productPageSoup= Noneif jsonProductResponse is not None:productPageSoup = BeautifulSoup(jsonProductResponse['text'])else:productPageSoup = BeautifulSoup(productPage)productCodeArray = str(productPageSoup.find("form", {'class':'buy-form'}).findAll('input', recursive=False)[0]).split('"')lengthProductCodeArr= len(productCodeArray)productCode = productCodeArray[lengthProductCodeArr-2]allproductImageTags = productPageSoup.findAll(attrs={'class' : 'pd-image'})productImgUrl = ''if allproductImageTags is not None and len(allproductImageTags)>0:productImgUrl= allproductImageTags[0].get('style').split("background:url('")[1].split("')no-repeat center")[0].strip()productDetailsSubMap['productCode']=productCodeproductDetailsSubMap['imgUrl']=productImgUrlbr1 = getBrowserObject()orderTrackingPage = br1.open(BASE_URL+subOrderTrackingUrl)headers = str(orderTrackingPage.info()).split('\n')orderTrackingPage= ungzipResponse(orderTrackingPage)jsonResponse = Nonefor header in headers:header = header.split(':')if header[0] == 'Content-Type' and 'json' in header[1]:jsonResponse = json.loads(orderTrackingPage)orderTrackingPageSoup = Noneif jsonResponse is not None:orderTrackingPageSoup = BeautifulSoup(str(jsonResponse['text']))else:orderTrackingPageSoup = BeautifulSoup(orderTrackingPage)subOrderStatusList = orderTrackingPageSoup.findAll(attrs={'class' : 'price ord_status'})subOrderStatus = subOrderStatusList[0].contents[0].strip()subOrderStatusTime= orderTrackingPageSoup.findAll(attrs={'class' : 'sts no_mobile'})[1].contents[2].strip()subOrder = SubOrder(productName, productUrl, placedOn, subtotal)subOrder.merchantSubOrderId = subOrderIdsubOrder.detailedStatus = subOrderStatussubOrder.imgUrl = productImgUrlsubOrder.offerDiscount = discount*quantitysubOrder.unitPrice = sellingPricesubOrder.productCode = productCodesubOrder.amountPaid = subtotalsubOrder.quantity = quantitysubOrders.append(subOrder)return self.updateCashbackInSubOrders(subOrders)def scrapeStoreOrders(self,):#collectionMap = {'palcedOn':1}searchMap = {}collectionMap = {"orderTrackingUrl":1}orders = self._getActiveOrders(searchMap,collectionMap)for order in orders:print "Order", self.store_name, order['orderId'], order['orderTrackingUrl']url = order['orderTrackingUrl']page = fetchResponseUsingProxy(url)soup = BeautifulSoup(page)bulk = self.db.merchantOrder.initialize_ordered_bulk_op()closed = TrueorderStatusList = soup.findAll(attrs={'class' : 'price ord_status'})if orderStatusList is not None and len(orderStatusList)>0:subOrderId = soup.findAll(attrs={'class':'price ord_no'})[0].text.strip()subOrder = self._isSubOrderActive(order, subOrderId)orderStatus = orderStatusList[0].contents[0].strip()if subOrder is None:try:subOrder = self.parseSingleSubOrder(soup, order['orderTrackingUrl'].split('email_id=')[1], subOrderId, orderStatus)if subOrder is None:continueself.db.merchantOrder.update({"orderId":order['orderId']},{'$push':{"subOrders":{"$each":todict([subOrder])}}})print "Added new suborders to Order id - ", order['orderId']closed = Falseexcept:passcontinueelif subOrder['closed']:continueelse:findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": subOrderId}updateMap = {}updateMap["subOrders.$.detailedStatus"] = orderStatusstatus = self._getStatusFromDetailedStatus(orderStatus)closedStatus = status in [Store.ORDER_DELIVERED, Store.ORDER_CANCELLED]if status is not None:updateMap["subOrders.$.status"] = statusif closedStatus:#if status is closed then change the paybackStatus accordinglyupdateMap["subOrders.$.closed"] = Trueif status == Store.ORDER_DELIVERED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVEDelif status == Store.ORDER_CANCELLED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_CANCELLEDelse:closed = Falsebulk.find(findMap).update({'$set' : updateMap})else:subOrdersList = self.parseMultiSubOrders(soup)for subOrderObj in subOrdersList:subOrder = self._isSubOrderActive(order, subOrderObj.merchantSubOrderId)if subOrder is None:self.db.merchantOrder.update({"orderId":order['orderId']},{'$push':{"subOrders":{"$each":todict([subOrderObj])}}})print "Added new suborders to Order id - ", order['orderId']closed = Falsecontinueelif subOrder['closed']:continueelse:findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": subOrderId}updateMap = {}updateMap["subOrders.$.detailedStatus"] = subOrderObj.detailedStatusstatus = self._getStatusFromDetailedStatus(orderStatus)closedStatus = status in [Store.ORDER_DELIVERED, Store.ORDER_CANCELLED]if status is not None:updateMap["subOrders.$.status"] = statusif closedStatus:#if status is closed then change the paybackStatus accordinglyupdateMap["subOrders.$.closed"] = Trueif status == Store.ORDER_DELIVERED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVEDelif status == Store.ORDER_CANCELLED:if subOrder.get("cashBackStatus") == Store.CB_PENDING:updateMap["subOrders.$.cashBackStatus"] = Store.CB_CANCELLEDelse:closed = Falsebulk.find(findMap).update({'$set' : updateMap})bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': closed,"parseError":False}})result = bulk.execute()tprint(result)def saveToAffiliate(self, offers):raise NotImplementedErrordef todict(obj, classkey=None):if isinstance(obj, dict):data = {}for (k, v) in obj.items():data[k] = todict(v, classkey)return dataelif hasattr(obj, "_ast"):return todict(obj._ast())elif hasattr(obj, "__iter__"):return [todict(v, classkey) for v in obj]elif hasattr(obj, "__dict__"):data = dict([(key, todict(value, classkey))for key, value in obj.__dict__.iteritems()if not callable(value) and not key.startswith('_')])if classkey is not None and hasattr(obj, "__class__"):data[classkey] = obj.__class__.__name__return dataelse:return obj