# Blame | Last modification | View Log | RSS feed  (repository-viewer header left in by a copy/paste; commented out so the module parses)
'''Created on Jan 15, 2015@author: amit'''from bson.binary import Binaryfrom dtr import mainfrom dtr.dao import AffiliateInfofrom dtr.main import getBrowserObject, ScrapeException, getStore, ParseExceptionfrom pymongo import MongoClientfrom BeautifulSoup import BeautifulSoupimport datetimeimport jsonimport mechanizeimport pymongoimport reimport urllibimport soupselect;soupselect.monkeypatch()USERNAME='saholic1@gmail.com'PASSWORD='spice@2020'AFFILIATE_URL='http://affiliate.snapdeal.com'POST_URL='https://api-p03.hasoffers.com/v3/Affiliate_Report.json'ORDER_TRACK_URL='https://m.snapdeal.com/orderSummary'CONFIG_URL='http://affiliate.snapdeal.com/publisher/js/config.php'class Store(main.Store):'''This is to map order statuses of our system to order statuses of snapdeal.And our statuses will change accordingly.'''OrderStatusMap = {main.Store.ORDER_PLACED : ['In Progress','N/A'],main.Store.ORDER_DELIVERED : ['Delivered'],main.Store.ORDER_SHIPPED : ['In Transit'],main.Store.ORDER_CANCELLED : ['Closed For Vendor Reallocation']}def __init__(self,store_id):super(Store, self).__init__(store_id)def getName(self):return "snapdeal"def scrapeAffiliate(self, startDate=None, endDate=None):br = getBrowserObject()br.open(AFFILIATE_URL)br.select_form(nr=0)br.form['data[User][password]'] = PASSWORDbr.form['data[User][email]'] = USERNAMEbr.submit()response = br.open(CONFIG_URL)token = re.findall('"session_token":"(.*?)"', ungzipResponse(response, br), re.IGNORECASE)[0]allOffers = self._getAllOffers(br, token)allPyOffers = [self.covertToObj(offer).__dict__ for offer in allOffers]self._saveToAffiliate(allPyOffers)def parseOrderPage(self, htmlString=None):raise NotImplementedErrordef parseOrderRawHtml(self, orderId, subtagId, userId, rawHtml, orderSuccessUrl):br = getBrowserObject()url = ORDER_TRACK_URL + re.findall('.*(\?.*?)$', orderSuccessUrl,re.IGNORECASE)[0]print urlresponse = br.open(url)page = ungzipResponse(response, br)#page=page.decode("utf-8")soup = 
BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)#orderHead = soup.find(name, attrs, recursive, text)sections = soup.findAll("section")#print sectionsorder = sections[1]orderTrs = order.findAll("tr")placedOn = str(orderTrs[0].findAll("td")[1].text)#Pop two section elementssections.pop(0)sections.pop(0)subOrders = sectionsmerchantOrder = {}merchantSubOrders = []merchantOrder["_id"] = orderIdmerchantOrder["userId"] = userIdmerchantOrder['subTagId'] = subtagIdmerchantOrder['merchantOderId'] = re.findall(r'\d+', str(soup.find("div", {"class":"deals_heading"})))[1]merchantOrder['placedOn'] = placedOnfor orderTr in orderTrs:orderTrString = str(orderTr)if "Total Amount" in orderTrString:merchantOrder['totalAmount'] = re.findall(r'\d+', orderTrString)[0]elif "Delivery Charges" in orderTrString:merchantOrder['deliveryCharges'] = re.findall(r'\d+', orderTrString)[0]elif "Discount Applied" in orderTrString:merchantOrder['discountApplied'] = re.findall(r'\d+', orderTrString)[0]elif "Paid Amount" in orderTrString:merchantOrder['paidAmount'] = re.findall(r'\d+', orderTrString)[0]merchantOrder['closed'] = Falsefor subOrderElement in subOrders:productUrl = str(subOrderElement.find("a")['href'])subTable = subOrderElement.find("table", {"class":"lrPad"})subTrs = subTable.findAll("tr")unitPrice=NoneofferDiscount = NonedeliveryCharges = NoneamountPaid = Nonefor subTr in subTrs:subTrString = str(subTr)if "Unit Price" in subTrString:unitPrice = re.findall(r'\d+', subTrString)[0]if "Quantity" in subTrString:qty = re.findall(r'\d+', subTrString)[0]elif "Offer Discount" in subTrString:offerDiscount = re.findall(r'\d+', subTrString)[0]elif "Delivery Charges" in subTrString:deliveryCharges = re.findall(r'\d+', subTrString)[0]elif "Subtotal" in subTrString:amountPaid = str(int(re.findall(r'\d+', subTrString)[0])/int(qty))divs = subOrderElement.findAll("div", {"class": "blk lrPad subordrs"})if len(divs)<=0:raise ParseException("subOrder", "Could not Parse suborders for 
Snapdeal")for div in divs:merchantSubOrder = {}merchantSubOrder['placedOn'] = placedOnmerchantSubOrder['productTitle'] = str(subOrderElement.find("a").text)merchantSubOrder['productUrl'] = "http://m.snapdeal.com/" + productUrlmerchantSubOrder['productCode'] = re.findall(r'\d+$', productUrl)[0]merchantSubOrder['quantity'] = 1merchantSubOrder['status'] = 'Order Placed'merchantSubOrder['amountPaid'] = amountPaidmerchantSubOrder['deliveryCharges'] = deliveryChargesmerchantSubOrder['offerDiscount'] = offerDiscountmerchantSubOrder['unitPrice'] = unitPricetrackAnchor = div.find("a")if trackAnchor is not None:merchantSubOrder['tracingkUrl'] = str(trackAnchor['href'])divStr = str(div)divStr = divStr.replace("\n","").replace("\t", "")for line in divStr.split("<br />"):if "Suborder ID" in line:merchantSubOrder['merchantSubOrderId'] = re.findall(r'\d+', line)[0]elif "Status" in line:merchantSubOrder['detailedStatus'] = re.findall('>(.*?)</span>', line, re.IGNORECASE)[0]elif "Est. Shipping Date" in line:merchantSubOrder['estimatedShippingDate'] = line.split(":")[1].strip()elif "Est. 
Delivery Date" in line:merchantSubOrder['estimatedDeliveryDate'] = line.split(":")[1].strip()elif "Courier Name" in line:merchantSubOrder['courierName'] = line.split(":")[1].strip()elif "Tracking No" in line:merchantSubOrder['trackingNumber'] = line.split(":")[1].strip()merchantSubOrders.append(merchantSubOrder)merchantOrder['subOrders'] = merchantSubOrdersprint merchantOrder#soup = BeautifulSoup(rawHtml,convertEntities=BeautifulSoup.HTML_ENTITIES)#soup.find(name, attrs, recursive, text)def scrapeStoreOrders(self,):pass"""This will insert records with changes only"""def _saveToAffiliate(self, offers):client = MongoClient('mongodb://localhost:27017/')db = client.dtrcollection = db.snapdealOrderAffiliateInfotry:collection.insert(offers,continue_on_error=True)except pymongo.errors.DuplicateKeyError as e:print e.detailsdef _getAllOffers(self, br, token):allOffers = []nextPage = 1while True:data = getPostData(token, nextPage)response = br.open(POST_URL, data)rmap = json.loads(ungzipResponse(response, br))if rmap is not None:rmap = rmap['response']if rmap is not None and len(rmap['errors'])==0:allOffers += rmap['data']['data']print allOffersnextPage += 1if rmap['data']['pageCount']<nextPage:breakreturn allOffersdef covertToObj(self,offer):offerData = offer['Stat']offer1 = AffiliateInfo(offerData['affiliate_info1'], self.store_id, offerData['conversion_status'], offerData['ad_id'],offerData['datetime'], offerData['payout'], offer['Offer']['name'], offerData['ip'], offerData['conversion_sale_amount'])return offer1def getPostData(token, page = 1, limit= 20, startDate=None, endDate=None):endDate=datetime.date.today() + datetime.timedelta(days=1)startDate=endDate - datetime.timedelta(days=31)parameters = 
(("page",str(page)),("limit",str(limit)),("fields[]","Stat.offer_id"),("fields[]","Stat.datetime"),("fields[]","Offer.name"),("fields[]","Stat.conversion_status"),("fields[]","Stat.conversion_sale_amount"),("fields[]","Stat.payout"),("fields[]","Stat.ip"),("fields[]","Stat.ad_id"),("fields[]","Stat.affiliate_info1"),("sort[Stat.datetime]","desc"),("filters[Stat.date][conditional]","BETWEEN"),("filters[Stat.date][values][]",startDate.strftime('%Y-%m-%d')),("filters[Stat.date][values][]",endDate.strftime('%Y-%m-%d')),("data_start",startDate.strftime('%Y-%m-%d')),("data_end",endDate.strftime('%Y-%m-%d')),("Method","getConversions"),("NetworkId","jasper"),("SessionToken",token),)#Encode the parametersreturn urllib.urlencode(parameters)def main():print "hello"store = getStore(3)#store.scrapeAffiliate()#with open ("data.txt", "r") as myfile:# data=myfile.read()# myfile.close()store.parseOrderRawHtml(12345, "subtagId", 122323, "html", 'https://m.snapdeal.com/purchaseMobileComplete?code=1f4166d13ea799b65aa9dea68b3e9e70&order=4509499363')def ungzipResponse(r,b):headers = r.info()if headers['Content-Encoding']=='gzip':import gzipprint "********************"print "Deflating gzip response"print "********************"gz = gzip.GzipFile(fileobj=r, mode='rb')html = gz.read()gz.close()return htmlif __name__ == '__main__':main()