# Subversion Repositories SmartDukaan
#
# Rev
#
# Rev 13809 | Rev 14239 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

'''
Created on Jan 15, 2015

@author: amit
'''
from BeautifulSoup import BeautifulSoup
from bson.binary import Binary
from datetime import datetime, date, timedelta
from dtr import main
from dtr.dao import AffiliateInfo, Order, SubOrder
from dtr.main import getBrowserObject, ScrapeException, getStore, ParseException, Store as MStore,\
    ungzipResponse
from pprint import pprint
from pymongo import MongoClient
import json
import pymongo
import re
import traceback
import urllib

# Credentials submitted to the affiliate panel login form by Store.scrapeAffiliate.
# NOTE(review): these are hard-coded in source control - consider loading them from configuration.
USERNAME='profittill2@gmail.com'
PASSWORD='spice@2020'
# Page opened first by Store.scrapeAffiliate; its first form receives the credentials above.
AFFILIATE_URL='http://affiliate.snapdeal.com'
# Endpoint that receives the urlencoded conversion-report request built by getPostData.
POST_URL='https://api-p03.hasoffers.com/v3/Affiliate_Report.json'
# Base of the order summary page; the query string of an order's success URL is appended to it.
ORDER_TRACK_URL='https://m.snapdeal.com/orderSummary'
# Page fetched after login; its body is searched for the "session_token" value.
CONFIG_URL='http://affiliate.snapdeal.com/publisher/js/config.php'

class Store(MStore):
    
    '''
    This is to map order statuses of our system to order statuses of snapdeal.
    And our statuses will change accordingly.
    
    '''
    OrderStatusMap = {
                      MStore.ORDER_PLACED : ['In Progress','N/A'],
                      MStore.ORDER_DELIVERED : ['Delivered'],
                      MStore.ORDER_SHIPPED : ['In Transit'],
                      MStore.ORDER_CANCELLED : ['Closed For Vendor Reallocation', 'Cancelled', 'Product returned by courier', 'Returned']
                      }
    
    CONF_CB_AMOUNT = MStore.CONF_CB_DISCOUNTED_PRICE
    def __init__(self,store_id):
        '''
        Initialise the store by delegating to the base class constructor.

        store_id is passed through unchanged to the parent __init__.
        '''
        super(Store, self).__init__(store_id)

    def getName(self):
        '''
        Return the name of this store, the literal string "snapdeal".
        '''
        storeName = "snapdeal"
        return storeName
    
    def scrapeAffiliate(self, startDate=None, endDate=None):
        '''
        Log in to the affiliate panel, download every conversion and store it.

        Steps performed:
          1. Submit the first form on AFFILIATE_URL with USERNAME/PASSWORD.
          2. Fetch CONFIG_URL and take the first "session_token" found in it.
          3. Download all offers through _getAllOffers.
          4. For every offer whose sale date is later than the value returned
             by _getLastSaleDate, call _updateOrdersPayBackStatus.
          5. Store the latest sale date seen and save all offers.

        startDate and endDate are accepted but not used by this method.
        Raises IndexError when no session token is present in the config page.
        '''
        browser = getBrowserObject()
        browser.open(AFFILIATE_URL)
        browser.select_form(nr=0)
        browser.form['data[User][password]'] = PASSWORD
        browser.form['data[User][email]'] = USERNAME
        browser.submit()

        configBody = ungzipResponse(browser.open(CONFIG_URL))
        tokenMatches = re.findall('"session_token":"(.*?)"', configBody, re.IGNORECASE)
        token = tokenMatches[0]

        rawOffers = self._getAllOffers(browser, token)

        offerDicts = []
        previousMax = self._getLastSaleDate()
        latestSeen = previousMax
        for rawOffer in rawOffers:
            offerDict = self.covertToObj(rawOffer).__dict__
            offerDicts.append(offerDict)
            soldAt = datetime.strptime(offerDict['saleDate'], "%Y-%m-%d %H:%M:%S")
            # Offers that are not newer than the stored maximum are saved but not re-processed.
            if not (previousMax < soldAt):
                continue
            self._updateOrdersPayBackStatus({'subTagId': offerDict['subTagId'], 'saleDate': offerDict['saleDate']}, {})
            if latestSeen < soldAt:
                latestSeen = soldAt

        self._setLastSaleDate(latestSeen)
        self._saveToAffiliate(offerDicts)
    
    def _setLastSaleDate(self, saleDate):
        '''
        Persist the most recent affiliate sale date for this store.

        The value is written to the lastDaySale collection, the same
        collection _getLastSaleDate reads from, under the key 'saleDate' of
        the document whose 'storeId' equals self.store_id. upsert=True
        creates that document when it does not exist yet, so the very first
        value is stored as well.
        '''
        self.db.lastDaySale.update({'storeId':self.store_id}, {'$set':{'saleDate':saleDate}}, upsert=True)
    
        
        
    def _getLastSaleDate(self,):
        '''
        Return the last affiliate sale date recorded for this store.

        Looks up the lastDaySale document whose 'storeId' equals
        self.store_id and returns its 'saleDate' value. datetime.min is
        returned when there is no such document or it carries no sale date,
        so that every offer compares as newer.
        '''
        lastDaySaleObj = self.db.lastDaySale.find_one({"storeId":self.store_id})
        if lastDaySaleObj is None:
            return datetime.min
        # Fall back to datetime.min as well when the document has no usable 'saleDate'.
        return lastDaySaleObj.get('saleDate') or datetime.min
        
    def _parse(self, orderId, subTagId, userId, page, orderSuccessUrl):
        '''
        Build an Order (with its sub-orders) from an order summary page.

        page is the HTML of the order summary. The second <section> holds
        the order totals table, every <section> after it is handed to
        parseSubOrder. The merchant order id is the second run of digits in
        the "deals_heading" div.

        Returns the populated Order. Raises IndexError when the page does
        not have the expected sections, rows or digits.
        '''
        soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)
        allSections = soup.findAll("section")

        summaryRows = allSections[1].findAll("tr")
        placedOn = str(summaryRows[0].findAll("td")[1].text)

        # The first two sections are not sub-orders; everything after them is.
        subOrderSections = allSections[2:]

        merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)
        headingHtml = str(soup.find("div", {"class":"deals_heading"}))
        merchantOrder.merchantOrderId = re.findall(r'\d+', headingHtml)[1]

        # (label searched in the row, Order attribute receiving the first number of that row);
        # only the first matching label of a row is applied.
        labelToAttribute = (
            ("Total Amount", "totalAmount"),
            ("Delivery Charges", "deliveryCharges"),
            ("Discount Applied", "discountApplied"),
            ("Paid Amount", "paidAmount"),
        )
        for row in summaryRows:
            rowHtml = str(row)
            for label, attribute in labelToAttribute:
                if label in rowHtml:
                    setattr(merchantOrder, attribute, re.findall(r'\d+', rowHtml)[0])
                    break

        collected = []
        for section in subOrderSections:
            collected.extend(self.parseSubOrder(section, placedOn))

        merchantOrder.subOrders = collected
        return merchantOrder
        
    def parseSubOrder(self, subOrderElement, placedOn):
        '''
        Parse one product section of the order page into SubOrder objects.

        subOrderElement is the <section> of a single product; placedOn is
        the order date string passed on to every SubOrder. The price table
        ("lrPad") supplies unit price, quantity, offer discount, delivery
        charges and subtotal; every "blk lrPad subordrs" div becomes one
        SubOrder carrying its own id, status, dates and tracking details.

        Returns the list of SubOrder objects.
        Raises ParseException when no sub-order div, no unit price, or a
        subtotal without a quantity is found.
        '''
        productAnchor = subOrderElement.find("a")
        # The absolute URL is built once; building it inside the per-div loop
        # would prepend the host again for every further sub-order.
        productUrl = "http://m.snapdeal.com/" + str(productAnchor['href'])
        subTable = subOrderElement.find("table", {"class":"lrPad"})

        unitPrice = None
        qty = None
        offerDiscount = None
        deliveryCharges = None
        subtotalRow = None
        for subTr in subTable.findAll("tr"):
            subTrString = str(subTr)
            if "Unit Price" in subTrString:
                unitPrice = re.findall(r'\d+', subTrString)[0]
            if "Quantity" in subTrString:
                qty = re.findall(r'\d+', subTrString)[0]
            elif "Offer Discount" in subTrString:
                offerDiscount = re.findall(r'\d+', subTrString)[0]
            elif "Delivery Charges" in subTrString:
                deliveryCharges = re.findall(r'\d+', subTrString)[0]
            elif "Subtotal" in subTrString:
                # Evaluated after the loop so the result does not depend on row order.
                subtotalRow = subTrString

        if unitPrice is None:
            raise ParseException("subOrder", "Could not find unit price for Snapdeal suborder")

        amountPaid = None
        if subtotalRow is not None:
            if qty is None:
                raise ParseException("subOrder", "Could not find quantity for Snapdeal suborder")
            if int(qty) > 0:
                # Per-unit amount: integer division of the subtotal by the quantity.
                amountPaid = str(int(re.findall(r'\d+', subtotalRow)[0]) // int(qty))
            else:
                amountPaid = "0"

        # Amount on which cashback is computed: selling price, or price net of the offer discount.
        if self.CONF_CB_AMOUNT == MStore.CONF_CB_SELLING_PRICE or offerDiscount is None:
            amount = int(unitPrice)
        else:
            amount = int(unitPrice) - int(offerDiscount)

        divs = subOrderElement.findAll("div", {"class": "blk lrPad subordrs"})
        if len(divs)<=0:
            raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")

        productTitle = str(productAnchor.text)
        # The product code is the trailing run of digits of the product URL.
        productCode = re.findall(r'\d+$', productUrl)[0]

        subOrders = []
        for div in divs:
            subOrder = SubOrder(productTitle, productUrl, placedOn, amountPaid)

            subOrder.amountPaid = amountPaid
            subOrder.deliveryCharges = deliveryCharges
            subOrder.offerDiscount = offerDiscount
            subOrder.unitPrice = int(unitPrice)
            subOrder.productCode = productCode
            (cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amount)
            cashbackStatus = Store.CB_PENDING
            if cashbackAmount <= 0:
                cashbackStatus = Store.CB_NA
            subOrder.cashBackStatus = cashbackStatus
            subOrder.cashBackAmount = cashbackAmount
            if percentage > 0:
                subOrder.cashBackPercentage = percentage

            trackAnchor = div.find("a")
            if trackAnchor is not None:
                # NOTE(review): the attribute is spelled "tracingkUrl"; kept as-is because
                # attribute names are serialised with the sub-order - confirm before renaming.
                subOrder.tracingkUrl = str(trackAnchor['href'])

            divStr = str(div).replace("\n","").replace("\t", "")
            for line in divStr.split("<br />"):
                if "Suborder ID" in line:
                    subOrder.merchantSubOrderId = re.findall(r'\d+', line)[0]
                elif "Status" in line:
                    subOrder.detailedStatus = re.findall('>(.*?)</span>', line, re.IGNORECASE)[0]
                elif "Est. Shipping Date" in line:
                    subOrder.estimatedShippingDate = line.split(":")[1].strip()
                elif "Est. Delivery Date" in line:
                    subOrder.estimatedDeliveryDate = line.split(":")[1].strip()
                elif "Courier Name" in line:
                    subOrder.courierName = line.split(":")[1].strip()
                elif "Tracking No" in line:
                    subOrder.trackingNumber = line.split(":")[1].strip()
            subOrders.append(subOrder)
        return subOrders

    def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):
        '''
        Fetch the order summary page of a freshly placed order and save it.

        The query string of orderSuccessUrl is appended to ORDER_TRACK_URL,
        the resulting page is parsed with _parse and the order is stored
        through _saveToOrder. rawHtml is accepted but not used here.

        Returns {'result': 'ORDER_CREATED'} on success and
        {'result': 'PARSE_ERROR'} when any step raises an Exception; the
        traceback is printed in that case.
        '''
        resp = {}
        try:
            br = getBrowserObject()
            url = ORDER_TRACK_URL + re.findall(r'.*(\?.*?)$', orderSuccessUrl,re.IGNORECASE)[0]
            page = ungzipResponse(br.open(url))
            merchantOrder = self._parse(orderId, subTagId, userId, page, orderSuccessUrl)
            self._saveToOrder(todict(merchantOrder))
            resp['result'] = 'ORDER_CREATED'
        except Exception:
            # Exception instead of a bare except, so KeyboardInterrupt/SystemExit
            # are not reported as a parse error.
            print("Error occurred")
            traceback.print_exc()
            resp['result'] = 'PARSE_ERROR'
        return resp
             
        
        #soup = BeautifulSoup(rawHtml,convertEntities=BeautifulSoup.HTML_ENTITIES)
        #soup.find(name, attrs, recursive, text)

    def _getStatusFromDetailedStatus(self, detailedStatus):
        '''
        Translate a Snapdeal status text into one of our order statuses.

        Returns the key of Store.OrderStatusMap whose list contains
        detailedStatus. Raises ParseException when no list contains it.
        '''
        for key, value in Store.OrderStatusMap.items():
            if detailedStatus in value:
                return key
        # Reported once, and only when no entry of the map matched.
        print("Detailed Status need to be mapped")
        raise ParseException("_getStatusFromDetailedStatus", "Found new order status" + detailedStatus)
    
    
    def scrapeStoreOrders(self,):
        '''
        Refresh every active order from its Snapdeal order summary page.

        For each order returned by _getActiveOrders the summary page is
        downloaded, the changes of every sub-order are queued on an ordered
        bulk operation and the order's 'closed' flag is set to True only
        when every sub-order on the page is closed. The bulk operation is
        executed once per order and its result is pretty-printed.

        Raises ParseException when a page contains no sub-order section, no
        sub-order div or an unknown status; IndexError when the page or the
        success URL does not have the expected shape.
        '''
        orders = self._getActiveOrders()
        print("Found orders %s" % (orders,))
        br = getBrowserObject()
        for order in orders:
            url = ORDER_TRACK_URL + re.findall(r'.*(\?.*?)$', order['orderSuccessUrl'],re.IGNORECASE)[0]
            page = ungzipResponse(br.open(url))
            soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)
            sections = soup.findAll("section")
            orderTrs = sections[1].findAll("tr")
            placedOn = str(orderTrs[0].findAll("td")[1].text)

            # The first two sections are not sub-orders; everything after them is.
            subOrderElements = sections[2:]
            if len(subOrderElements) == 0:
                # Without any sub-order nothing can be said about the order; do not close it.
                raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")

            bulk = self.db.merchantOrder.initialize_ordered_bulk_op()
            # Accumulated over ALL sections: one open sub-order keeps the order open.
            orderClosed = True
            for subOrderElement in subOrderElements:
                if not self._queueSubOrderUpdates(order, subOrderElement, placedOn, bulk):
                    orderClosed = False
            bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': orderClosed}})
            result = bulk.execute()
            pprint(result)

    def _queueSubOrderUpdates(self, order, subOrderElement, placedOn, bulk):
        '''
        Queue the updates for every sub-order div of one product section.

        Sub-orders unknown to _isSubOrderActive are added immediately,
        sub-orders already marked closed are skipped, all others get a
        '$set' update queued on bulk.

        Returns True when every sub-order of the section is closed.
        '''
        divs = subOrderElement.findAll("div", {"class": "blk lrPad subordrs"})
        if len(divs)<=0:
            raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")

        allClosed = True
        for div in divs:
            lines = str(div).replace("\n","").replace("\t", "").split("<br />")

            # Identify the sub-order first so the remaining lines are always
            # applied to the right document, whatever their order in the div.
            merchantSubOrderId = None
            for line in lines:
                if "Suborder ID" in line:
                    merchantSubOrderId = re.findall(r'\d+', line)[0]
                    break
            if merchantSubOrderId is None:
                raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")

            subOrder = self._isSubOrderActive(order, merchantSubOrderId)
            if subOrder is None:
                self._addNewSubOrder(order, subOrderElement, placedOn, merchantSubOrderId)
                allClosed = False
                continue
            if subOrder['closed']:
                # Already closed: nothing to update, but sibling sub-orders are still processed.
                continue

            updateMap = {}
            if not self._collectSubOrderChanges(subOrder, lines, updateMap):
                allClosed = False
            if updateMap:
                findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": merchantSubOrderId}
                bulk.find(findMap).update({'$set' : updateMap})
        return allClosed

    def _addNewSubOrder(self, order, subOrderElement, placedOn, merchantSubOrderId):
        '''
        Parse the section again and push the sub-order with the given id.

        Only the parsed sub-order whose merchantSubOrderId matches is
        pushed, as a single element of the order's 'subOrders' array.
        '''
        for parsed in self.parseSubOrder(subOrderElement, placedOn):
            if getattr(parsed, 'merchantSubOrderId', None) != merchantSubOrderId:
                continue
            self.db.merchantOrder.update({"orderId":order['orderId']},{'$push':{"subOrders":todict(parsed)}})
            print("Added new suborder with subOrder Id: %s" % (parsed.merchantSubOrderId,))

    def _collectSubOrderChanges(self, subOrder, lines, updateMap):
        '''
        Fill updateMap with the positional '$set' fields read from lines.

        Returns True when a status line was found and it closes the
        sub-order; a sub-order without a readable status counts as open.
        '''
        # (label searched in the line, sub-order field receiving the text after the first colon)
        trackingFields = (
            ("Est. Shipping Date", "estimatedShippingDate"),
            ("Est. Delivery Date", "estimatedDeliveryDate"),
            ("Courier Name", "courierName"),
            ("Tracking No", "trackingNumber"),
        )
        subOrderClosed = False
        for line in lines:
            if "Suborder ID" in line:
                continue
            if "Status" in line:
                subOrderClosed = self._applyStatusLine(subOrder, line, updateMap)
                continue
            for label, field in trackingFields:
                if label in line:
                    updateMap["subOrders.$." + field] = line.split(":")[1].strip()
                    break
        return subOrderClosed

    def _applyStatusLine(self, subOrder, line, updateMap):
        '''
        Record status, closing and cashback changes of one status line.

        Returns True when the sub-order has to be treated as closed.
        '''
        detailedStatus = re.findall('>(.*?)</span>', line, re.IGNORECASE)[0]
        updateMap["subOrders.$.detailedStatus"] = detailedStatus
        status = self._getStatusFromDetailedStatus(detailedStatus)
        updateMap["subOrders.$.status"] = status
        closedStatus = status in [Store.ORDER_DELIVERED, Store.ORDER_CANCELLED]

        if detailedStatus == 'Closed For Vendor Reallocation':
            # This status maps to cancelled, but it only closes the sub-order once the
            # stored 'closeAt' deadline (six hours after it was first seen) has passed.
            closedStatus = False
            closeAt = subOrder.get("closeAt")
            if closeAt is None:
                closeAt = datetime.now() + timedelta(hours=6)
                updateMap["subOrders.$.closeAt"] = datetime.strftime(closeAt,"%Y-%m-%d %H:%M:%S")
            elif datetime.now() > datetime.strptime(closeAt,"%Y-%m-%d %H:%M:%S"):
                closedStatus = True

        if closedStatus:
            # A closed sub-order settles its pending cashback one way or the other.
            updateMap["subOrders.$.closed"] = True
            if subOrder.get("cashBackStatus") == Store.CB_PENDING:
                if status == Store.ORDER_DELIVERED:
                    updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVED
                elif status == Store.ORDER_CANCELLED:
                    updateMap["subOrders.$.cashBackStatus"] = Store.CB_CANCELLED
        return closedStatus
            
    
    def _saveToAffiliate(self, offers):
        '''
        Insert the given offer dictionaries into snapdealOrderAffiliateInfo.

        Nothing is inserted (and a message is printed) when offers is None
        or empty. The insert continues past failing documents; a
        DuplicateKeyError is caught and its details are printed.
        '''
        if offers is None or not len(offers):
            print("no affiliate have been pushed")
            return
        affiliateCollection = self.db.snapdealOrderAffiliateInfo
        try:
            affiliateCollection.insert(offers, continue_on_error=True)
        except pymongo.errors.DuplicateKeyError as duplicateError:
            print(duplicateError.details)
            
    
    def _getAllOffers(self, br, token):
        '''
        Download every page of the affiliate conversion report.

        br is the logged-in browser object and token the session token sent
        with each request. Pages are requested from POST_URL starting at 1
        until the page number exceeds the 'pageCount' of the response.

        Returns the concatenated list of the 'data' entries of all pages.
        A page whose response lists errors contributes no entries. Raises
        ParseException when a response has no usable 'response'/'data'
        object, because paging cannot continue without it.
        '''
        allOffers = []
        nextPage = 1
        while True:
            response = br.open(POST_URL, getPostData(token, nextPage))
            payload = json.loads(ungzipResponse(response))
            body = payload.get('response') if isinstance(payload, dict) else None
            data = body.get('data') if isinstance(body, dict) else None
            if not isinstance(data, dict):
                raise ParseException("_getAllOffers", "Unexpected affiliate report response for page " + str(nextPage))
            if not body.get('errors'):
                allOffers += data.get('data') or []
            nextPage += 1
            # Coerced to int so a page count delivered as text still ends the loop.
            if int(data.get('pageCount') or 0) < nextPage:
                break

        return allOffers
    
    def covertToObj(self,offer):
        '''
        Convert one raw report entry into an AffiliateInfo object.

        offer is a dictionary with a 'Stat' dictionary (affiliate_info1,
        conversion_status, ad_id, datetime, payout, ip,
        conversion_sale_amount) and an 'Offer' dictionary providing 'name'.
        '''
        stat = offer['Stat']
        return AffiliateInfo(
            stat['affiliate_info1'],
            self.store_id,
            stat['conversion_status'],
            stat['ad_id'],
            stat['datetime'],
            stat['payout'],
            offer['Offer']['name'],
            stat['ip'],
            stat['conversion_sale_amount'],
        )
def getPostData(token, page = 1, limit= 20, startDate=None, endDate=None):
    '''
    Build the urlencoded body of a "getConversions" report request.

    token     -- session token sent as "SessionToken".
    page      -- page number to request.
    limit     -- number of rows per page.
    startDate -- first day of the report; defaults to 31 days before endDate.
    endDate   -- last day of the report; defaults to tomorrow.

    Both dates only need a strftime method. Returns the encoded string.
    '''
    try:
        from urllib import urlencode
    except ImportError:
        # Location of urlencode on interpreters where urllib is a package.
        from urllib.parse import urlencode

    # Explicitly supplied dates are honoured; only missing ones get a default.
    if endDate is None:
        endDate = date.today() + timedelta(days=1)
    if startDate is None:
        startDate = endDate - timedelta(days=31)
    firstDay = startDate.strftime('%Y-%m-%d')
    lastDay = endDate.strftime('%Y-%m-%d')

    # A tuple of pairs (not a dict) because several keys are repeated.
    parameters = (
        ("page",str(page)),
        ("limit",str(limit)),
        ("fields[]","Stat.offer_id"),
        ("fields[]","Stat.datetime"),
        ("fields[]","Offer.name"),
        ("fields[]","Stat.conversion_status"),
        ("fields[]","Stat.conversion_sale_amount"),
        ("fields[]","Stat.payout"),
        ("fields[]","Stat.ip"),
        ("fields[]","Stat.ad_id"),
        ("fields[]","Stat.affiliate_info1"),
        ("sort[Stat.datetime]","desc"),
        ("filters[Stat.date][conditional]","BETWEEN"),
        ("filters[Stat.date][values][]",firstDay),
        ("filters[Stat.date][values][]",lastDay),
        ("data_start",firstDay),
        ("data_end",lastDay),
        ("Method","getConversions"),
        ("NetworkId","jasper"),
        ("SessionToken",token),
    )
    return urlencode(parameters)

def todict(obj, classkey=None):
    '''
    Recursively convert an object graph into plain dicts and lists.

    - dictionaries are copied with every value converted;
    - strings are returned unchanged;
    - objects with an _ast() method are replaced by the conversion of its result;
    - other iterables become lists of converted items;
    - objects with a __dict__ become a dict of their attributes, leaving
      out callables and names starting with an underscore;
    - anything else is returned unchanged.

    When classkey is given, every converted object additionally stores its
    class name under that key.
    '''
    if isinstance(obj, dict):
        return dict((key, todict(value, classkey)) for (key, value) in obj.items())
    # Strings are leaves; checked explicitly so they are never walked character by character.
    if isinstance(obj, (str, bytes, type(u""))):
        return obj
    if hasattr(obj, "_ast"):
        return todict(obj._ast(), classkey)
    if hasattr(obj, "__iter__"):
        return [todict(item, classkey) for item in obj]
    if hasattr(obj, "__dict__"):
        data = dict((key, todict(value, classkey))
                    for key, value in obj.__dict__.items()
                    if not callable(value) and not key.startswith('_'))
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    return obj


def main():
    '''
    Manual entry point: refresh the orders of the store with id 3.
    '''
    store = getStore(3)
    store.scrapeStoreOrders()
    #store._isSubOrderActive(8, "5970688907")
    #store.scrapeAffiliate()
    #store.parseOrderRawHtml(12345, "subtagId", 122323,  "html", 'https://m.snapdeal.com/purchaseMobileComplete?code=1f4166d13ea799b65aa9dea68b3e9e70&order=4509499363')


# Kept at the very end so every module-level name, todict included, is
# defined before main() runs.
if __name__ == '__main__':
    main()