# NOTE: recovered from an SVN web-viewer dump of the SmartDukaan repository
# (rev 13566, author amit.gupta); viewer chrome and line-number columns removed.
13566 amit.gupta 1
'''
2
Created on Jan 15, 2015
3
 
4
@author: amit
5
'''
6
from bson.binary import Binary
7
from dtr import main
8
from dtr.dao import AffiliateInfo
9
from dtr.main import getBrowserObject, ScrapeException, getStore, ParseException
10
from pymongo import MongoClient
11
from  BeautifulSoup import BeautifulSoup
12
import datetime
13
import json
14
import mechanize
15
import pymongo
16
import re
17
import urllib
18
import soupselect;soupselect.monkeypatch()
19
 
20
USERNAME='saholic1@gmail.com'
21
PASSWORD='spice@2020'
22
AFFILIATE_URL='http://affiliate.snapdeal.com'
23
POST_URL='https://api-p03.hasoffers.com/v3/Affiliate_Report.json'
24
ORDER_TRACK_URL='https://m.snapdeal.com/orderSummary'
25
CONFIG_URL='http://affiliate.snapdeal.com/publisher/js/config.php'
26
 
27
 
28
class Store(main.Store):
29
 
30
    '''
31
    This is to map order statuses of our system to order statuses of snapdeal.
32
    And our statuses will change accordingly.
33
 
34
    '''
35
    OrderStatusMap = {
36
                      main.Store.ORDER_PLACED : ['In Progress','N/A'],
37
                      main.Store.ORDER_DELIVERED : ['Delivered'],
38
                      main.Store.ORDER_SHIPPED : ['In Transit'],
39
                      main.Store.ORDER_CANCELLED : ['Closed For Vendor Reallocation']
40
 
41
                      }
42
    def __init__(self,store_id):
43
        super(Store, self).__init__(store_id)
44
 
45
    def getName(self):
46
        return "snapdeal"
47
 
48
    def scrapeAffiliate(self, startDate=None, endDate=None):
49
        br = getBrowserObject()
50
        br.open(AFFILIATE_URL)
51
        br.select_form(nr=0)
52
        br.form['data[User][password]'] = PASSWORD 
53
        br.form['data[User][email]'] = USERNAME
54
        br.submit()
55
        response = br.open(CONFIG_URL)
56
 
57
        token =  re.findall('"session_token":"(.*?)"', ungzipResponse(response, br), re.IGNORECASE)[0]
58
 
59
        allOffers = self._getAllOffers(br, token)
60
 
61
        allPyOffers = [self.covertToObj(offer).__dict__ for offer in allOffers]
62
        self._saveToAffiliate(allPyOffers)
63
 
64
 
65
    def parseOrderPage(self, htmlString=None): 
66
        raise NotImplementedError
67
 
68
 
69
    def parseOrderRawHtml(self, orderId, subtagId, userId, rawHtml, orderSuccessUrl):
70
        br = getBrowserObject()
71
        url = ORDER_TRACK_URL + re.findall('.*(\?.*?)$', orderSuccessUrl,re.IGNORECASE)[0]
72
        print url
73
        response = br.open(url)
74
        page = ungzipResponse(response, br)
75
        #page=page.decode("utf-8")
76
        soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)
77
        #orderHead = soup.find(name, attrs, recursive, text)
78
        sections = soup.findAll("section")
79
 
80
        #print sections
81
 
82
        order = sections[1]
83
        orderTrs = order.findAll("tr")
84
 
85
        placedOn = str(orderTrs[0].findAll("td")[1].text)
86
 
87
        #Pop two section elements
88
        sections.pop(0) 
89
        sections.pop(0)
90
        subOrders = sections
91
 
92
        merchantOrder = {}
93
        merchantSubOrders = []
94
        merchantOrder["_id"] = orderId
95
        merchantOrder["userId"] = userId
96
        merchantOrder['subTagId'] = subtagId
97
        merchantOrder['merchantOderId'] = re.findall(r'\d+', str(soup.find("div", {"class":"deals_heading"})))[1] 
98
        merchantOrder['placedOn'] = placedOn
99
        for orderTr in orderTrs:
100
            orderTrString = str(orderTr)
101
            if "Total Amount" in orderTrString:
102
                merchantOrder['totalAmount'] = re.findall(r'\d+', orderTrString)[0]
103
            elif "Delivery Charges" in orderTrString:
104
                merchantOrder['deliveryCharges'] = re.findall(r'\d+', orderTrString)[0]
105
            elif "Discount Applied" in orderTrString:
106
                merchantOrder['discountApplied'] = re.findall(r'\d+', orderTrString)[0]
107
            elif "Paid Amount" in orderTrString:
108
                merchantOrder['paidAmount'] = re.findall(r'\d+', orderTrString)[0]
109
 
110
        merchantOrder['closed'] = False        
111
 
112
        for subOrderElement in subOrders:
113
            productUrl = str(subOrderElement.find("a")['href'])
114
            subTable = subOrderElement.find("table", {"class":"lrPad"})
115
            subTrs = subTable.findAll("tr")
116
            unitPrice=None
117
            offerDiscount = None
118
            deliveryCharges = None
119
            amountPaid = None
120
            for subTr in subTrs:
121
                subTrString = str(subTr)
122
                if "Unit Price" in subTrString:
123
                    unitPrice = re.findall(r'\d+', subTrString)[0]
124
                if "Quantity" in subTrString:
125
                    qty = re.findall(r'\d+', subTrString)[0]
126
                elif "Offer Discount" in subTrString:
127
                    offerDiscount =   re.findall(r'\d+', subTrString)[0]
128
                elif "Delivery Charges" in subTrString:
129
                    deliveryCharges =   re.findall(r'\d+', subTrString)[0]
130
                elif "Subtotal" in subTrString:
131
                    amountPaid =   str(int(re.findall(r'\d+', subTrString)[0])/int(qty))
132
 
133
            divs = subOrderElement.findAll("div", {"class": "blk lrPad subordrs"})
134
            if len(divs)<=0:
135
                raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")
136
 
137
            for div in divs:
138
                merchantSubOrder = {}
139
                merchantSubOrder['placedOn'] = placedOn
140
                merchantSubOrder['productTitle'] = str(subOrderElement.find("a").text)
141
                merchantSubOrder['productUrl'] = "http://m.snapdeal.com/" + productUrl 
142
                merchantSubOrder['productCode'] = re.findall(r'\d+$', productUrl)[0]
143
                merchantSubOrder['quantity'] =   1                
144
                merchantSubOrder['status'] = 'Order Placed'
145
                merchantSubOrder['amountPaid'] = amountPaid
146
                merchantSubOrder['deliveryCharges'] = deliveryCharges
147
                merchantSubOrder['offerDiscount'] = offerDiscount
148
                merchantSubOrder['unitPrice'] = unitPrice
149
 
150
                trackAnchor = div.find("a")   
151
                if trackAnchor is not None:
152
                    merchantSubOrder['tracingkUrl'] = str(trackAnchor['href'])
153
 
154
                divStr = str(div)
155
                divStr = divStr.replace("\n","").replace("\t", "")
156
 
157
                for line in divStr.split("<br />"):
158
                    if "Suborder ID" in line:
159
                        merchantSubOrder['merchantSubOrderId'] = re.findall(r'\d+', line)[0]   
160
                    elif "Status" in line:
161
                        merchantSubOrder['detailedStatus'] = re.findall('>(.*?)</span>', line, re.IGNORECASE)[0]
162
                    elif "Est. Shipping Date" in line:
163
                        merchantSubOrder['estimatedShippingDate'] = line.split(":")[1].strip()
164
                    elif "Est. Delivery Date" in line:
165
                        merchantSubOrder['estimatedDeliveryDate'] = line.split(":")[1].strip()
166
                    elif "Courier Name" in line:
167
                        merchantSubOrder['courierName'] = line.split(":")[1].strip()
168
                    elif "Tracking No" in line:
169
                        merchantSubOrder['trackingNumber'] = line.split(":")[1].strip()
170
 
171
            merchantSubOrders.append(merchantSubOrder)   
172
 
173
        merchantOrder['subOrders'] = merchantSubOrders
174
        print merchantOrder
175
        #soup = BeautifulSoup(rawHtml,convertEntities=BeautifulSoup.HTML_ENTITIES)
176
        #soup.find(name, attrs, recursive, text)
177
 
178
 
179
 
180
 
181
 
182
    def scrapeStoreOrders(self,):
183
 
184
        pass
185
 
186
 
187
 
188
 
189
 
190
 
191
    """
192
    This will insert records with changes only 
193
    """
194
    def _saveToAffiliate(self, offers):
195
        client = MongoClient('mongodb://localhost:27017/')
196
        db = client.dtr
197
        collection = db.snapdealOrderAffiliateInfo
198
        try:
199
            collection.insert(offers,continue_on_error=True)
200
        except pymongo.errors.DuplicateKeyError as e:
201
            print e.details
202
 
203
 
204
    def _getAllOffers(self, br, token):
205
        allOffers = []
206
        nextPage = 1  
207
        while True:
208
            data = getPostData(token, nextPage)
209
            response = br.open(POST_URL, data)
210
            rmap = json.loads(ungzipResponse(response, br))
211
            if rmap is not None:
212
                rmap = rmap['response']
213
                if rmap is not None and len(rmap['errors'])==0:
214
                    allOffers += rmap['data']['data']
215
                    print allOffers
216
            nextPage += 1
217
            if rmap['data']['pageCount']<nextPage:
218
                break
219
 
220
        return allOffers
221
 
222
    def covertToObj(self,offer):
223
        offerData = offer['Stat']
224
        offer1 = AffiliateInfo(offerData['affiliate_info1'], self.store_id, offerData['conversion_status'], offerData['ad_id'], 
225
                              offerData['datetime'], offerData['payout'], offer['Offer']['name'], offerData['ip'], offerData['conversion_sale_amount'])
226
        return offer1
227
def getPostData(token, page=1, limit=20, startDate=None, endDate=None):
    '''
    Build the url-encoded POST body for the HasOffers conversion report.

    :param token: HasOffers session token
    :param page: 1-based result page to request
    :param limit: rows per page
    :param startDate: report window start (datetime.date); defaults to
        31 days before endDate
    :param endDate: report window end (datetime.date); defaults to tomorrow
    :return: url-encoded request body string
    '''
    # BUGFIX: the parameters used to be unconditionally overwritten, so
    # caller-supplied dates were silently ignored. Only default when None.
    if endDate is None:
        endDate = datetime.date.today() + datetime.timedelta(days=1)
    if startDate is None:
        startDate = endDate - datetime.timedelta(days=31)

    parameters = (
        ("page", str(page)),
        ("limit", str(limit)),
        ("fields[]", "Stat.offer_id"),
        ("fields[]", "Stat.datetime"),
        ("fields[]", "Offer.name"),
        ("fields[]", "Stat.conversion_status"),
        ("fields[]", "Stat.conversion_sale_amount"),
        ("fields[]", "Stat.payout"),
        ("fields[]", "Stat.ip"),
        ("fields[]", "Stat.ad_id"),
        ("fields[]", "Stat.affiliate_info1"),
        ("sort[Stat.datetime]", "desc"),
        ("filters[Stat.date][conditional]", "BETWEEN"),
        ("filters[Stat.date][values][]", startDate.strftime('%Y-%m-%d')),
        ("filters[Stat.date][values][]", endDate.strftime('%Y-%m-%d')),
        ("data_start", startDate.strftime('%Y-%m-%d')),
        ("data_end", endDate.strftime('%Y-%m-%d')),
        ("Method", "getConversions"),
        ("NetworkId", "jasper"),
        ("SessionToken", token),
    )
    # Encode the parameters (repeated fields[] keys are preserved).
    return urllib.urlencode(parameters)
255
 
256
def main():
257
    print "hello"
258
    store = getStore(3)
259
    #store.scrapeAffiliate()
260
    #with open ("data.txt", "r") as myfile:
261
    #    data=myfile.read()
262
    #    myfile.close()
263
 
264
    store.parseOrderRawHtml(12345, "subtagId", 122323,  "html", 'https://m.snapdeal.com/purchaseMobileComplete?code=1f4166d13ea799b65aa9dea68b3e9e70&order=4509499363')
265
 
266
def ungzipResponse(r,b):
267
    headers = r.info()
268
    if headers['Content-Encoding']=='gzip':
269
        import gzip
270
        print "********************"
271
        print "Deflating gzip response"
272
        print "********************"
273
        gz = gzip.GzipFile(fileobj=r, mode='rb')
274
        html = gz.read()
275
        gz.close()
276
        return html
277
 
278
# Script entry point: run the ad-hoc driver when executed directly.
if __name__ == '__main__':
    main()