| 13566 |
amit.gupta |
1 |
'''
|
|
|
2 |
Created on Jan 15, 2015
|
|
|
3 |
|
|
|
4 |
@author: amit
|
|
|
5 |
'''
|
|
|
6 |
from bson.binary import Binary
|
|
|
7 |
from dtr import main
|
|
|
8 |
from dtr.dao import AffiliateInfo
|
|
|
9 |
from dtr.main import getBrowserObject, ScrapeException, getStore, ParseException
|
|
|
10 |
from pymongo import MongoClient
|
|
|
11 |
from BeautifulSoup import BeautifulSoup
|
|
|
12 |
import datetime
|
|
|
13 |
import json
|
|
|
14 |
import mechanize
|
|
|
15 |
import pymongo
|
|
|
16 |
import re
|
|
|
17 |
import urllib
|
|
|
18 |
import soupselect;soupselect.monkeypatch()
|
|
|
19 |
|
|
|
20 |
# Credentials for the Snapdeal affiliate panel login form.
# SECURITY NOTE(review): plaintext credentials are committed to source
# control here; they should be moved to configuration / environment
# variables and rotated.
USERNAME='saholic1@gmail.com'
PASSWORD='spice@2020'
# Affiliate panel login page (first form on the page is the login form).
AFFILIATE_URL='http://affiliate.snapdeal.com'
# HasOffers reporting API endpoint used by Store._getAllOffers().
POST_URL='https://api-p03.hasoffers.com/v3/Affiliate_Report.json'
# Mobile-site order summary page scraped by Store.parseOrderRawHtml();
# the order's query string is appended to this base URL.
ORDER_TRACK_URL='https://m.snapdeal.com/orderSummary'
# Page whose body embeds the HasOffers "session_token" needed by POST_URL.
CONFIG_URL='http://affiliate.snapdeal.com/publisher/js/config.php'
|
|
|
26 |
|
|
|
27 |
|
|
|
28 |
class Store(main.Store):

    '''
    This is to map order statuses of our system to order statuses of snapdeal.
    And our statuses will change accordingly.

    '''
    # Snapdeal-specific Store: logs in to the affiliate panel (HasOffers)
    # to pull conversion reports, and scrapes the mobile order-summary
    # page to extract order / sub-order details.
    # Keys are our generic statuses from main.Store; values are the lists
    # of status strings Snapdeal displays for that state.
    OrderStatusMap = {
        main.Store.ORDER_PLACED : ['In Progress','N/A'],
        main.Store.ORDER_DELIVERED : ['Delivered'],
        main.Store.ORDER_SHIPPED : ['In Transit'],
        main.Store.ORDER_CANCELLED : ['Closed For Vendor Reallocation']

    }

    def __init__(self,store_id):
        # Delegate all initialisation to the base dtr Store.
        super(Store, self).__init__(store_id)

    def getName(self):
        """Return the canonical name identifying this store scraper."""
        return "snapdeal"

    def scrapeAffiliate(self, startDate=None, endDate=None):
        """Log in to the Snapdeal affiliate panel and persist all
        conversion records to MongoDB.

        startDate/endDate are currently ignored here; the reporting
        window is chosen inside getPostData().
        """
        br = getBrowserObject()
        br.open(AFFILIATE_URL)
        # The login form is the first form on the page.
        br.select_form(nr=0)
        br.form['data[User][password]'] = PASSWORD
        br.form['data[User][email]'] = USERNAME
        br.submit()
        # config.php embeds the HasOffers session token needed by the API.
        response = br.open(CONFIG_URL)

        token = re.findall('"session_token":"(.*?)"', ungzipResponse(response, br), re.IGNORECASE)[0]

        allOffers = self._getAllOffers(br, token)

        # covertToObj (sic) turns each raw offer dict into an AffiliateInfo
        # object; __dict__ flattens it back into a plain dict for Mongo.
        allPyOffers = [self.covertToObj(offer).__dict__ for offer in allOffers]
        self._saveToAffiliate(allPyOffers)

    def parseOrderPage(self, htmlString=None):
        # Parsing a raw order page string is not supported for Snapdeal;
        # use parseOrderRawHtml(), which re-fetches the page live.
        raise NotImplementedError

    def parseOrderRawHtml(self, orderId, subtagId, userId, rawHtml, orderSuccessUrl):
        """Fetch and parse the Snapdeal mobile order-summary page for the
        order referenced by *orderSuccessUrl*.

        orderId         -- our internal order id (used as the Mongo _id).
        subtagId        -- affiliate sub-tag attached to this order.
        userId          -- our internal user id.
        rawHtml         -- unused here; the page is re-fetched live.
        orderSuccessUrl -- purchase-complete URL whose query string
                           (?code=...&order=...) is reused on
                           ORDER_TRACK_URL.

        Builds a merchantOrder dict with nested subOrders.
        NOTE(review): the result is only printed, never returned or
        saved -- confirm whether persistence was intended.
        """
        br = getBrowserObject()
        # Reuse the ?code=...&order=... query string on the summary page.
        url = ORDER_TRACK_URL + re.findall('.*(\?.*?)$', orderSuccessUrl,re.IGNORECASE)[0]
        print url
        response = br.open(url)
        page = ungzipResponse(response, br)
        #page=page.decode("utf-8")
        soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)
        #orderHead = soup.find(name, attrs, recursive, text)
        sections = soup.findAll("section")

        #print sections

        # Second <section> holds the order-level summary table.
        order = sections[1]
        orderTrs = order.findAll("tr")

        # First row, second cell: the order placement date string.
        placedOn = str(orderTrs[0].findAll("td")[1].text)

        #Pop two section elements
        sections.pop(0)
        sections.pop(0)
        # Every remaining <section> is one product (sub-order).
        subOrders = sections

        merchantOrder = {}
        merchantSubOrders = []
        merchantOrder["_id"] = orderId
        merchantOrder["userId"] = userId
        merchantOrder['subTagId'] = subtagId
        # NOTE(review): 'merchantOderId' keeps the original spelling (sic)
        # because it is the already-persisted field name.
        merchantOrder['merchantOderId'] = re.findall(r'\d+', str(soup.find("div", {"class":"deals_heading"})))[1]
        merchantOrder['placedOn'] = placedOn
        # Pull the order-level money fields out of the summary rows.
        for orderTr in orderTrs:
            orderTrString = str(orderTr)
            if "Total Amount" in orderTrString:
                merchantOrder['totalAmount'] = re.findall(r'\d+', orderTrString)[0]
            elif "Delivery Charges" in orderTrString:
                merchantOrder['deliveryCharges'] = re.findall(r'\d+', orderTrString)[0]
            elif "Discount Applied" in orderTrString:
                merchantOrder['discountApplied'] = re.findall(r'\d+', orderTrString)[0]
            elif "Paid Amount" in orderTrString:
                merchantOrder['paidAmount'] = re.findall(r'\d+', orderTrString)[0]

        merchantOrder['closed'] = False

        for subOrderElement in subOrders:
            productUrl = str(subOrderElement.find("a")['href'])
            subTable = subOrderElement.find("table", {"class":"lrPad"})
            subTrs = subTable.findAll("tr")
            unitPrice=None
            offerDiscount = None
            deliveryCharges = None
            amountPaid = None
            for subTr in subTrs:
                subTrString = str(subTr)
                if "Unit Price" in subTrString:
                    unitPrice = re.findall(r'\d+', subTrString)[0]
                if "Quantity" in subTrString:
                    qty = re.findall(r'\d+', subTrString)[0]
                elif "Offer Discount" in subTrString:
                    offerDiscount = re.findall(r'\d+', subTrString)[0]
                elif "Delivery Charges" in subTrString:
                    deliveryCharges = re.findall(r'\d+', subTrString)[0]
                elif "Subtotal" in subTrString:
                    # Per-unit paid amount = subtotal / quantity.
                    # NOTE(review): qty is only bound if a "Quantity" row
                    # was seen earlier in subTrs; otherwise this raises
                    # NameError -- confirm page always lists Quantity first.
                    amountPaid = str(int(re.findall(r'\d+', subTrString)[0])/int(qty))

            # One div per physical shipment of this product.
            divs = subOrderElement.findAll("div", {"class": "blk lrPad subordrs"})
            if len(divs)<=0:
                raise ParseException("subOrder", "Could not Parse suborders for Snapdeal")

            for div in divs:
                merchantSubOrder = {}
                merchantSubOrder['placedOn'] = placedOn
                merchantSubOrder['productTitle'] = str(subOrderElement.find("a").text)
                merchantSubOrder['productUrl'] = "http://m.snapdeal.com/" + productUrl
                # Trailing digits of the product URL are the product code.
                merchantSubOrder['productCode'] = re.findall(r'\d+$', productUrl)[0]
                merchantSubOrder['quantity'] = 1
                merchantSubOrder['status'] = 'Order Placed'
                merchantSubOrder['amountPaid'] = amountPaid
                merchantSubOrder['deliveryCharges'] = deliveryCharges
                merchantSubOrder['offerDiscount'] = offerDiscount
                merchantSubOrder['unitPrice'] = unitPrice

                trackAnchor = div.find("a")
                if trackAnchor is not None:
                    # NOTE(review): 'tracingkUrl' keeps the original
                    # (misspelled) persisted key name.
                    merchantSubOrder['tracingkUrl'] = str(trackAnchor['href'])

                divStr = str(div)
                divStr = divStr.replace("\n","").replace("\t", "")

                # Sub-order details are plain "Label: value" lines
                # separated by <br /> inside the div.
                for line in divStr.split("<br />"):
                    if "Suborder ID" in line:
                        merchantSubOrder['merchantSubOrderId'] = re.findall(r'\d+', line)[0]
                    elif "Status" in line:
                        merchantSubOrder['detailedStatus'] = re.findall('>(.*?)</span>', line, re.IGNORECASE)[0]
                    elif "Est. Shipping Date" in line:
                        merchantSubOrder['estimatedShippingDate'] = line.split(":")[1].strip()
                    elif "Est. Delivery Date" in line:
                        merchantSubOrder['estimatedDeliveryDate'] = line.split(":")[1].strip()
                    elif "Courier Name" in line:
                        merchantSubOrder['courierName'] = line.split(":")[1].strip()
                    elif "Tracking No" in line:
                        merchantSubOrder['trackingNumber'] = line.split(":")[1].strip()

                merchantSubOrders.append(merchantSubOrder)

        merchantOrder['subOrders'] = merchantSubOrders
        print merchantOrder
        #soup = BeautifulSoup(rawHtml,convertEntities=BeautifulSoup.HTML_ENTITIES)
        #soup.find(name, attrs, recursive, text)

    def scrapeStoreOrders(self,):
        # Not implemented for Snapdeal yet.
        pass

    """
    This will insert records with changes only
    """
    def _saveToAffiliate(self, offers):
        # Persist scraped offers; continue_on_error lets the bulk insert
        # keep going past documents whose _id already exists.
        client = MongoClient('mongodb://localhost:27017/')
        db = client.dtr
        collection = db.snapdealOrderAffiliateInfo
        try:
            collection.insert(offers,continue_on_error=True)
        except pymongo.errors.DuplicateKeyError as e:
            print e.details

    def _getAllOffers(self, br, token):
        """Page through the HasOffers getConversions report and return the
        accumulated list of raw offer dicts.

        br    -- an authenticated mechanize browser.
        token -- HasOffers SessionToken scraped from CONFIG_URL.
        """
        allOffers = []
        nextPage = 1
        while True:
            data = getPostData(token, nextPage)
            response = br.open(POST_URL, data)
            rmap = json.loads(ungzipResponse(response, br))
            if rmap is not None:
                rmap = rmap['response']
            if rmap is not None and len(rmap['errors'])==0:
                allOffers += rmap['data']['data']
                print allOffers
            nextPage += 1
            # Stop once the reported pageCount has been exhausted.
            if rmap['data']['pageCount']<nextPage:
                break

        return allOffers

    def covertToObj(self,offer):
        """Convert one raw HasOffers record (dict with 'Stat' and 'Offer'
        sub-dicts) into an AffiliateInfo object.

        NOTE(review): method name keeps the original "covert" typo; it is
        part of the class's public surface.
        """
        offerData = offer['Stat']
        offer1 = AffiliateInfo(offerData['affiliate_info1'], self.store_id, offerData['conversion_status'], offerData['ad_id'],
                               offerData['datetime'], offerData['payout'], offer['Offer']['name'], offerData['ip'], offerData['conversion_sale_amount'])
        return offer1
|
|
|
227 |
def getPostData(token, page = 1, limit= 20, startDate=None, endDate=None):
    """Build the url-encoded POST body for the HasOffers
    Affiliate_Report.getConversions API call.

    token     -- HasOffers SessionToken scraped from CONFIG_URL.
    page      -- 1-based page of the report to request.
    limit     -- number of rows per page.
    startDate -- report window start (datetime.date). Defaults to 31 days
                 before endDate.
    endDate   -- report window end (datetime.date). Defaults to tomorrow.

    Returns the urlencoded parameter string.
    """
    # Bug fix: the previous version unconditionally overwrote the caller's
    # startDate/endDate arguments, so a custom window could never be
    # requested. Only fill in defaults when the caller omitted them.
    if endDate is None:
        endDate = datetime.date.today() + datetime.timedelta(days=1)
    if startDate is None:
        startDate = endDate - datetime.timedelta(days=31)

    parameters = (
        ("page", str(page)),
        ("limit", str(limit)),
        ("fields[]", "Stat.offer_id"),
        ("fields[]", "Stat.datetime"),
        ("fields[]", "Offer.name"),
        ("fields[]", "Stat.conversion_status"),
        ("fields[]", "Stat.conversion_sale_amount"),
        ("fields[]", "Stat.payout"),
        ("fields[]", "Stat.ip"),
        ("fields[]", "Stat.ad_id"),
        ("fields[]", "Stat.affiliate_info1"),
        ("sort[Stat.datetime]", "desc"),
        ("filters[Stat.date][conditional]", "BETWEEN"),
        ("filters[Stat.date][values][]", startDate.strftime('%Y-%m-%d')),
        ("filters[Stat.date][values][]", endDate.strftime('%Y-%m-%d')),
        ("data_start", startDate.strftime('%Y-%m-%d')),
        ("data_end", endDate.strftime('%Y-%m-%d')),
        ("Method", "getConversions"),
        ("NetworkId", "jasper"),
        ("SessionToken", token),
    )
    # urlencode moved between modules across Python versions; bind the
    # right one without changing behavior on Python 2.
    try:
        urlencode = urllib.urlencode          # Python 2
    except AttributeError:
        from urllib import parse as _parse    # Python 3
        urlencode = _parse.urlencode
    # Encode the parameters
    return urlencode(parameters)
|
|
|
255 |
|
|
|
256 |
def main():
    # Ad-hoc manual test driver: fetches store id 3 and exercises order
    # parsing against a live Snapdeal purchase-complete URL.
    # NOTE(review): this def shadows the imported `dtr.main` module name
    # at module level; any later module-level use of `main.<attr>` after
    # this point would hit the function instead of the module.
    print "hello"
    store = getStore(3)
    #store.scrapeAffiliate()
    #with open ("data.txt", "r") as myfile:
    #    data=myfile.read()
    #    myfile.close()

    store.parseOrderRawHtml(12345, "subtagId", 122323, "html", 'https://m.snapdeal.com/purchaseMobileComplete?code=1f4166d13ea799b65aa9dea68b3e9e70&order=4509499363')
|
|
|
265 |
|
|
|
266 |
def ungzipResponse(r,b):
    """Return the body of HTTP response *r*, gunzipping it when the
    server sent Content-Encoding: gzip.

    r -- a mechanize/urllib2-style response object (has .info() and
         .read(); .info() returns a message object supporting .get()).
    b -- the browser object; unused, kept for interface compatibility.
    """
    headers = r.info()
    # Bug fix: use .get() so responses without a Content-Encoding header
    # do not raise KeyError, and fall through to returning the raw body;
    # the old version implicitly returned None for any non-gzip response,
    # which crashed callers passing the result to json.loads/re.findall.
    if headers.get('Content-Encoding') == 'gzip':
        import gzip
        print("********************")
        print("Deflating gzip response")
        print("********************")
        gz = gzip.GzipFile(fileobj=r, mode='rb')
        html = gz.read()
        gz.close()
        return html
    return r.read()
|
|
|
277 |
|
|
|
278 |
# Script entry point: run the manual test driver when executed directly.
if __name__ == '__main__':
    main()
|