| 17013 |
manish.sha |
1 |
'''
|
|
|
2 |
Created on Jan 15, 2015
|
|
|
3 |
|
|
|
4 |
@author: Manish
|
|
|
5 |
'''
|
|
|
6 |
from bs4 import BeautifulSoup
|
|
|
7 |
from bson.binary import Binary
|
|
|
8 |
from datetime import datetime, date, timedelta
|
|
|
9 |
from dtr import main
|
|
|
10 |
from dtr.dao import AffiliateInfo, Order, SubOrder, HomeShopAffiliateInfo
|
|
|
11 |
from dtr.main import getBrowserObject, ScrapeException, getStore, ParseException, \
|
|
|
12 |
Store as MStore, ungzipResponse, tprint
|
|
|
13 |
from dtr.storage import Mongo
|
|
|
14 |
from dtr.storage.Mongo import getImgSrc
|
|
|
15 |
from dtr.utils.utils import fetchResponseUsingProxy, PROXY_MESH_GENERAL
|
|
|
16 |
from pprint import pprint
|
|
|
17 |
from pymongo import MongoClient
|
|
|
18 |
import json
|
|
|
19 |
import pymongo
|
|
|
20 |
import re
|
|
|
21 |
import time
|
|
|
22 |
import traceback
|
|
|
23 |
import urllib
|
|
|
24 |
import urllib2
|
|
|
25 |
from urlparse import urlparse, parse_qs
|
|
|
26 |
import xml.etree.ElementTree as ET
|
|
|
27 |
from dtr.storage import MemCache
|
|
|
28 |
from dtr.storage.Mongo import getDealRank
|
|
|
29 |
|
|
|
30 |
|
|
|
31 |
# Affiliate lead-summary export (XML). The six %d placeholders are, in order:
# start year, start month, start day, end year, end month, end day.
# NOTE(review): the Login token is embedded in source; consider moving it to
# configuration.
AFFLIATE_TRASACTIONS_URL = (
    "https://admin.optimisemedia.com/v2/reports/affiliate/leads/leadsummaryexport.aspx"
    "?Contact=796881&Country=26&Agency=95&Merchant=331902&Status=-1"
    "&Year=%d&Month=%d&Day=%d&EndYear=%d&EndMonth=%d&EndDay=%d"
    "&DateType=0&Sort=CompletionDate&Login=1347562DA5E3EFF6FB1561765C47C782"
    "&Format=XML&RestrictURL=0"
)

# Mobile order-detail page; %d is the numeric merchant order id.
ORDER_TRACK_URL = 'https://m.homeshop18.com/order/orderDetail.mobi?orderId=%d'

# Desktop "track your order" form page that track() submits.
HS_ORDER_TRACK_URL = 'http://www.homeshop18.com/track-your-order.html'

# NOTE(review): these two point at shopclues and are not referenced in the
# visible part of this module -- presumably left over from another store.
BASE_URL = 'http://www.shopclues.com'
BASE_MURL = 'http://m.shopclues.com'

# Mobile product page; %d is the numeric product id.
BASE_PRODUCT_URL = 'http://m.homeshop18.com/product.mobi?productId=%d'

# Prefix prepended to the relative image path found in the order JSON.
BASE_IMG_URL = 'http://stat.homeshop18.com/homeshop18'


#http://m.homeshop18.com/checkout/paySuccess.mobi?orderComplete=true
|
|
|
41 |
|
|
|
42 |
class Store(MStore):
|
|
|
43 |
'''
|
|
|
44 |
This is to map order statuses of our system to order statuses of snapdeal.
|
|
|
45 |
And our statuses will change accordingly.
|
|
|
46 |
|
|
|
47 |
'''
|
|
|
48 |
OrderStatusMap = {
|
|
|
49 |
MStore.ORDER_PLACED : ['payment successful', 'new order - cod confirmation pending', 'processing', 'quality check','on schedule', 'processing - pickup initiated', 'processing - ready to dispatch','processing - procurement delay from merchant','processing - slight procurment delay from merchant','cod order confirmed by customer'],
|
|
|
50 |
MStore.ORDER_DELIVERED : ['delivered', 'complete'],
|
|
|
51 |
MStore.ORDER_SHIPPED : ['in transit', 'dispatched','shipped','order handed to courier','order handed over to courier'],
|
|
|
52 |
MStore.ORDER_CANCELLED : ['payment failed', 'canceled', 'payment declined', 'order on hold - cancellation requested by customer', 'courier returned', 'canceled on customer request', 'canceled by customer','order canceled by customer','canceled - address not shippable','return complete','undelivered - returning to origin']
|
|
|
53 |
}
|
|
|
54 |
OrderStatusConfirmationMap= {
|
|
|
55 |
"P" : "Payment Successful",
|
|
|
56 |
"D" : "Order Declined",
|
|
|
57 |
"O" : "New Order - COD confirmation Pending"
|
|
|
58 |
}
|
|
|
59 |
|
|
|
60 |
OrderStatusStringMap = {
|
|
|
61 |
MStore.ORDER_PLACED : ['expect the order to reach', 'received your payment'],
|
|
|
62 |
MStore.ORDER_DELIVERED : ['has been delivered'],
|
|
|
63 |
MStore.ORDER_SHIPPED : ['has been shipped', 'has been dispatched'],
|
| 17121 |
manish.sha |
64 |
MStore.ORDER_CANCELLED : ['has been cancelled', 'has been rejected','is returned back to us','payment failed']
|
| 17013 |
manish.sha |
65 |
}
|
|
|
66 |
|
| 17119 |
manish.sha |
67 |
OrderStatusShownMap = {
|
|
|
68 |
"Under Process" : ['expect the order to reach', 'received your payment'],
|
|
|
69 |
"Order Delivered" : ['has been delivered'],
|
|
|
70 |
"Order Shipped" : ['has been shipped', 'has been dispatched'],
|
| 17121 |
manish.sha |
71 |
"Order Cancelled" : ['has been cancelled', 'has been rejected','is returned back to us','payment failed']
|
| 17119 |
manish.sha |
72 |
}
|
|
|
73 |
|
| 17013 |
manish.sha |
74 |
CONF_CB_AMOUNT = MStore.CONF_CB_DISCOUNTED_PRICE
|
|
|
75 |
|
|
|
76 |
|
|
|
77 |
def __init__(self,store_id):
|
|
|
78 |
super(Store, self).__init__(store_id)
|
|
|
79 |
|
|
|
80 |
def convertToObj(self,offer):
|
|
|
81 |
orderRef = offer['MerchantRef']
|
|
|
82 |
if len(orderRef)>15:
|
|
|
83 |
orderRef = orderRef[0:len(orderRef)-10]
|
| 17248 |
manish.sha |
84 |
offer1 = HomeShopAffiliateInfo(offer['UID'], offer['TransactionTime'], offer['TransactionID'], orderRef, orderRef, offer['Merchant'], offer['PID'], offer['Product'], float(str(offer['SR'])), float(str(offer['TransactionValue'])), offer['UKey'], offer['ClickTime'], offer['Status'])
|
| 17013 |
manish.sha |
85 |
return offer1
|
|
|
86 |
|
|
|
87 |
def _saveToAffiliate(self, offers):
|
|
|
88 |
collection = self.db.homeshopOrderAffiliateInfo
|
|
|
89 |
mcollection = self.db.merchantOrder
|
|
|
90 |
for offerObj in offers:
|
|
|
91 |
offer = self.convertToObj(offerObj)
|
|
|
92 |
collection.update({"transactionId":offer.transactionId, "subTagId":offer.subTagId, "payOut":offer.payOut},{"$set":todict(offer)}, upsert=True)
|
|
|
93 |
mcollection.update({"subTagId":offer.subTagId, "storeId":self.store_id, "subOrders.missingAff":True}, {"$set":{"subOrders.$.missingAff":False}})
|
|
|
94 |
|
|
|
95 |
def scrapeAffiliate(self, startDate=datetime.today() - timedelta(days=10), endDate=datetime.today()):
|
|
|
96 |
uri = AFFLIATE_TRASACTIONS_URL%(startDate.year,startDate.month,startDate.day,endDate.year,endDate.month,endDate.day)
|
|
|
97 |
root = ET.parse(urllib2.urlopen(uri)).getroot()
|
|
|
98 |
if len(root)> 0 and len(root[0])> 0:
|
|
|
99 |
offers = []
|
|
|
100 |
for child in root[0][0]:
|
|
|
101 |
offers.append(child.attrib)
|
|
|
102 |
self._saveToAffiliate(offers)
|
|
|
103 |
|
|
|
104 |
def _setLastSaleDate(self, saleDate):
|
|
|
105 |
self.db.lastSaleDtate.update({'storeId':self.store_id}, {'$set':{'saleDate':saleDate}})
|
|
|
106 |
|
|
|
107 |
def getName(self):
|
|
|
108 |
return "homeshop18"
|
|
|
109 |
|
|
|
110 |
|
|
|
111 |
def _getLastSaleDate(self,):
|
|
|
112 |
lastDaySaleObj = self.db.lastDaySale.find_one({"storeId":self.store_id})
|
|
|
113 |
if lastDaySaleObj is None:
|
|
|
114 |
return datetime.min
|
|
|
115 |
|
|
|
116 |
def _getStatusFromDetailedStatus(self, detailedStatus):
|
| 17120 |
manish.sha |
117 |
for key, statusList in Store.OrderStatusStringMap.iteritems():
|
|
|
118 |
for value in statusList:
|
|
|
119 |
if value in detailedStatus.lower():
|
|
|
120 |
return key
|
| 17013 |
manish.sha |
121 |
print "Detailed Status need to be mapped", detailedStatus, self.store_id
|
|
|
122 |
return None
|
|
|
123 |
|
| 17119 |
manish.sha |
124 |
def _getDisplayStatusFromDetailedStatus(self, detailedStatus):
|
| 17120 |
manish.sha |
125 |
for key, statusList in Store.OrderStatusShownMap.iteritems():
|
|
|
126 |
for value in statusList:
|
|
|
127 |
if value in detailedStatus.lower():
|
|
|
128 |
return key
|
| 17119 |
manish.sha |
129 |
print "Display Status need to be mapped", detailedStatus, self.store_id
|
|
|
130 |
return None
|
|
|
131 |
|
| 17013 |
manish.sha |
132 |
def updateCashbackInSubOrders(self, subOrders):
|
|
|
133 |
for subOrder in subOrders:
|
|
|
134 |
cashbackStatus = Store.CB_NA
|
|
|
135 |
cashbackAmount = 0
|
|
|
136 |
percentage = 0
|
|
|
137 |
amount = subOrder.amountPaid
|
|
|
138 |
if amount > 0:
|
|
|
139 |
(cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amount)
|
|
|
140 |
if cashbackAmount > 0:
|
|
|
141 |
cashbackStatus = Store.CB_PENDING
|
|
|
142 |
subOrder.cashBackStatus = cashbackStatus
|
|
|
143 |
subOrder.cashBackAmount = cashbackAmount
|
|
|
144 |
subOrder.cashBackPercentage = percentage
|
|
|
145 |
return subOrders
|
|
|
146 |
|
|
|
147 |
def _parseUsingOrderJson(self, orderId, subTagId, userId, rawHtmlSoup, orderSuccessUrl):
|
|
|
148 |
orderObj = None
|
|
|
149 |
|
|
|
150 |
scripts = rawHtmlSoup.find_all('script')
|
|
|
151 |
for script in scripts:
|
|
|
152 |
if 'var order =' in script.text:
|
|
|
153 |
requiredObjList = script.text.strip().split('\n')
|
|
|
154 |
for val in requiredObjList:
|
|
|
155 |
if "$.parseJSON('" in val:
|
| 17309 |
manish.sha |
156 |
val = val.encode("ascii","ignore")
|
|
|
157 |
print 'Val......',val
|
| 17013 |
manish.sha |
158 |
print val.split("$.parseJSON('")[1].split("');")[0]
|
|
|
159 |
orderObj = json.loads(val.split("$.parseJSON('")[1].split("');")[0])
|
|
|
160 |
print orderObj
|
|
|
161 |
break
|
|
|
162 |
break
|
|
|
163 |
|
|
|
164 |
if orderObj is not None:
|
|
|
165 |
merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)
|
|
|
166 |
merchantOrder.placedOn = orderObj['orderDate']
|
|
|
167 |
merchantOrder.merchantOrderId = str(long(orderObj['orderId']))
|
|
|
168 |
merchantOrder.paidAmount = long(orderObj['pricing']['orderNetPrice'])
|
|
|
169 |
merchantOrder.totalAmount = long(orderObj['pricing']['orderGrossPrice'])
|
|
|
170 |
merchantOrder.discountApplied = long(orderObj['pricing']['discountCouponRedemptionAmount'])+long(orderObj['pricing']['giftCouponRedemptionAmount'])
|
|
|
171 |
merchantOrder.deliveryCharges = long(orderObj['totalShipmentCharges'])
|
|
|
172 |
subOrders= []
|
|
|
173 |
for subOrderObj in orderObj['subOrders']:
|
| 17045 |
manish.sha |
174 |
subOrder = SubOrder(subOrderObj['cartItem']['cartItemTitle'], BASE_PRODUCT_URL%(long(subOrderObj['cartItem']['productId'])), orderObj['orderDate'], long(subOrderObj['pricing']['payablePrice']))
|
| 17046 |
manish.sha |
175 |
subOrder.estimatedDeliveryDate = subOrderObj['shipment']['expectedDeliveryDate']
|
| 17013 |
manish.sha |
176 |
subOrder.merchantSubOrderId = str(subOrderObj['subOrderId'])
|
| 17047 |
manish.sha |
177 |
if rawHtmlSoup.body.find("div", {'class':'sub-order-status'}) is not None:
|
|
|
178 |
subOrder.detailedStatus = rawHtmlSoup.body.find("div", {'class':'sub-order-status'}).text
|
|
|
179 |
else:
|
|
|
180 |
subOrder.detailedStatus = 'Order Placed'
|
| 17013 |
manish.sha |
181 |
subOrder.imgUrl = BASE_IMG_URL+subOrderObj['cartItem']['lineItemImageUrl']
|
| 17054 |
manish.sha |
182 |
subOrder.offerDiscount = (long(subOrderObj['cartItem']['price'])+long(subOrderObj['shipment']['shipmentCharge']))*long(subOrderObj['cartItem']['itemQuantity'])-long(subOrderObj['pricing']['payablePrice'])
|
| 17013 |
manish.sha |
183 |
subOrder.unitPrice = long(subOrderObj['cartItem']['price'])
|
|
|
184 |
subOrder.productCode = str(long(subOrderObj['cartItem']['productId']))
|
| 17046 |
manish.sha |
185 |
subOrder.amountPaid = long(subOrderObj['pricing']['payablePrice'])
|
| 17013 |
manish.sha |
186 |
subOrder.quantity = long(subOrderObj['cartItem']['itemQuantity'])
|
|
|
187 |
subOrder.tracingkUrl = ORDER_TRACK_URL%(long(orderObj['orderId']))
|
|
|
188 |
dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)
|
|
|
189 |
subOrder.dealRank = dealRank.get('rank')
|
|
|
190 |
subOrder.rankDesc = dealRank.get('description')
|
|
|
191 |
subOrder.maxNlc = dealRank.get('maxNlc')
|
|
|
192 |
subOrder.minNlc = dealRank.get('minNlc')
|
|
|
193 |
subOrder.db = dealRank.get('dp')
|
|
|
194 |
subOrder.itemStatus = dealRank.get('status')
|
| 17051 |
manish.sha |
195 |
subOrders.append(subOrder)
|
| 17013 |
manish.sha |
196 |
merchantOrder.subOrders = self.updateCashbackInSubOrders(subOrders)
|
|
|
197 |
return merchantOrder
|
|
|
198 |
|
|
|
199 |
|
|
|
200 |
def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl):
|
|
|
201 |
resp = {}
|
|
|
202 |
try:
|
|
|
203 |
rawHtmlSoup = BeautifulSoup(rawHtml)
|
|
|
204 |
merchantOrder = self._parseUsingOrderJson(orderId, subTagId, userId, rawHtmlSoup, orderSuccessUrl)
|
|
|
205 |
merchantOrder.orderTrackingUrl = ORDER_TRACK_URL%(long(merchantOrder.merchantOrderId))
|
|
|
206 |
if self._saveToOrder(todict(merchantOrder)):
|
|
|
207 |
resp['result'] = 'ORDER_CREATED'
|
|
|
208 |
else:
|
|
|
209 |
resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'
|
|
|
210 |
|
|
|
211 |
return resp
|
|
|
212 |
except:
|
|
|
213 |
print "Error occurred"
|
|
|
214 |
traceback.print_exc()
|
|
|
215 |
resp['result'] = 'ORDER_NOT_CREATED'
|
|
|
216 |
return resp
|
|
|
217 |
|
|
|
218 |
def scrapeStoreOrders(self,):
|
|
|
219 |
#collectionMap = {'palcedOn':1}
|
|
|
220 |
searchMap = {}
|
|
|
221 |
collectionMap = {"orderTrackingUrl":1,"merchantOrderId":1}
|
|
|
222 |
orders = self._getActiveOrders(searchMap,collectionMap)
|
|
|
223 |
for order in orders:
|
| 17112 |
manish.sha |
224 |
bulk = self.db.merchantOrder.initialize_ordered_bulk_op()
|
| 17098 |
manish.sha |
225 |
try:
|
|
|
226 |
print "Order", self.store_name, order['orderId'], order['orderTrackingUrl'], order['merchantOrderId']
|
|
|
227 |
br1 = track(HS_ORDER_TRACK_URL, order['merchantOrderId'])
|
|
|
228 |
ungzipResponseBr(br1.response(), br1)
|
| 17112 |
manish.sha |
229 |
trackPageSoup = BeautifulSoup(br1.response())
|
| 17098 |
manish.sha |
230 |
subOrderTable = trackPageSoup.body.find("table", {'class':'lower-table'})
|
| 17112 |
manish.sha |
231 |
subOrders = subOrderTable.find_all('tr')
|
| 17098 |
manish.sha |
232 |
firstRow = subOrders.pop(0)
|
|
|
233 |
closed = True
|
|
|
234 |
for row in subOrders:
|
|
|
235 |
cols = row.find_all('td')
|
|
|
236 |
subOrderId = cols[0].text.strip()
|
|
|
237 |
subOrderStatus = cols[1].text.strip()
|
|
|
238 |
subbulk = self.db.merchantOrder.initialize_ordered_bulk_op()
|
|
|
239 |
print 'Sub Order Id', str(subOrderId)
|
|
|
240 |
subOrder = self._isSubOrderActive(order, str(subOrderId))
|
|
|
241 |
if subOrder is None:
|
|
|
242 |
print 'No HS Sub Order Found for SubOrder Id:- '+ str(subOrderId)
|
|
|
243 |
elif subOrder['closed']:
|
|
|
244 |
continue
|
| 17013 |
manish.sha |
245 |
else:
|
| 17098 |
manish.sha |
246 |
findMap = {"orderId": order['orderId'], "subOrders.merchantSubOrderId": str(subOrderId)}
|
|
|
247 |
updateMap = {}
|
| 17119 |
manish.sha |
248 |
displayStatus = self._getDisplayStatusFromDetailedStatus(subOrderStatus)
|
| 17098 |
manish.sha |
249 |
status = self._getStatusFromDetailedStatus(subOrderStatus)
|
| 17119 |
manish.sha |
250 |
if displayStatus is not None:
|
|
|
251 |
updateMap["subOrders.$.detailedStatus"] = displayStatus
|
| 17098 |
manish.sha |
252 |
closedStatus = status in [Store.ORDER_DELIVERED, Store.ORDER_CANCELLED]
|
|
|
253 |
if status is not None:
|
|
|
254 |
updateMap["subOrders.$.status"] = status
|
|
|
255 |
if closedStatus:
|
|
|
256 |
#if status is closed then change the paybackStatus accordingly
|
|
|
257 |
print 'Order Closed'
|
|
|
258 |
updateMap["subOrders.$.closed"] = True
|
|
|
259 |
if status == Store.ORDER_DELIVERED:
|
|
|
260 |
if subOrder.get("cashBackStatus") == Store.CB_PENDING:
|
|
|
261 |
updateMap["subOrders.$.cashBackStatus"] = Store.CB_APPROVED
|
|
|
262 |
elif status == Store.ORDER_CANCELLED:
|
|
|
263 |
if subOrder.get("cashBackStatus") == Store.CB_PENDING:
|
|
|
264 |
updateMap["subOrders.$.cashBackStatus"] = Store.CB_CANCELLED
|
|
|
265 |
else:
|
|
|
266 |
closed = False
|
|
|
267 |
print 'Order not Closed'
|
|
|
268 |
|
|
|
269 |
subbulk.find(findMap).update({'$set' : updateMap})
|
|
|
270 |
subresult = subbulk.execute()
|
|
|
271 |
tprint(subresult)
|
| 17112 |
manish.sha |
272 |
bulk.find({'orderId': order['orderId']}).update({'$set':{'closed': closed,"parseError":False}})
|
|
|
273 |
result = bulk.execute()
|
|
|
274 |
tprint(result)
|
| 17098 |
manish.sha |
275 |
except:
|
|
|
276 |
print "Error occurred for tracking order... "+str(order['merchantOrderId'])
|
|
|
277 |
traceback.print_exc()
|
|
|
278 |
continue
|
| 17013 |
manish.sha |
279 |
|
|
|
280 |
|
|
|
281 |
|
|
|
282 |
def track(url, orderId):
    """Submit the order-tracking form and return the browser holding the result.

    Opens *url* through the general proxy, selects the form named
    'trackForm', fills its 'orderId' field with ``str(orderId)`` and submits
    it. The caller reads the result page from the returned browser's
    ``response()``.
    """
    br = getBrowserObject()
    br.set_proxies({"http": PROXY_MESH_GENERAL})
    response = br.open(url)
    ungzipResponseBr(response, br)
    soup = BeautifulSoup(br.response())
    # The CSRF value is only logged, never submitted explicitly, so a page
    # without the hidden input must not abort tracking.
    csrfInput = soup.find('input', {'name': '_csrf'})
    if csrfInput is not None:
        print(csrfInput.get('value'))
    br.select_form(name='trackForm')
    br.form['orderId'] = str(orderId)
    response = br.submit()
    print("********************")
    print("Attempting to Login")
    print("********************")
    # NOTE(review): the submit response is not un-gzipped here; the visible
    # caller does that itself on br.response().
    return br
|
|
|
300 |
|
|
|
301 |
def ungzipResponseBr(r, b):
    """Replace a gzip-encoded response body with its decompressed form.

    *r* is a response object providing ``info()``, file-like reads and
    ``set_data()``; *b* is the browser providing ``set_response()``. When the
    'Content-Encoding' header is exactly 'gzip', the body is inflated, the
    'Content-type' header is overwritten with
    'text/html; charset=utf-8', and the response is re-installed on the
    browser. Any other response, including one with no 'Content-Encoding'
    header at all, is left untouched.
    """
    headers = r.info()
    # .get() instead of [] so a response without the header is skipped rather
    # than raising KeyError.
    if headers.get('Content-Encoding') != 'gzip':
        return
    import gzip
    print("********************")
    print("Deflating gzip response")
    print("********************")
    gz = gzip.GzipFile(fileobj=r, mode='rb')
    try:
        html = gz.read()
    finally:
        gz.close()
    headers["Content-type"] = "text/html; charset=utf-8"
    r.set_data(html)
    b.set_response(r)
|
|
|
314 |
|
|
|
315 |
|
|
|
316 |
def to_py_date(java_timestamp):
    """Return the local-time ``datetime`` for the given timestamp.

    The value is handed to ``datetime.fromtimestamp`` unchanged, i.e. it is
    interpreted as seconds since the epoch.
    NOTE(review): the parameter name suggests a Java (millisecond)
    timestamp; no division by 1000 happens here -- confirm with callers.
    """
    return datetime.fromtimestamp(java_timestamp)
|
|
|
319 |
|
|
|
320 |
def todict(obj, classkey=None):
    """Recursively convert *obj* into plain dicts and lists.

    - dicts are copied with every value converted;
    - objects exposing ``_ast()`` are converted from its result;
    - other iterables become lists of converted items;
    - objects with a ``__dict__`` become a dict of their attributes,
      skipping callables and names starting with an underscore;
    - anything else (numbers, strings, None, ...) is returned unchanged.

    When *classkey* is given, each object converted via ``__dict__`` also
    gets ``data[classkey] = <class name>``.
    """
    # Text/byte strings are leaves. On Python 2 they have no __iter__ so this
    # changes nothing there; on Python 3 it prevents endless recursion over
    # single-character strings.
    string_types = (str, bytes, type(u''))
    if isinstance(obj, dict):
        return dict((k, todict(v, classkey)) for (k, v) in obj.items())
    if hasattr(obj, "_ast"):
        # classkey is forwarded so nested objects are tagged consistently.
        return todict(obj._ast(), classkey)
    if hasattr(obj, "__iter__") and not isinstance(obj, string_types):
        return [todict(v, classkey) for v in obj]
    if hasattr(obj, "__dict__"):
        data = dict((key, todict(value, classkey))
                    for key, value in obj.__dict__.items()
                    if not callable(value) and not key.startswith('_'))
        if classkey is not None and hasattr(obj, "__class__"):
            data[classkey] = obj.__class__.__name__
        return data
    return obj
|