Subversion Repositories SmartDukaan

Rev

Rev 20699 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
13774 amit.gupta 1
# coding=utf-8
13569 amit.gupta 2
'''
3
Created on Jan 15, 2015
4
 
5
@author: amit
6
'''
13774 amit.gupta 7
from base64 import encode
8
from bs4 import BeautifulSoup
17231 amit.gupta 9
from datetime import datetime, timedelta, date
16874 amit.gupta 10
from dtr.api.Order import process_rejects
18028 amit.gupta 11
from dtr.dao import Order, SubOrder, AmazonAffiliateInfo, obj
14464 amit.gupta 12
from dtr.main import getStore, Store as MStore, ParseException, getBrowserObject, \
14624 amit.gupta 13
    ungzipResponse, tprint
17307 amit.gupta 14
from dtr.storage.DataService import OrdersRaw, Orders, Order_Parse_Info, \
16986 amit.gupta 15
    All_user_addresses
16874 amit.gupta 16
from dtr.storage.Mongo import getDealRank
17307 amit.gupta 17
from dtr.utils import utils
18440 amit.gupta 18
from dtr.utils.utils import fetchResponseUsingProxy, readSSh, todict
16874 amit.gupta 19
from elixir import *
14650 amit.gupta 20
import base64
16874 amit.gupta 21
import dtr
14650 amit.gupta 22
import gzip
14624 amit.gupta 23
import mechanize
14285 amit.gupta 24
import os.path
18376 amit.gupta 25
from pyquery import PyQuery as pq
13774 amit.gupta 26
import re
14033 amit.gupta 27
import time
13809 amit.gupta 28
import traceback
14650 amit.gupta 29
import urllib2
20687 amit.gupta 30
import urlparse
13774 amit.gupta 31
 
32
# Shared URL prefixes for the two Amazon hosts this module talks to.
_AMAZON_IN_BASE = 'https://www.amazon.in'
_AMAZON_DATAFEED_BASE = 'https://assoc-datafeeds-eu.amazon.com/datafeed'

# Order summary page; the placeholder takes the merchant order id.
ORDER_REDIRECT_URL = _AMAZON_IN_BASE + '/gp/css/summary/edit.html?orderID=%s'
# URLs that identify a checkout "order placed" page.
ORDER_SUCCESS_URL = _AMAZON_IN_BASE + '/gp/buy/spc/handlers/static-submit-decoupled.html'
THANKYOU_URL = _AMAZON_IN_BASE + '/gp/buy/thankyou/handlers/display.html'
# Affiliate data-feed endpoints; the file URL placeholder takes the report name suffix.
AMAZON_AFF_URL = _AMAZON_DATAFEED_BASE + '/listReports'
AMAZON_AFF_FILE_URL = _AMAZON_DATAFEED_BASE + '/getReport?filename=saholic-21-orders-report-%s.tsv.gz'
13774 amit.gupta 37
class Store(MStore):
13821 amit.gupta 38
 
17348 amit.gupta 39
    # Status phrases seen on Amazon order pages, grouped by the store-level
    # order status they map to.
    orderStatusRegexMap = {
        MStore.ORDER_PLACED: [
            'ordered from',
            'not yet dispatched',
            'dispatching now',
            'preparing for dispatch',
            'order received',
        ],
        MStore.ORDER_SHIPPED: [
            'dispatched on',
            'dispatched',
            'on the way',
            'out for delivery',
            'Out for delivery',
        ],
        MStore.ORDER_CANCELLED: [
            'return complete',
            'refunded',
            'cancelled',
            'replacement complete',
            'return received',
        ],
        MStore.ORDER_DELIVERED: [
            'delivered',
            'your package was delivered',
            'package was handed directly to customer',
        ],
    }
13774 amit.gupta 44
 
45
    def __init__(self, store_id):
        """Initialise the store by delegating to the MStore base initialiser.

        store_id -- identifier handed straight to ``MStore.__init__``.
        """
        parent = super(Store, self)
        parent.__init__(store_id)
47
 
48
    def getName(self):
        """Return the name of this store, the literal string "amazon"."""
        storeName = "amazon"
        return storeName
13774 amit.gupta 50
 
14945 amit.gupta 51
    def parseOrderRawHtml(self, orderId, subTagId, userId, rawHtml, orderSuccessUrl, track=False):
        """Parse a captured Amazon page and create or update the stored order.

        Two kinds of page are handled, selected by ``orderSuccessUrl``:

        * a checkout success / thank-you URL: the merchant order id is
          extracted and a new order is saved with status ``html_required``;
        * anything else: the page is treated as an order-detail page and is
          run through the detail parsers.

        orderId, subTagId, userId -- passed through to ``Order``.
        rawHtml         -- page source; runs of non-ASCII characters are
                           replaced by a single space before parsing.
        orderSuccessUrl -- URL the page was captured from.
        track           -- only changes the wording of the log line.

        Returns a dict whose ``result`` is one of ORDER_CREATED,
        ORDER_ALREADY_CREATED_IGNORED, ORDER_NOT_CREATED_KNOWN,
        ORDER_NOT_CREATED_UNKNOWN, DETAIL_CREATED, ORDER_CANCELLED,
        DETAIL_NOT_CREATED_KNOWN or DETAIL_NOT_CREATED_UNKNOWN.  For
        ORDER_CREATED the dict also carries ``url``, ``htmlRequired`` and
        ``orderId``.
        """
        rawHtml = re.sub(r'[^\x00-\x7F]+', ' ', rawHtml)
        parseString = "Tracking" if track else "Transacted"
        print("%s Order Id to be parsed is : %s" % (parseString, orderId))
        resp = {'result': 'ORDER_NOT_CREATED'}

        if ORDER_SUCCESS_URL in orderSuccessUrl or THANKYOU_URL in orderSuccessUrl:
            try:
                merchantOrderId = self._extractMerchantOrderId(pq(rawHtml), orderSuccessUrl)
                if merchantOrderId is None:
                    raise ParseException("parseOrderRawHtml", "Merchant order id not found")
                order = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl, True)
                order.orderTrackingUrl = ORDER_REDIRECT_URL % (merchantOrderId)
                order.orderSuccessUrl = orderSuccessUrl
                order.merchantOrderId = merchantOrderId
                order.requireDetail = True
                order.status = 'html_required'
                order.closed = None
                if self._saveToOrder(todict(order)):
                    resp['result'] = 'ORDER_CREATED'
                    resp["url"] = ORDER_REDIRECT_URL % (merchantOrderId)
                    resp["htmlRequired"] = True
                    resp['orderId'] = orderId
                else:
                    resp['result'] = 'ORDER_ALREADY_CREATED_IGNORED'
            except Exception:
                # Any failure above means no order was created; classify the
                # page so recognised non-order pages are reported as "known".
                soup = BeautifulSoup(rawHtml, "html5lib")
                if self._isKnownOrderNotCreatedPage(soup):
                    resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
                else:
                    resp['result'] = 'ORDER_NOT_CREATED_UNKNOWN'
        else:
            try:
                mo = self.db.merchantOrder.find_one({"orderId": orderId})
                if mo is not None:
                    merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl, False)
                    # Carry the creation timestamps of the stored order over.
                    merchantOrder.createdOn = mo.get("createdOn")
                    merchantOrder.createdOnInt = mo.get("createdOnInt")
                else:
                    print("Could not find amazon order with order Id %s" % (orderId,))
                    merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)
                soup = BeautifulSoup(rawHtml, "html5lib")
                resp['result'] = self._parseOrderDetailPage(merchantOrder, soup, rawHtml)
                if resp['result'] == 'DETAIL_NOT_CREATED_KNOWN':
                    # Recognised non-detail page: ask for the HTML again.
                    self.db.merchantOrder.update({"orderId": orderId}, {"$set": {"status": "html_required"}})
            except Exception:
                self.db.merchantOrder.update({"orderId": orderId}, {"$set": {"status": "html_required"}})
                print("Error occurred")
                resp['result'] = 'DETAIL_NOT_CREATED_UNKNOWN'
                traceback.print_exc()
        return resp

    def _extractMerchantOrderId(self, doc, orderSuccessUrl):
        """Return the merchant order id of a success page, or None.

        Candidates are tried in this order: the ``oid`` query parameter of the
        ``a.a-touch-link`` anchor, the text of ``#orders-list>div>span>b`` and
        finally every query-string value of ``orderSuccessUrl``.  The first
        candidate that starts with a ``digits-digits-digits`` id wins.
        """
        candidates = []
        orderUrl = doc('a.a-touch-link').attr.href
        if orderUrl:
            # Stop at the next '&' so trailing parameters are not captured.
            candidates.extend(re.findall(r'[?&]oid=([^&#]*)', orderUrl))
        candidates.append(doc("#orders-list>div>span>b").html())
        # parse_qsl keeps the parameters in URL order, so the scan is deterministic.
        query = urlparse.urlsplit(orderSuccessUrl).query
        candidates.extend(value for _, value in urlparse.parse_qsl(query))
        for candidate in candidates:
            if not candidate:
                continue
            candidate = candidate.strip()
            if re.match(r"\d+-\d+-\d+", candidate):
                return candidate
        return None

    def _firstTagText(self, soup, *args, **kwargs):
        """Return the stripped text of the first tag matching ``soup.find``, or None."""
        tag = soup.find(*args, **kwargs)
        if tag is None:
            return None
        return tag.text.strip()

    def _isKnownOrderNotCreatedPage(self, soup):
        """Tell whether ``soup`` is a recognised page on which no order exists."""
        if not soup.body:
            return True
        h3 = self._firstTagText(soup, "h3")
        if h3 is not None and ('Securely redirecting you' in h3 or h3 == "Orders"):
            return True
        h1 = self._firstTagText(soup, "h1")
        knownH1 = ['This is a duplicate order', 'There was a problem with your payment.', 'Your Orders',
                   'Your Shopping Cart is empty.', 'Select a payment method', 'Edit quantities']
        if h1 is not None and (h1 in knownH1 or "Saved for later" in h1):
            return True
        h2 = self._firstTagText(soup, "h2")
        if h2 in ['Web page not available', 'Webpage not available', 'Do you have an Amazon password?']:
            return True
        for elementId in ("loading-spinner-img", "anonCarousel1", "ap_signin_pagelet_title", "nav-greeting-name"):
            if soup.find(id=elementId) is not None:
                return True
        sorry = self._firstTagText(soup, "b", {'class': 'h1'})
        return sorry is not None and "We're sorry" in sorry

    def _isAccountOrSignInPage(self, soup):
        """Tell whether ``soup`` is the account landing page or a sign-in page."""
        h1 = soup.find("h1")
        if h1 is not None:
            if h1.text.strip() in ["Your Account"]:
                return True
            if h1.span is not None and h1.span.text == "Account":
                return True
        signIn = soup.find(id="ap_signin_pagelet_title")
        if signIn is None:
            return False
        title = signIn.find("h1")
        return title is not None and title.text.strip() == "Sign In"

    def _parseOrderDetailPage(self, merchantOrder, soup, rawHtml):
        """Run the detail parsers in turn and return the resulting result code.

        Raises ParseException when no parser succeeds and the page is not a
        recognised account / sign-in page.
        """
        if not soup.body:
            return 'DETAIL_NOT_CREATED_KNOWN'
        attempts = (
            (lambda: self.parseNewStlye(merchantOrder, soup), 'DETAIL_CREATED'),
            (lambda: self.parseAnotherStlye(merchantOrder, pq(rawHtml)), 'DETAIL_CREATED'),
            (lambda: self.parseOldStlye(merchantOrder, soup), 'DETAIL_CREATED'),
            (lambda: self.parseCancelled(merchantOrder, soup), 'ORDER_CANCELLED'),
        )
        for attempt, outcome in attempts:
            try:
                attempt()
            except Exception:
                # Keep the failure visible in the log, then try the next layout.
                traceback.print_exc()
            else:
                return outcome
        if self._isAccountOrSignInPage(soup):
            return 'DETAIL_NOT_CREATED_KNOWN'
        raise ParseException("parseOrderRawHtml", "Unrecognised order detail page")
13774 amit.gupta 170
 
13868 amit.gupta 171
    #This should be exposed from api for specific sources
172
    def scrapeStoreOrders(self):
        """Refresh open sub-orders of stored orders from their tracking pages.

        Selects merchant orders with storeId 1 that are not closed and have an
        open sub-order carrying a ``trackingUrl`` and no ``login`` field.  Each
        distinct tracking URL of an order is parsed once via
        ``parseTrackingUrl``; the resulting update map is applied to every
        sub-order sharing that URL in one ordered bulk operation.  The order is
        marked closed only when every sub-order is closed.

        On any failure for an order, ``parseError`` is set on it and the
        traceback is printed; processing continues with the next order.
        """
        orders = self.db.merchantOrder.find({"storeId": 1, "closed": False, "subOrders.closed": False, "subOrders.trackingUrl": {"$exists": True}, "subOrders.login": {"$exists": False}})
        for merchantOrder in orders:
            executeBulk = False
            try:
                bulk = self.db.merchantOrder.initialize_ordered_bulk_op()
                closed = True
                # Tracking results per URL, so a shared URL is fetched once.
                trackedByUrl = {}
                for subOrder in merchantOrder.get("subOrders"):
                    if subOrder.get("closed"):
                        continue
                    trackingUrl = subOrder.get("trackingUrl")
                    if trackingUrl is None:
                        closed = False
                        continue
                    if trackingUrl not in trackedByUrl:
                        trackedByUrl[trackingUrl] = self.parseTrackingUrl(trackingUrl, merchantOrder.get("orderId"))
                    newOrder = trackedByUrl[trackingUrl]
                    if not newOrder:
                        # Nothing could be read for this open sub-order, so the
                        # parent order must stay open and be tracked again.
                        closed = False
                        continue
                    executeBulk = True
                    findMap = {"orderId": merchantOrder.get("orderId"), "subOrders.merchantSubOrderId": subOrder.get("merchantSubOrderId")}
                    updateMap = self.getUpdateMap(newOrder, subOrder.get('cashBackStatus'))
                    print("%s \n%s" % (findMap, updateMap))
                    bulk.find(findMap).update({'$set': updateMap})
                    closed = closed and newOrder['closed']
                if executeBulk:
                    bulk.find({"orderId": merchantOrder.get("orderId")}).update({"$set": {"closed": closed, "parseError": False}})
                    bulk.execute()
            except Exception:
                tprint("Could not update " + str(merchantOrder['orderId']) + " For store " + self.getName())
                self.db.merchantOrder.update({"orderId": merchantOrder['orderId']}, {"$set": {"parseError": True}})
                traceback.print_exc()
204
 
14608 amit.gupta 205
 
206
 
14771 amit.gupta 207
    def parserest(self, soup):
        """Debug helper: print "Hi", then "OK" when ``soup.find('h1')`` is truthy."""
        print("Hi")
        heading = soup.find('h1')
        if not heading:
            return
        print("OK")
14608 amit.gupta 211
 
14464 amit.gupta 212
    def parseOldStlye(self, merchantOrder, soup):
        """Parse the table-based order-details layout and persist the order.

        Reads the order header (placed-on date, order number, order total)
        from the third nested table, then builds one ``SubOrder`` per item row
        of every delivery block.  It fills ``merchantOrder.subOrders``, sets
        the status to ``success`` and hands the order to ``_updateToOrder``.

        When the grand total is lower than the item subtotal, the difference
        is spread over the sub-orders in proportion to what each one paid.

        Any structural mismatch surfaces as an exception (AttributeError,
        IndexError, ValueError, ...); the caller treats that as "not this
        layout".
        """
        def rupees(text):
            # Amounts look like "1,234.00"; only the integer part is kept.
            return int(float(text.replace(',', '')))

        merchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrl
        table = soup.body.findAll("table", recursive=False)[1]
        tables = table.tr.td.findAll("table", recursive=False)
        for tr in tables[2].findAll("tr"):
            boldElement = tr.td.b
            label = str(boldElement)
            if "Order Placed" in label:
                merchantOrder.placedOn = boldElement.next_sibling.strip()
            if "order number" in label:
                merchantOrder.merchantOrderId = boldElement.next_sibling.strip()
            if "Order Total" in label:
                merchantOrder.paidAmount = rupees(boldElement.find('span').contents[-1])

        # The last direct anchor precedes the payment block; the others each
        # precede one delivery block.
        anchors = table.tr.td.findAll("a", recursive=False)
        paymentAnchor = anchors.pop(-1)

        subOrders = []
        merchantOrder.subOrders = subOrders
        counter = 0  # running item number across all deliveries
        for count, anchor in enumerate(anchors, 1):
            tab = anchor.next_sibling
            status = MStore.ORDER_PLACED
            subStr = "Delivery #" + str(count) + ":"
            # NOTE(review): when the heading lacks the expected label,
            # detailedStatus is left unset (first delivery) or keeps the value
            # of the previous delivery - confirm which is intended.
            if subStr in tab.find("b").text:
                detailedStatus = tab.find("b").text.replace(subStr, '').strip()

            tab = tab.next_sibling.next_sibling
            trs = tab.find("table").find('tbody').findAll("tr", recursive=False)
            estimatedDelivery = trs[0].td.find("b").next_sibling.strip()
            orderItemTrs = trs[1].findAll("td", recursive=False)[1].table.tbody.findAll("tr", recursive=False)

            # Only the rows at odd positions are parsed as items.
            for orderItemTr in orderItemTrs[1::2]:
                counter += 1
                quantity = int(re.findall(r'\d+', orderItemTr.td.contents[0])[0])
                productUrl = orderItemTr.td.contents[1].a["href"]
                productTitle = orderItemTr.td.contents[1].a.text
                unitPrice = rupees(orderItemTr.findAll('td')[1].span.text.replace('Rs. ', ''))

                subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, unitPrice * quantity, status, quantity)
                subOrder.merchantSubOrderId = str(counter) + " of " + merchantOrder.merchantOrderId
                subOrder.estimatedDeliveryDate = estimatedDelivery
                # The estimate may be a range "start - end"; only the start is used.
                estDlvyTime = datetime.strptime(estimatedDelivery.split('-')[0].strip(), "%A %d %B %Y")
                createdOn = datetime.fromtimestamp(merchantOrder.createdOnInt)
                # Track from the later of: 4 days before the estimate, or
                # 3 days after the order was created.
                trackFrom = max(estDlvyTime - timedelta(days=4), createdOn + timedelta(days=3))
                subOrder.trackAfter = int(time.mktime(trackFrom.timetuple()))
                subOrder.productCode = productUrl.split('/')[5]
                subOrder.detailedStatus = detailedStatus
                (cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, unitPrice)
                dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)
                subOrder.dealRank = dealRank.get('rank')
                subOrder.rankDesc = dealRank.get('description')
                subOrder.maxNlc = dealRank.get('maxNlc')
                subOrder.minNlc = dealRank.get('minNlc')
                # NOTE(review): attribute is "db" while the key is 'dp' - possibly a typo; confirm.
                subOrder.db = dealRank.get('dp')
                subOrder.itemStatus = dealRank.get('status')
                cashbackStatus = Store.CB_PENDING
                if cashbackAmount <= 0:
                    cashbackStatus = Store.CB_NA
                subOrder.cashBackStatus = cashbackStatus
                subOrder.cashBackAmount = cashbackAmount * quantity
                if percentage > 0:
                    subOrder.cashBackPercentage = percentage
                subOrders.append(subOrder)

        priceList = paymentAnchor.next_sibling.next_sibling.next_sibling.table.table.tbody.tbody.tbody.findAll('tr', recursive=False)
        totalAmount = 0
        grandAmount = 0
        for price in priceList:
            labelTd = price.td
            if 'Subtotal:' in labelTd.text:
                totalAmount += rupees(labelTd.next_sibling.next_sibling.find('span').contents[-1])
            elif 'Grand Total:' in labelTd.text:
                grandAmount += rupees(labelTd.next_sibling.next_sibling.find('span').contents[-1])
        if grandAmount < totalAmount:
            diff = totalAmount - grandAmount
            for subOrder in merchantOrder.subOrders:
                # Each sub-order carries its proportional share of the
                # discount.  Float division is required: Python 2 integer
                # division would truncate the share ratio to 0 or 1.
                subOrder.amountPaid -= int(diff * subOrder.amountPaid / float(totalAmount))
        merchantOrder.status = 'success'
        self._updateToOrder(todict(merchantOrder))
18376 amit.gupta 299
 
300
    def parseAnotherStlye(self, merchantOrder, pqobj):
        """Unfinished parser for the ".a-section" layout; it always raises.

        Locates the section whose text contains "Shipment details", walks its
        shipment boxes (the very first box is skipped), prints each shipment's
        status texts and reads URL, quantity and price of every product row.
        No ``SubOrder`` is built and ``merchantOrder`` is never modified, so
        the method ends by raising ``ParseException`` and the caller moves on
        to the next layout parser.

        merchantOrder -- unused until sub-order creation is implemented.
        pqobj         -- PyQuery document of the page.
        """
        shipmentSection = None
        matched = 0
        for el in pqobj('.a-section'):
            sectionText = pq(el).text()
            if "View order details" in sectionText:
                matched += 1
            if "Shipment details" in sectionText:
                shipmentSection = pq(el)
                matched += 1
            if "Order Summary" in sectionText:
                matched += 1
            if matched == 3:
                break
        if shipmentSection is None:
            raise ParseException("parseAnotherStlye", "Shipment details section not found")

        parsedItems = []
        boxIndex = -1  # counts boxes across all groups
        for group in shipmentSection('.a-box-group'):
            for box in pq(group)('.a-box'):
                boxIndex += 1
                if boxIndex == 0:
                    continue
                shipment = pq(box)
                statusSection = shipment('.a-section:eq(0)')
                print("%s %s %s" % (statusSection('h3').text(), statusSection('p').text(), statusSection('span').text()))
                for row in shipment('.a-section:eq(1)>.a-row'):
                    row = pq(row)
                    pImg = row.children('div').eq(0)
                    pQty = row.children('div').eq(1)
                    productUrl = pImg('a').attr('href')
                    qty = pQty('.a-row:nth-child(2)')('span').text().split(':')[1].strip()
                    price = int(float(pQty('nobr').text().replace('Rs.', '').replace(',', '')))
                    parsedItems.append((productUrl, qty, price))
        # Raise explicitly instead of relying on an exception that happens to
        # be active in the caller.
        raise ParseException("parseAnotherStlye", "Sub-order creation not implemented; read %d item(s)" % len(parsedItems))
14464 amit.gupta 343
 
344
    def parseNewStlye(self, merchantOrder, soup):
        """Parse the "#orderDetails" layout and persist the order.

        Reads placed-on date, order number and the price summary, then builds
        one ``SubOrder`` per product row of every ``div.shipment``.  The amount
        paid per sub-order is its share of (subtotal - promotions [+ shipping])
        in proportion to its list price.  Cashback is zero when a script on
        the page contains ``"isPrime":1``.  Finally it sets
        ``merchantOrder.status`` to ``success``, sets ``closed`` to whether
        every sub-order was delivered, and calls ``_updateToOrder``.

        Any structural mismatch surfaces as an exception; the caller treats
        that as "not this layout".
        """
        def rupees(priceRow):
            # The amount sits in the second sibling after the row's first div.
            amountText = priceRow.div.next_sibling.next_sibling.span.span.text
            return int(float(amountText.replace('Rs.', '').replace(',', '')))

        isPrime = False
        merchantOrder.orderTrackingUrl = merchantOrder.orderSuccessUrl
        for script in soup.findAll("script"):
            if script.text:
                print(script.text)
                if "\"isPrime\":" in script.text:
                    isPrime = "\"isPrime\":1" in script.text
                    break
        orderDetailsContainer = soup.body.find(id="orderDetails")
        divAfterH1 = orderDetailsContainer.h1.next_sibling.next_sibling
        orderLeftDiv = divAfterH1.div
        placedOnSpan = orderLeftDiv.find("span", {'class': 'order-date-invoice-item'})
        merchantOrder.placedOn = placedOnSpan.text.split('Ordered on')[1].strip()
        merchantOrder.merchantOrderId = placedOnSpan.next_sibling.next_sibling.text.split('Order#')[1].strip()
        summaryDiv = divAfterH1.next_sibling.next_sibling.next_sibling.next_sibling
        try:
            priceBox = summaryDiv.find("div", {"class": "a-box-inner"}).div.div.findAll('div', recursive=False)[-1]
        except Exception:
            priceBox = summaryDiv.find("div", {"class": "a-box a-last"}).div.div.findAll('div', recursive=False)[-1]
        priceRows = priceBox.findAll('div', {'class': 'a-row'})
        subTotal = 0
        shippingPrice = 0
        promoApplied = 0
        for priceRow in priceRows:
            rowHtml = str(priceRow)
            if "Item(s) Subtotal:" in rowHtml:
                subTotal = rupees(priceRow)
            elif "Shipping:" in rowHtml:
                shippingPrice = rupees(priceRow)
            elif "Grand Total:" in rowHtml:
                merchantOrder.paidAmount = rupees(priceRow)
            elif "Total:" in rowHtml:
                # Recognised so the row is not tested against the labels
                # below; its value is not needed.
                pass
            elif "Promotion Applied:" in rowHtml:
                promoApplied += rupees(priceRow)
        totalPaid = subTotal
        if promoApplied > 0:
            totalPaid -= promoApplied
            # NOTE(review): shipping is added back only when a promotion at
            # least covers it - confirm this is the intended rule.
            if shippingPrice <= promoApplied:
                totalPaid += shippingPrice

        shipmentDivs = orderDetailsContainer.findAll('div', class_='shipment')
        closedStatus = True
        subOrders = []
        merchantOrder.subOrders = subOrders
        j = 0  # running item number across all shipments
        for shipmentDiv in shipmentDivs:
            shipmentDiv = shipmentDiv.div
            try:
                trackingUrl = 'http://www.amazon.in/' + shipmentDiv.find('span', class_='track-package-button').span.a.get('href')
            except Exception:
                trackingUrl = None

            deliverySpanTop = None
            innerBoxes = shipmentDiv.findAll('div', recursive=False)
            statusDiv = innerBoxes[0]
            subOrderStatus = statusDiv.div.span.text.strip()
            try:
                deliverySpanTop = statusDiv.div.div.find_next_sibling('div').span
            except Exception:
                # No shipment-level delivery line; it is read per product below.
                pass
            productDivs = innerBoxes[-1].div.div.findAll('div', recursive=False)
            for i, productDiv in enumerate(productDivs):
                deliverySpan = deliverySpanTop
                if not deliverySpanTop:
                    # Without a shipment-level delivery line the divs come in
                    # pairs: a delivery line followed by the product rows.
                    if i % 2 == 1:
                        continue
                    deliverySpan = productDiv.div.div.span
                    try:
                        productDiv = productDivs[i + 1]
                    except Exception:
                        pass
                for prodRow in productDiv.findAll('div', recursive=False):
                    j += 1
                    imgDiv = prodRow.div.div
                    detailDiv = imgDiv.find_next_sibling('div')
                    detailDivs = detailDiv.findAll('div', recursive=False)
                    # A title of the form "<n> of <title>" carries the quantity.
                    arr = re.split(r"^(\d+) of", detailDivs[0].a.text.strip())
                    (productTitle, quantity) = (arr[-1], (1 if len(arr) < 2 else int(arr[1])))
                    try:
                        unitPrice = int(float(detailDivs[2].span.text.replace('Rs. ', '').replace(',', '')))
                    except Exception:
                        unitPrice = int(float(detailDivs[3].span.text.replace('Rs. ', '').replace(',', '')))
                    amountPaid = int((unitPrice * quantity * totalPaid) / subTotal)
                    productUrl = "http://www.amazon.in" + detailDivs[0].a.get('href')
                    subOrder = SubOrder(productTitle, productUrl, merchantOrder.placedOn, amountPaid, MStore.ORDER_PLACED, quantity)
                    subOrder.productCode = productUrl.split('/')[5]
                    subOrder.unitPrice = unitPrice
                    subOrder.merchantSubOrderId = str(j) + " of " + merchantOrder.merchantOrderId
                    estDlvyTime = datetime.now()
                    if deliverySpan is not None:
                        try:
                            subOrder.estimatedDeliveryDate = deliverySpan.span.text.strip()
                            estDate = subOrder.estimatedDeliveryDate.split("-")[0].strip()
                            subOrder.estimatedDeliveryInt = int(time.mktime((datetime.strptime(estDate, "%A %d %B %Y")).timetuple()))
                            estDlvyTime = datetime.strptime(estDate, "%A %d %B %Y")
                        except Exception:
                            if "Delivered on" in deliverySpan.text:
                                subOrder.deliveredOn = deliverySpan.text.split(":")[1].strip()
                            subOrder.estimatedDeliveryDate = "Not available"

                    createdOn = datetime.fromtimestamp(merchantOrder.createdOnInt)
                    # Track from the later of: 4 days before the estimate, or
                    # 3 days after the order was created.
                    subOrder.trackAfter = int(time.mktime(max(estDlvyTime - timedelta(days=4), createdOn + timedelta(days=3)).timetuple()))
                    subOrder.detailedStatus = subOrderStatus
                    try:
                        # NOTE(review): the mapped status is computed but never
                        # stored on subOrder, which keeps ORDER_PLACED in this
                        # branch - confirm whether that is intended.
                        status = self._getStatusFromDetailedStatus(subOrderStatus)
                    except Exception:
                        try:
                            dateString = subOrderStatus.split("Delivered ")[1].strip()
                            subOrder.status = MStore.ORDER_DELIVERED
                            subOrder.detailedStatus = 'Delivered'
                            dateString = getDateStringDelivered(dateString)
                            if dateString is not None:
                                subOrder.deliveredOn = datetime.strftime(dateString, '%d-%b-%y')
                        except Exception:
                            try:
                                dateString = subOrderStatus.split("Arriving ")[1].split("by")[0].strip()
                                subOrder.status = MStore.ORDER_SHIPPED
                                dateString = getDateStringArriving(dateString)
                                if dateString is not None:
                                    subOrder.deliveryEstimate = datetime.strftime(dateString, '%d-%b-%y')
                                else:
                                    subOrder.deliveryEstimate = subOrderStatus.split("Arriving ")[1].split("by")[0].strip()
                            except Exception:
                                # Log the text that could not be classified.
                                print("Unknown status Alert - %s" % (subOrderStatus,))

                    subOrder.deliveryCharges = shippingPrice
                    if trackingUrl:
                        subOrder.trackingUrl = trackingUrl
                    subOrder.imgUrl = imgDiv.img["src"]
                    if isPrime:
                        (cashbackAmount, percentage) = (0, 0)
                    else:
                        (cashbackAmount, percentage) = self.getCashbackAmount(subOrder.productCode, amountPaid / quantity)
                    dealRank = getDealRank(subOrder.productCode, self.store_id, merchantOrder.userId)
                    subOrder.dealRank = dealRank.get('rank')
                    subOrder.rankDesc = dealRank.get('description')
                    subOrder.maxNlc = dealRank.get('maxNlc')
                    subOrder.minNlc = dealRank.get('minNlc')
                    # NOTE(review): attribute is "db" while the key is 'dp' - possibly a typo; confirm.
                    subOrder.db = dealRank.get('dp')
                    subOrder.itemStatus = dealRank.get('status')
                    cashbackStatus = Store.CB_PENDING
                    if cashbackAmount <= 0:
                        cashbackStatus = Store.CB_NA
                    subOrder.cashBackStatus = cashbackStatus
                    subOrder.cashBackAmount = cashbackAmount * quantity
                    if percentage > 0:
                        subOrder.cashBackPercentage = percentage
                    if hasattr(subOrder, 'deliveredOn') or subOrder.status == Store.ORDER_DELIVERED:
                        # Delivered items are closed and pending cashback is approved.
                        subOrder.status = Store.ORDER_DELIVERED
                        subOrder.closed = True
                        if subOrder.cashBackStatus == Store.CB_PENDING:
                            subOrder.cashBackStatus = Store.CB_APPROVED
                    elif closedStatus:
                        # One undelivered item keeps the whole order open.
                        closedStatus = False
                    subOrders.append(subOrder)
        merchantOrder.status = 'success'
        merchantOrder.closed = closedStatus
        self._updateToOrder(todict(merchantOrder))
14545 amit.gupta 507
 
508
    def parseCancelled(self, merchantOrder,soup):
        """Mark ``merchantOrder`` as cancelled if the page reports a cancellation.

        Two page layouts are recognised:

        * the second top-level ``<table>`` of ``<body>`` whose first two
          ``<font>`` elements read "Important Message" and
          "This order has been cancelled.";
        * an element with id ``orderDetails`` whose ``<h4>`` reads
          "This order has been cancelled.".

        On a match the order is flagged closed/cancelled, ``requireDetail`` is
        cleared and the order is persisted through ``_updateToOrder``.

        Raises:
            ParseException: when neither layout reports a cancellation.
        """
        cancelledText = "This order has been cancelled."
        cancelled = False
        # Text appended to the error message; stays empty when the table
        # layout is not present, so building the message can never fail.
        detail = ""
        try:
            fonts = soup.body.findAll("table", recursive=False)[1].findAll("font")
            if len(fonts) > 1:
                detail = fonts[1].text
            cancelled = fonts[0].text == "Important Message" and fonts[1].text == cancelledText
        except (AttributeError, IndexError, TypeError):
            # Table layout absent or shaped differently - try the other layout.
            cancelled = False

        if not cancelled and soup.body is not None:
            orderDetails = soup.body.find(id="orderDetails")
            heading = orderDetails.h4 if orderDetails is not None else None
            cancelled = heading is not None and heading.text == cancelledText

        if not cancelled:
            raise ParseException("parseCancelled", "Found detailed status" + detail)

        merchantOrder.closed = True
        merchantOrder.status = "cancelled"
        merchantOrder.requireDetail = False
        self._updateToOrder(todict(merchantOrder))
14464 amit.gupta 531
 
13868 amit.gupta 532
    def getTrackingUrls(self, userId):
14074 amit.gupta 533
 
534
        missingOrderUrls = []
535
        missingOrders = self._getMissingOrders({'userId':userId})
536
        for missingOrder in missingOrders:
537
            missingOrderUrls.append(ORDER_REDIRECT_URL%(missingOrder['merchantOrderId']))
17453 amit.gupta 538
        orders = self._getActiveOrders({'userId':userId})
13882 amit.gupta 539
        count = len(orders)
13879 amit.gupta 540
        print "count", count
14948 amit.gupta 541
        print "Missing Urls"
542
        print "*************"
543
        print missingOrderUrls
13868 amit.gupta 544
        if count > 0:
14684 amit.gupta 545
            return missingOrderUrls + ['https://www.amazon.in/gp/css/order-history', 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled', 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled&startIndex=10']
13959 amit.gupta 546
        else: 
14074 amit.gupta 547
            return missingOrderUrls
13927 amit.gupta 548
 
549
    def trackOrdersForUser(self, userId, url, rawHtml):
17718 amit.gupta 550
        rawHtml = re.sub(r'[^\x00-\x7F]+',' ', rawHtml)
14985 amit.gupta 551
        directory = "/AmazonTrack/User" + str(userId)
14285 amit.gupta 552
        if not os.path.exists(directory):
553
            os.makedirs(directory)
14173 amit.gupta 554
 
14945 amit.gupta 555
 
13995 amit.gupta 556
        try:
557
            searchMap = {'userId':userId}
558
            collectionMap = {'merchantOrderId':1}
559
            activeOrders = self._getActiveOrders(searchMap, collectionMap)
14033 amit.gupta 560
            datetimeNow = datetime.now()
561
            timestamp = int(time.mktime(datetimeNow.timetuple()))
14009 amit.gupta 562
            print "url----------------", url
17465 amit.gupta 563
            cancelledSummary = False
14320 amit.gupta 564
            if url == 'https://www.amazon.in/gp/css/order-history' or 'https://www.amazon.in/gp/css/order-history/?orderFilter=cancelled' in url:
14945 amit.gupta 565
                if url == 'https://www.amazon.in/gp/css/order-history':
566
                    filename = directory + "/orderSummary" + datetime.strftime(datetime.now(), '%d-%m:%H:%M:%S')
567
                else:   
17465 amit.gupta 568
                    filename = directory + "/cancelledSummary" + datetime.strftime(datetime.now(), '%d-%m:%H:%M:%S')
569
                    cancelledSummary = True   
14945 amit.gupta 570
                f = open(filename,'w')
571
                f.write(rawHtml) # python will convert \n to os.linesep
572
                f.close() # you can omit in most cases as the destructor will call if
18158 amit.gupta 573
                soup = BeautifulSoup(rawHtml,'html5lib')
13995 amit.gupta 574
                allOrders = soup.find(id="ordersContainer").findAll('div', {'class':'a-box-group a-spacing-base order'})
575
                bulk = self.db.merchantOrder.initialize_ordered_bulk_op()
576
                for activeOrder in activeOrders:
17307 amit.gupta 577
                    matched=False
13995 amit.gupta 578
                    for orderEle in allOrders:
17254 amit.gupta 579
                        deliveredOn = None
580
                        deliveryEstimate = None
17258 amit.gupta 581
                        shippingEstimate = None
13995 amit.gupta 582
                        orderdiv = orderEle.find('div', {'class':'a-box a-color-offset-background order-info'}).find('div', {'class':'a-fixed-right-grid-col actions a-col-right'})
583
                        merchantOrderId = orderdiv.find('span', {'class':'a-color-secondary value'}).text.strip()
584
                        if merchantOrderId==activeOrder['merchantOrderId']:
17307 amit.gupta 585
                            matched=True
13995 amit.gupta 586
                            closed = True
17465 amit.gupta 587
                            if not cancelledSummary:
588
                                shipments = orderEle.findAll('div',{'class':re.compile('.*?shipment.*?')}, recursive=False)
589
                            else:
590
                                shipments = orderEle.findAll('div',{'class':re.compile('.*?a-box.*?')}, recursive=False)
591
                                shipments.pop(0)
13995 amit.gupta 592
                            for shipment in shipments:
17307 amit.gupta 593
                                orderStatusDesc = None
13995 amit.gupta 594
                                shipdiv = shipment.find('div', {'class':'a-box-inner'})
14050 amit.gupta 595
                                sdivs = shipment.div.div.findAll('div', recursive=False)
17254 amit.gupta 596
                                try:
17270 amit.gupta 597
                                    orderStatus = sdivs[0].span.text.strip()
598
                                    status = self._getStatusFromDetailedStatus(orderStatus)
17254 amit.gupta 599
                                except:
17270 amit.gupta 600
                                    try:
601
                                        dateString = orderStatus.split("Delivered ")[1].strip()
602
                                        status = MStore.ORDER_DELIVERED
603
                                        deliveredOn = datetime.strftime(getDateStringDelivered(dateString), '%d-%b-%y')
604
                                    except:
605
                                        try:
17271 amit.gupta 606
                                            dateString = sdivs[0].span.text.strip().split("Arriving ")[1].split("by")[0].strip()
17270 amit.gupta 607
                                            status = MStore.ORDER_SHIPPED
608
                                            deliveryEstimate = datetime.strftime(getDateStringArriving(dateString), '%d-%b-%y')
609
                                        except:
610
                                            print "Unknown status Alert -", orderStatus
17348 amit.gupta 611
                                print merchantOrderId, "Order Status", orderStatus
17256 amit.gupta 612
                                try:
17270 amit.gupta 613
                                    orderStatusDesc = sdivs[0].findAll('div')[1].div.text.strip()
614
                                except:
17271 amit.gupta 615
                                    try:
616
                                        orderStatusDesc = sdivs[0].findAll('div')[1].text.strip()
617
                                    except:
17307 amit.gupta 618
                                        print "Order Status Description None or empty for", merchantOrderId, "and User", userId
17270 amit.gupta 619
 
17271 amit.gupta 620
                                if orderStatusDesc: 
17348 amit.gupta 621
                                    print merchantOrderId, "Order status desc", orderStatusDesc
17271 amit.gupta 622
                                    try:
17348 amit.gupta 623
                                        status = self._getStatusFromDetailedStatus(orderStatus)
624
                                    except:
625
                                        pass
626
                                    try:
17271 amit.gupta 627
                                        if "Dispatch estimate" in orderStatusDesc:
628
                                            shippingEstimate = orderStatus.split("Dispatch estimate").split("-")[0].strip()
629
                                        elif "Delivery estimate" in orderStatus:
630
                                            deliveryEstimate = orderStatus.split("Delivery estimate").split("-")[0].strip()
631
                                        elif "Arriving" in orderStatus:
632
                                            deliveryEstimate = datetime.strftime(getDateStringArriving(orderStatus.split("Arriving")[1].strip().split("by")[0].strip()), '%d-%b-%y')
633
                                    except:
634
                                        print "Could not find anything relevent for merchantOrder", merchantOrderId, "and User", userId 
635
                                        closed=False
636
                                        status = None
17258 amit.gupta 637
 
14335 amit.gupta 638
                                productDivs = shipdiv.find('div', {'class':re.compile('.*?a-spacing-top-medium.*?')}).find('div', {'class':'a-row'}).findAll('div', recursive=False)
13995 amit.gupta 639
                                trackingUrl = None
640
                                for buttonDiv in shipdiv.findAll('span', {'class':'a-button-inner'}):
641
                                    if buttonDiv.find('a').text.strip()=='Track package':
642
                                        trackingUrl = buttonDiv.find('a')['href'].strip()
643
                                        if not trackingUrl.startswith("http"):
14608 amit.gupta 644
                                            trackingUrl = "http://www.amazon.in" + trackingUrl
13995 amit.gupta 645
                                        break
646
                                for prodDiv in productDivs:
647
                                    prodDiv.find('div', {'class':'a-fixed-left-grid-inner'})
648
                                    productTitle = prodDiv.find('div', {'class':'a-fixed-left-grid-inner'}).find("div", {'class':'a-row'}).find('a').text.strip()
649
                                    imgUrl = prodDiv.find("img")["src"]
650
                                    for subOrder in activeOrder['subOrders']:
17307 amit.gupta 651
                                        if subOrder['closed']==True:
652
                                            continue
17230 amit.gupta 653
                                        if subOrder['productTitle'] in productTitle:
13995 amit.gupta 654
                                            findMap = {"orderId": activeOrder['orderId'], "subOrders.merchantSubOrderId": subOrder.get("merchantSubOrderId")}
655
                                            updateMap = {}
656
                                            closedStatus = False
657
                                            updateMap['subOrders.$.imgUrl'] = imgUrl
14033 amit.gupta 658
                                            updateMap['subOrders.$.lastTracked'] = timestamp
17258 amit.gupta 659
                                            if status:
17348 amit.gupta 660
                                                updateMap['subOrders.$.detailedStatus'] = orderStatus
17258 amit.gupta 661
                                                updateMap['subOrders.$.status'] = status 
662
                                                cashbackStatus = subOrder.get("cashBackStatus")
13995 amit.gupta 663
 
17251 amit.gupta 664
                                            if status==MStore.ORDER_DELIVERED:
17271 amit.gupta 665
                                                if deliveredOn:                               
17254 amit.gupta 666
                                                    updateMap['subOrders.$.deliveredOn'] = deliveredOn
13995 amit.gupta 667
                                                closedStatus = True
668
                                                updateMap['subOrders.$.closed'] = True
669
                                                if cashbackStatus == Store.CB_PENDING:
14607 amit.gupta 670
                                                    updateMap['subOrders.$.cashBackStatus'] = Store.CB_APPROVED
13995 amit.gupta 671
                                            if status==MStore.ORDER_CANCELLED:     
672
                                                closedStatus = True
673
                                                updateMap['subOrders.$.closed'] = True
674
                                                if cashbackStatus == Store.CB_PENDING:
675
                                                    updateMap['subOrders.$.cashBackStatus'] = Store.CB_CANCELLED
17254 amit.gupta 676
                                            if status==MStore.ORDER_SHIPPED:
677
                                                if deliveryEstimate:   
678
                                                    updateMap['subOrders.$.estimatedDeliveryDate'] = deliveryEstimate
13995 amit.gupta 679
                                                if trackingUrl is not None:
680
                                                    updateMap['subOrders.$.trackingUrl'] = trackingUrl
17322 amit.gupta 681
                                                    updateMap['subOrders.$.trackMissing'] = False
17258 amit.gupta 682
                                            if shippingEstimate:   
683
                                                updateMap['subOrders.$.estimatedShippingDate'] = shippingEstimate
13995 amit.gupta 684
                                            if not closedStatus:
685
                                                closed = False
17307 amit.gupta 686
                                            #{"subOrders.closed":False,"subOrders.trackingUrl":{"$exists":False},"subOrders.trackAfter":{"$lt":utils.getCurrTimeStamp()}
17359 amit.gupta 687
                                            updateMap['status']='success'
13995 amit.gupta 688
                                            bulk.find(findMap).update({'$set' : updateMap})
689
                                            break
17321 amit.gupta 690
                            bulk.find({'orderId': activeOrder['orderId']}).update({"$set":{'closed':closed}})
17307 amit.gupta 691
                            break
692
                    if not matched:
17319 amit.gupta 693
                        updateMap = {'subOrders.$.trackMissing': True}
17307 amit.gupta 694
                        for subOrder in activeOrder['subOrders']:
695
                            if subOrder['closed']==True:
696
                                continue
697
                            findMap = {"orderId": activeOrder['orderId'], "subOrders.merchantSubOrderId": subOrder.get("merchantSubOrderId"), 
698
                                       "subOrders.trackAfter":{"$lt":utils.getCurrTimeStamp()}}
699
                            bulk.find({'orderId': activeOrder['orderId']})
700
                            bulk.find(findMap).update({'$set':updateMap})
14080 amit.gupta 701
                bulk.execute()
702
                return 'PARSED_SUCCESS'
703
            else:
15563 amit.gupta 704
                merchantOrderId = re.findall(r'https://www.amazon.in/gp/css/summary/edit.html\?orderID=(.*)?', url, re.IGNORECASE)[0]
705
                print  "merchantOrderId", merchantOrderId
14085 amit.gupta 706
                merchantOrder = self.db.merchantOrder.find_one({"merchantOrderId":merchantOrderId})
14945 amit.gupta 707
 
708
                filename = directory + "/" + merchantOrderId   
709
                f = open(filename,'w')
710
                f.write(rawHtml) # python will convert \n to os.linesep
711
                f.close() # you can omit in most cases as the destructor will call if
15555 amit.gupta 712
                result = self.parseOrderRawHtml(merchantOrder['orderId'], merchantOrder['subTagId'], merchantOrder['userId'], rawHtml, url, True)['result']
15562 amit.gupta 713
                print "result", result
15555 amit.gupta 714
                try:
15560 amit.gupta 715
                    order1 = session.query(OrdersRaw).filter_by(id=merchantOrder['orderId']).first()
716
                    order1.status = result
717
                    order1.rawhtml = rawHtml
17358 amit.gupta 718
                    order1.order_url = url
15555 amit.gupta 719
                    session.commit()
15561 amit.gupta 720
                except:
721
                    traceback.print_exc()
15555 amit.gupta 722
                finally:
723
                    session.close()
14080 amit.gupta 724
                return 'PARSED_SUCCESS'
725
                pass
14008 amit.gupta 726
            return 'PARSED_SUCCESS_NO_ORDERS'
13995 amit.gupta 727
        except:
728
            traceback.print_exc()    
729
            return 'PARSED_FAILED'
730
 
731
    def _getStatusFromDetailedStatus(self, detailedStatus):
14061 amit.gupta 732
        if "ordered from" in detailedStatus.lower():
733
            return MStore.ORDER_PLACED 
734
 
13995 amit.gupta 735
        for key, value in self.orderStatusRegexMap.iteritems():
14032 amit.gupta 736
            if detailedStatus.lower() in value:
13995 amit.gupta 737
                return key
738
 
17253 amit.gupta 739
        print "Detailed Status need to be mapped", "Store:", self.store_id, detailedStatus
13995 amit.gupta 740
        raise ParseException("_getStatusFromDetailedStatus", "Found new order status" + detailedStatus)
14624 amit.gupta 741
    def scrapeAffiliate(self, startDate=None, endDate=None):
14650 amit.gupta 742
        br = getBrowserObject()
743
        br.add_password('https://assoc-datafeeds-eu.amazon.com', 'Saholic', 'Fnubyvp')
744
        url = AMAZON_AFF_URL
745
        response = br.open(url)
746
        #get data for past 40 days and store it to mongo
747
        dt = datetime.now()
14701 amit.gupta 748
        dat = dt - timedelta(days=2)
14651 amit.gupta 749
        url = AMAZON_AFF_FILE_URL%(datetime.strftime(dat, "%Y%m%d"))
750
        response = br.open(url)
751
        page = gzip.GzipFile(fileobj=response, mode='rb').read()
752
        j=-1
753
        for row in page.split("\n"):
754
            j += 1
755
            if j== 0 or j==1:
756
                continue
757
            fields = row.split("\t")
758
            if len(fields)>1:
759
                print fields
760
                amazonAffiliate = AmazonAffiliateInfo(fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], fields[6], fields[7], fields[8], fields[9])
761
                print amazonAffiliate
762
                self.db.amazonAffiliateInfo.insert(todict(amazonAffiliate))
763
            else:
764
                break
14650 amit.gupta 765
 
14608 amit.gupta 766
 
16468 amit.gupta 767
    def parseTrackingUrl(self, trackingUrl, orderId):
16474 amit.gupta 768
        print trackingUrl
14624 amit.gupta 769
        subOrder = {}
14749 amit.gupta 770
        page = fetchResponseUsingProxy(trackingUrl)
16468 amit.gupta 771
        status = MStore.ORDER_SHIPPED
772
        #print page
14624 amit.gupta 773
        soup = BeautifulSoup(page)
16468 amit.gupta 774
        header1 = soup.find("h1") 
775
        if header1:
18727 amit.gupta 776
            if header1.text=="Sign In" or header1.text.strip()=="Login":
16468 amit.gupta 777
                print "Login page is displayed for order id", orderId
17307 amit.gupta 778
                self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":True}})
18725 amit.gupta 779
                subOrder['login'] = True
14624 amit.gupta 780
                return subOrder
16468 amit.gupta 781
        try:
16474 amit.gupta 782
            print "Tracking page is displayed for order id", orderId
16468 amit.gupta 783
            detailedStatus = soup.find("div", {"class":"top"}).span.text.strip()
784
            try:
785
                displayStatus = soup.find("div",{"class":"a-column a-span12 shipment-status-content"}).span.text.strip()
786
            except:
787
                displayStatus = detailedStatus
788
            print displayStatus
789
            if detailedStatus.lower().find("delivered")>=0:
790
                print detailedStatus
791
                displayStatus = "Delivered"
792
                status = "Delivered"
793
                try:
794
                    subOrder["deliveredOn"] = detailedStatus.split("on")[1].strip()
795
                except:
796
                    pass
797
 
798
            elif detailedStatus.lower() == 'returned':
799
                status = 'Cancelled'
800
            subOrder['status'] = status
801
            subOrder['detailedStatus'] = displayStatus 
17350 amit.gupta 802
            self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":False}})    
16468 amit.gupta 803
 
804
        except:
17350 amit.gupta 805
            self.db.merchantOrder.update({"orderId":orderId}, {"$set":{"trackError":True}})    
16468 amit.gupta 806
            print "failed to parse", orderId
807
            traceback.print_exc()
808
 
14624 amit.gupta 809
        return subOrder
810
 
811
 
812
 
16980 amit.gupta 813
    def parseInfo(self,):
        """Extract user addresses from stored order pages.

        Orders of this store with status 'DETAIL_CREATED' are queried grouped
        by user.  For each one the address list of the raw html
        (``ul.displayAddressUL li``) is read: the third item from the end is
        taken as the address and the second from the end as
        "<city>, <state><pincode>".  A row that cannot be parsed is skipped
        (its traceback is printed and the session rolled back); parsed rows
        are committed one by one.  The session is always closed at the end.
        """
        from pyquery import PyQuery as pq
        orders = list(session.query(Orders).filter_by(store_id=self.store_id).filter_by(status='DETAIL_CREATED').group_by(Orders.user_id).all())
        try:
            for order in orders:
                try:
                    doc = pq(order.rawhtml)
                    lists = doc('ul.displayAddressUL li')
                    address = lists[-3].text
                    adSplit = lists[-2].text.split(",")
                    match = re.match(r"([a-z ]+)([0-9]+)", adSplit[1], re.I)
                    if not match:
                        # Without a match there is no state/pincode for this
                        # order; never fall back to values parsed for an
                        # earlier user.
                        continue
                    state, pincode = match.groups()
                    # The entity is created only after parsing succeeded so a
                    # half-filled row is never pending in the session.
                    orderInfo = All_user_addresses()
                    orderInfo.address = address
                    orderInfo.user_id = order.user_id
                    orderInfo.source = 'order'
                    orderInfo.city = adSplit[0].strip()
                    orderInfo.pincode = pincode.strip()
                    orderInfo.state = state.strip().title()
                    session.commit()
                except Exception:
                    traceback.print_exc()
                    session.rollback()
                    continue
        finally:
            session.close()
14608 amit.gupta 843
 
13927 amit.gupta 844
 
845
 
13774 amit.gupta 846
def main():
    """Ad-hoc debugging entry point: parse one locally saved order page.

    The html is read from a developer-local path through ``readSSh`` and fed
    to ``parseOrderRawHtml`` of store 1 with fixed sample identifiers.
    """
    sampleUrl = 'https://www.amazon.in/gp/css/summary/edit.html?orderID=402-0540293-4683515'
    store = getStore(1)
    sampleHtml = readSSh('/home/amit/amit.txt')
    store.parseOrderRawHtml("444444", '123', 14, sampleHtml, sampleUrl)
    # Earlier experiments, kept for reference:
    #    orders = list(session.query(OrdersRaw).filter_by(status = 'DETAIL_NOT_CREATED_UNKNOWN').filter(OrdersRaw.id > 61071).all())
    #    session.close()
    #    for o in orders:
    #        try:
    #            store.trackOrdersForUser(o.id, o.order_url, o.rawhtml)
    #        finally:
    #            session.close()
    #store.trackOrdersForUser(10466, 'https://www.amazon.in/gp/css/summary/edit.html?orderID=403-7498756-0837158', readSSh('/AmazonTrack/User10466/403-7498756-0837158'))
    #store.trackOrdersForUser(46195, 'https://www.amazon.in/gp/css/summary/edit.html?orderID=404-4294022-1187515', readSSh('/home/amit/amazon.html'))
17231 amit.gupta 858
 
859
def getSummaryFile(directory):
    """Return the name of the most recent ``orderSummary*`` file in ``directory``.

    File names are expected to look like ``orderSummary<dd>-<mm>:<HH>:<MM>:<SS>``.
    The names carry no year, so files are ordered by month, day and time
    only.  Names with just a ``<dd>-<mm>`` part are accepted as well; names
    that cannot be parsed are ignored.

    Returns:
        The file name (not the full path), or None when the directory cannot
        be listed ("Missing directory" is printed) or holds no summary file.
    """
    prefix = "orderSummary"
    try:
        names = os.listdir(directory)
    except (OSError, TypeError):
        print("Missing directory")
        return None

    latestStamp = None
    finalFile = None
    for name in names:
        if not name.startswith(prefix):
            continue
        stamp = name[len(prefix):]
        parsed = None
        # A leap year is used as the placeholder year so that 29-02 parses.
        for text, layout in ((stamp, "%Y-%d-%m:%H:%M:%S"), (stamp.split(":")[0], "%Y-%d-%m")):
            try:
                parsed = datetime.strptime("2016-" + text, layout)
                break
            except ValueError:
                continue
        if parsed is None:
            continue
        if latestStamp is None or parsed > latestStamp:
            latestStamp = parsed
            finalFile = name
    return finalFile
17231 amit.gupta 872
 
15555 amit.gupta 873
 
874
def parseDetailNotCreated():
    """Re-run tracking for every raw order in status 'DETAIL_NOT_CREATED_UNKNOWN'.

    The rows are loaded in one query, the session is closed, and each row is
    handed to ``trackOrdersForUser`` of store 1.  The session is closed again
    on the way out, whatever happens.
    """
    try:
        amazonStore = getStore(1)
        pendingQuery = session.query(OrdersRaw).filter_by(status='DETAIL_NOT_CREATED_UNKNOWN')
        pendingOrders = pendingQuery.all()
        session.close()
        for rawOrder in pendingOrders:
            # NOTE(review): the row id is passed where trackOrdersForUser
            # expects a user id - confirm this is intended.
            amazonStore.trackOrdersForUser(rawOrder.id, rawOrder.order_url, rawOrder.rawhtml)
    finally:
        session.close()
17251 amit.gupta 884
 
885
def getDateStringDelivered(dateString='Monday'):
    """Resolve the date part of a "Delivered ..." status to a datetime.

    Accepted forms, tried in this order:

    * ``today`` / ``yesterday`` (any case);
    * ``dd-Mon-yy``, e.g. ``05-Jan-15``;
    * ``Weekday, dd Mon`` without a year: the current year is used unless
      that date lies in the future, in which case the previous year is used;
    * a bare weekday name: the most recent such day strictly before today.

    Returns:
        A ``datetime`` at midnight in every successful case, so callers can
        format it with ``datetime.strftime``; None when nothing matches
        ("could not parse" is printed).
    """
    print(dateString)
    today = date.today()
    midnight = datetime(today.year, today.month, today.day)
    lowered = dateString.lower()
    if lowered == 'today':
        return midnight
    if lowered == 'yesterday':
        return midnight - timedelta(days=1)

    try:
        return datetime.strptime(dateString, '%d-%b-%y')
    except ValueError:
        pass

    try:
        # A delivery cannot be in the future, so pick the closest past date.
        thisYear = datetime.strptime(dateString + " " + str(today.year), "%A, %d %b %Y")
        if thisYear <= midnight:
            return thisYear
        return datetime.strptime(dateString + " " + str(today.year - 1), "%A, %d %b %Y")
    except ValueError:
        pass

    # Index 0 is Sunday while isoweekday() numbers Monday..Sunday as 1..7.
    days_of_week = ['sunday','monday','tuesday','wednesday',
                    'thursday','friday','saturday']
    try:
        deltaDays = today.isoweekday() - days_of_week.index(lowered)
    except ValueError:
        print("could not parse")
        return None
    if deltaDays <= 0:
        deltaDays = deltaDays + 7
    delivered = midnight - timedelta(days=deltaDays)
    print(delivered.strftime('%d-%b-%y'))
    return delivered
15565 amit.gupta 919
 
17254 amit.gupta 920
def getDateStringArriving(dateString='Thursday'):
    """Resolve the date part of an "Arriving ..." status to a datetime.

    Accepted forms, tried in this order:

    * ``today`` / ``tomorrow`` (any case);
    * ``dd-Mon-yy``, e.g. ``05-Jan-15``;
    * ``Weekday, dd Mon`` without a year: the current year is used unless
      that date already passed, in which case the next year is used;
    * a bare weekday name: the next such day, today included.

    Returns:
        A ``datetime`` at midnight in every successful case, so callers can
        format it with ``datetime.strftime``; None when nothing matches
        ("Could not parse" is printed).
    """
    print(dateString)
    today = date.today()
    midnight = datetime(today.year, today.month, today.day)
    lowered = dateString.lower()
    if lowered == 'today':
        return midnight
    if lowered == 'tomorrow':
        return midnight + timedelta(days=1)

    try:
        return datetime.strptime(dateString, '%d-%b-%y')
    except ValueError:
        pass

    try:
        # An arrival cannot be in the past, so pick the closest upcoming date.
        thisYear = datetime.strptime(dateString + " " + str(today.year), "%A, %d %b %Y")
        if thisYear >= midnight:
            return thisYear
        return datetime.strptime(dateString + " " + str(today.year + 1), "%A, %d %b %Y")
    except ValueError:
        pass

    # Index 0 is Sunday while isoweekday() numbers Monday..Sunday as 1..7.
    days_of_week = ['sunday','monday','tuesday','wednesday',
                    'thursday','friday','saturday']
    try:
        deltaDays = days_of_week.index(lowered) - today.isoweekday()
    except ValueError:
        print("Could not parse")
        return None
    if deltaDays < 0:
        deltaDays = deltaDays + 7
    return midnight + timedelta(days=deltaDays)
953
 
18028 amit.gupta 954
def main1():
955
    store = getStore(1)
956
    for merchantOrder in store.db.merchantOrder.find({"subOrders":{"$elemMatch":{"cashBackStatus":"Not Applicable", "cashBackPercentage":{"$gt":0}}}}):
957
        mo = obj(merchantOrder)
958
        for subOrder in mo.subOrders:
959
            subOrder.closed=False 
960
        print "orderId", mo.orderId
961
        store.populateDerivedFields(mo, False)
962
        store.db.merchantOrder.update({"orderId":mo.orderId}, {"$set":todict(mo)})
963
        break
964
 
965
 
15565 amit.gupta 966
def parseOrderNotCreated():
    """Re-parse every raw order in status 'ORDER_NOT_CREATED_UNKNOWN'.

    Each row is parsed again through ``parseOrderRawHtml`` of store 1 and the
    'result' entry of the returned value is stored as the new status of the
    row, committing after every row.  The session is closed on the way out.
    """
    try:
        amazonStore = getStore(1)
        pendingQuery = session.query(OrdersRaw).filter_by(status='ORDER_NOT_CREATED_UNKNOWN')
        pendingOrders = pendingQuery.all()
        session.close()
        for rawOrder in pendingOrders:
            parsed = amazonStore.parseOrderRawHtml(rawOrder.id, rawOrder.sub_tag, rawOrder.user_id, rawOrder.rawhtml, rawOrder.order_url)
            outcome = parsed['result']
            # The row is loaded again because the session that produced
            # rawOrder was closed above.
            freshRow = session.query(OrdersRaw).filter_by(id=rawOrder.id).first()
            freshRow.status = outcome
            session.commit()
    finally:
        session.close()
15555 amit.gupta 978
 
13774 amit.gupta 979
if __name__ == '__main__':
20771 amit.gupta 980
    #readSSh("~/AmazonTrack/User18053/orderSummary07-01:00:33:08")
17599 amit.gupta 981
#    readSSh("/AmazonTrack/User10466/orderSummary18-11:21:32:36")
18156 amit.gupta 982
#    readSSh("/AmazonTrack/User5525/171-0333104-6169933")
20771 amit.gupta 983
    #main()
984
    store=getStore(1)
985
    store.parseOrderRawHtml(99999, 'd', 14, readSSh('/home/amit/sample.html'), 'https://www.amazon.in/gp/buy/thankyou/handlers/display.html?ie=UTF8&asins=B01DDP7D6W&orderId=406-9807873-3094728&purchaseId=404-46')