Subversion Repositories SmartDukaan

Rev

Rev 20472 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 20472 Rev 20694
Line 389... Line 389...
389
#        '>': '<',
389
#        '>': '<',
390
#        '&': '&',
390
#        '&': '&',
391
#        '"': '"', # should be escaped in attributes
391
#        '"': '"', # should be escaped in attributes
392
#        "'": '''    # should be escaped in attributes
392
#        "'": '''    # should be escaped in attributes
393
#        }
393
#        }
-
 
394
        secondryIdentiferSupcMap = {}
-
 
395
        for scriptTag in pq.items("script"):
-
 
396
            if  "var reqData1 =" in scriptTag.text():
-
 
397
                match = re.search("(\[.*?\])",scriptTag.text(), re.DOTALL)
-
 
398
                a = match.group(1)
-
 
399
                if a:
-
 
400
                    for mapElement in json.loads(a):
-
 
401
                        secondryIdentiferSupcMap[mapElement["pog_id"]] = mapElement["supc"]
-
 
402
                    break 
394
        jsonValue = pq("#orderJSON").attr("value")
403
        jsonValue = pq("#orderJSON").attr("value")
395
        jsonValue.replace("&quot;", '"')
404
        jsonValue.replace("&quot;", '"')
396
        jsonValue.replace("&amp;", '&')
405
        jsonValue.replace("&amp;", '&')
397
        jsonValue.replace("&gt;", '>')
406
        jsonValue.replace("&gt;", '>')
398
        jsonValue.replace("&lt;", '<')
407
        jsonValue.replace("&lt;", '<')
Line 402... Line 411...
402
            try:
411
            try:
403
                supcElement = pq(supcElement)
412
                supcElement = pq(supcElement)
404
                title = supcElement('div.order-heading').text().strip()
413
                title = supcElement('div.order-heading').text().strip()
405
                productUrl = supcElement.attr("data-href")
414
                productUrl = supcElement.attr("data-href")
406
                imgUrl = supcElement.find('img').attr('src')
415
                imgUrl = supcElement.find('img').attr('src')
407
                try:
-
 
408
                    supc = self.catalogdb.MasterData.find_one({"secondaryIdentifier":productUrl.split("/")[-1], "source_id":self.store_id})
416
                secondaryIdentifier = productUrl.split("/")[-1]
409
                    if supc is None:
417
                if secondryIdentiferSupcMap.has_key(secondaryIdentifier):
410
                        raise
-
 
411
                    else:
-
 
412
                        supc = supc['identifier'] 
418
                    supc = secondryIdentiferSupcMap[secondaryIdentifier]
413
                except:
419
                else:
414
                    try:
-
 
415
                        supc = imgUrl.split('-')[-3]
-
 
416
                    except:
-
 
417
                        supc = self.db.sdIdentifiers.find_one({"secondaryIdentifier":productUrl.split("/")[-1]})
420
                    supc = self.catalogdb.MasterData.find_one({"secondaryIdentifier": secondaryIdentifier, "source_id":self.store_id})
418
                        if supc is None:
-
 
419
                            raise
-
 
420
                        else:
421
                    if supc:
421
                            supc = supc['identifier']
422
                        supc = supc['identifier']
422
                        
-
 
423
                supcMap[supc] = {'title':title, 'imgUrl':imgUrl, 'productUrl':productUrl}
423
                supcMap[supc] = {'title':title, 'imgUrl':imgUrl, 'productUrl':productUrl}
424
            except:
424
            except:
425
                pass
425
                pass
426
        return json.loads(jsonValue)
426
        return json.loads(jsonValue)
427
 
427
 
Line 440... Line 440...
440
            try:
440
            try:
441
                if pq("title").text()=="Webpage not available":
441
                if pq("title").text()=="Webpage not available":
442
                    raise                
442
                    raise                
443
                orderJSON = self.getOrderJSON(pq, supcMap)
443
                orderJSON = self.getOrderJSON(pq, supcMap)
444
            except:
444
            except:
-
 
445
                traceback.print_exc()
445
                resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
446
                resp['result'] = 'ORDER_NOT_CREATED_KNOWN'
446
                return resp
447
                return resp
447
                '''page =fetchResponseUsingProxy(url)
448
                '''page =fetchResponseUsingProxy(url)
448
                try:
449
                try:
449
                    merchantOrder = self._parseB(orderId, subTagId, userId, page, orderSuccessUrl)
450
                    merchantOrder = self._parseB(orderId, subTagId, userId, page, orderSuccessUrl)
Line 469... Line 470...
469
             
470
             
470
        
471
        
471
        #soup = BeautifulSoup(rawHtml,convertEntities=BeautifulSoup.HTML_ENTITIES)
472
        #soup = BeautifulSoup(rawHtml,convertEntities=BeautifulSoup.HTML_ENTITIES)
472
        #soup.find(name, attrs, recursive, text)
473
        #soup.find(name, attrs, recursive, text)
473
    def _parseC(self, orderId, subTagId, userId, supcMap, orderJSON, orderSuccessUrl):
474
    def _parseC(self, orderId, subTagId, userId, supcMap, orderJSON, orderSuccessUrl):
474
        print orderJSON
475
        print json.dumps(orderJSON)
475
        merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)
476
        merchantOrder = Order(orderId, userId, subTagId, self.store_id, orderSuccessUrl)
476
        placedOn = datetime.strftime(utils.fromTimeStamp(orderJSON['created']/1000), "%a, %d %b, %Y")
477
        placedOn = datetime.strftime(utils.fromTimeStamp(orderJSON['created']/1000), "%a, %d %b, %Y")
477
        merchantOrder.placedOn = placedOn
478
        merchantOrder.placedOn = placedOn
478
        merchantOrder.merchantOrderId = orderJSON['code']
479
        merchantOrder.merchantOrderId = orderJSON['code']
479
        merchantOrder.paidAmount = orderJSON['paidAmount']
480
        merchantOrder.paidAmount = orderJSON['paidAmount']
480
        merchantOrder.deliveryCharges = orderJSON['shippingCharges']
481
        merchantOrder.deliveryCharges = orderJSON['shippingCharges']
481
        merchantOrder.closed= False
482
        merchantOrder.closed= False
482
        merchantSubOrders = []
483
        merchantSubOrders = []
483
        for s in orderJSON['suborders']:
484
        for s in orderJSON['suborders']:
484
            print s
-
 
485
            if not supcMap.has_key(s['supcCode']):
-
 
486
                skuData = Mongo.get_mongo_connection().Catalog.MasterData.find_one({'identifier':s['supcCode'], 'source_id':self.store_id})
-
 
487
                if skuData is None:
-
 
488
                    url = "http://www.snapdeal.com/search?keyword=%s"%s['supcCode']
-
 
489
                    html = utils.fetchResponseUsingProxy(url)
-
 
490
                    html = html.replace("&#13;", "")
-
 
491
                    pq = PyQuery(html)
-
 
492
                    tag = pq('div.product-tuple-listing')
-
 
493
                    for resultDiv in tag:
-
 
494
                        pq_resultDiv = pq(resultDiv)
-
 
495
                        if (pq_resultDiv.attr['data-defaultsupcforfmcg']).strip() == s['supcCode'].strip():
-
 
496
                            productUrl =  pq_resultDiv('div.product-tuple-image').children('a').attr('href')
-
 
497
                            imgUrl =  pq_resultDiv('div.product-tuple-image').children('a').children('img').attr('src')
-
 
498
                            title = pq_resultDiv('p.product-title').text()
-
 
499
                            break
-
 
500
                else:
-
 
501
                    title = skuData['product_name']
-
 
502
                    productUrl = skuData['marketPlaceUrl']
-
 
503
                    imgUrl = skuData['thumbnail']
-
 
504
                supcMap[s['supcCode']] = {'title':title, 'imgUrl':imgUrl, 'productUrl':productUrl}
-
 
505
            map1 = supcMap[s['supcCode']]
485
            map1 = supcMap[s['supcCode']]
506
                
486
                
507
            amountPaid = s['paidAmount']
487
            amountPaid = s['paidAmount']
508
            productTitle = map1['title']
488
            productTitle = map1['title']
509
            productUrl = map1['productUrl'] 
489
            productUrl = map1['productUrl'] 
Line 950... Line 930...
950
 
930
 
951
 
931
 
952
def main():
932
def main():
953
    #print todict([1,2,"3"])
933
    #print todict([1,2,"3"])
954
    store = getStore(3)
934
    store = getStore(3)
955
    store.scrapeStoreOrders()
935
    #store.scrapeStoreOrders()
956
    #store.parseOrderRawHtml(332222, "3232311", 2, readSSh("/home/amit/sample1.html"), "    https://m.snapdeal.com/purchaseMobileComplete?code=a8b3420d4c5bc248ea887df6c9b3a724&order=10478372453")
936
    store.parseOrderRawHtml(332222, "3232311", 2, readSSh("/home/amit/sample.html"), "https://m.snapdeal.com/purchaseMobileComplete?code=1a011639e72588db39169df568654620&order=17772748329&sdInstant=false")
957
    #store.scrapeAffiliate()
937
    #store.scrapeAffiliate()
958
    #https://m.snapdeal.com/purchaseMobileComplete?code=3fbc8a02a1c4d3c4e906f46886de0464&order=5808451506
938
    #https://m.snapdeal.com/purchaseMobileComplete?code=3fbc8a02a1c4d3c4e906f46886de0464&order=5808451506
959
    #https://m.snapdeal.com/purchaseMobileComplete?code=9f4dfa49ff08a16d04c5e4bf519506fc&order=9611672826
939
    #https://m.snapdeal.com/purchaseMobileComplete?code=9f4dfa49ff08a16d04c5e4bf519506fc&order=9611672826
960
    
940
    
961
#    orders = list(session.query(OrdersRaw).filter_by(store_id=3).filter_by(status='ORDER_NOT_CREATED').all())
941
#    orders = list(session.query(OrdersRaw).filter_by(store_id=3).filter_by(status='ORDER_NOT_CREATED').all())