Subversion Repositories SmartDukaan

Rev

Rev 14260 | Rev 14325 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 14260 Rev 14308
Line 1... Line 1...
1
import pymongo
1
import pymongo
2
from dtr.utils.utils import to_java_date
2
from dtr.utils.utils import to_java_date
3
from datetime import datetime, timedelta
3
from datetime import datetime, timedelta
4
from dtr.utils import AmazonPriceOnlyScraper
4
from dtr.utils import AmazonPriceOnlyScraper, AmazonDealScraper
5
from multiprocessing import Pool as ThreadPool
5
from multiprocessing import Pool as ThreadPool
6
from multiprocessing import cpu_count
6
from multiprocessing import cpu_count
7
import optparse
7
import optparse
8
 
8
 
9
con = None
9
con = None
Line 38... Line 38...
38
    dealFlagged = list(get_mongo_connection().Catalog.Deals.find({'source_id':1,'showDeal':1,'totalPoints':{'$gt':0}}))
38
    dealFlagged = list(get_mongo_connection().Catalog.Deals.find({'source_id':1,'showDeal':1,'totalPoints':{'$gt':0}}))
39
    for deal in dealFlagged:
39
    for deal in dealFlagged:
40
        if not toScrapMap.has_key(deal['_id']):
40
        if not toScrapMap.has_key(deal['_id']):
41
            data = list(get_mongo_connection().Catalog.MasterData.find({'_id':deal['_id']}))
41
            data = list(get_mongo_connection().Catalog.MasterData.find({'_id':deal['_id']}))
42
            toScrapMap[deal['_id']] = data[0]
42
            toScrapMap[deal['_id']] = data[0]
-
 
43
    manualDeals = list(get_mongo_connection().Catalog.ManualDeals.find({'startDate':{'$lte':to_java_date(datetime.now())},'endDate':{'$gte':to_java_date(datetime.now())},'source_id':1}))
-
 
44
    for manualDeal in manualDeals:
-
 
45
        if not toScrapMap.has_key(manualDeal['sku']):
-
 
46
            data = list(get_mongo_connection().Catalog.MasterData.find({'_id':manualDeal['sku']}))
-
 
47
            if len(data) > 0:
-
 
48
                data[0]['dealFlag'] = 1
-
 
49
                data[0]['dealType'] = manualDeal['dealType']
-
 
50
                toScrapMap[manualDeal['sku']] = data[0]
-
 
51
        else:
-
 
52
            data = toScrapMap.get(manualDeal['sku'])
-
 
53
            data['dealFlag'] = 1
-
 
54
            data['dealType'] = manualDeal['dealType']
43
    pool = ThreadPool(cpu_count() * 2)
55
    pool = ThreadPool(cpu_count() * 2)
44
    pool.map(scrapeAmazon,toScrapMap.values())
56
    pool.map(scrapeAmazon,toScrapMap.values())
45
    pool.close()
57
    pool.close()
46
    pool.join()
58
    pool.join()
47
    print "joining threads at %s"%(str(datetime.now()))
59
    print "joining threads at %s"%(str(datetime.now()))
48
        
60
        
49
 
61
 
50
def scrapeAmazon(data):
62
def scrapeAmazon(data):
51
    inStock = 0
63
    inStock = 0
-
 
64
    dealScraping = False
52
    print str(data['identifier'])
65
    print str(data['identifier'])
53
    if data['identifier'] is None or len(data['identifier'].strip())==0:
66
    if data['identifier'] is None or len(data['identifier'].strip())==0:
54
        return
67
        return
55
    
68
    
56
    try:
69
    try:
Line 59... Line 72...
59
            return
72
            return
60
    except:
73
    except:
61
        pass
74
        pass
62
    
75
    
63
    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(data['identifier'].strip())
76
    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(data['identifier'].strip())
-
 
77
    
-
 
78
    try:
-
 
79
        if data['dealFlag'] ==1 and data['dealType'] ==1:
-
 
80
            deal_url = "http://www.amazon.in/dp/%s"%(data['identifier'].strip())
-
 
81
            dealScraping = True
-
 
82
    except:
-
 
83
        data['dealFlag'] = 0
-
 
84
        data['dealType'] = 0
-
 
85
        
64
    print url
86
    print url
65
    lowestPrice = 0.0
87
    lowestPrice = 0.0
-
 
88
    
-
 
89
    if not dealScraping:
66
    scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
90
        scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
67
    lowestPrice = scraperAmazon.read(url)
91
        lowestPrice = scraperAmazon.read(url)
-
 
92
    else:
-
 
93
        dealScraperAmazon = AmazonDealScraper.AmazonScraper()
-
 
94
        lowestPrice = dealScraperAmazon.read(deal_url)
-
 
95
        if lowestPrice == 0:
-
 
96
            scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
-
 
97
            lowestPrice = scraperAmazon.read(url)
-
 
98
            dealScraping = False
-
 
99
        
68
    print lowestPrice
100
    print lowestPrice
69
    if lowestPrice > 0:
101
    if lowestPrice > 0:
70
        inStock = 1
102
        inStock = 1
71
    print lowestPrice
103
    print lowestPrice
72
    print inStock
104
    print inStock
73
    if lowestPrice > 0:
105
    if lowestPrice > 0:
74
        get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice,'updatedOn':to_java_date(datetime.now()),'priceUpdatedOn':to_java_date(datetime.now()),'in_stock':inStock}}, multi=True)
106
        get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice,'updatedOn':to_java_date(datetime.now()),'priceUpdatedOn':to_java_date(datetime.now()),'in_stock':inStock}}, multi=True)
75
        get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice , 'in_stock':inStock}}, multi=True)
107
        get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice , 'in_stock':inStock,'dealType':data['dealType']}}, multi=True)
76
    else:
108
    else:
77
        get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'updatedOn':to_java_date(datetime.now()),'in_stock':inStock,'priceUpdatedOn':to_java_date(datetime.now())}}, multi=True)
109
        get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'updatedOn':to_java_date(datetime.now()),'in_stock':inStock,'priceUpdatedOn':to_java_date(datetime.now())}}, multi=True)
78
        get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'in_stock':inStock}}, multi=True)
110
        get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'in_stock':inStock,'dealType':data['dealType']}}, multi=True)
79
        
111
        
80
    try:
112
    try:
81
        recomputeDeal(data['skuBundleId'])
113
        recomputeDeal(data['skuBundleId'])
82
    except:
114
    except:
83
        print "Unable to compute deal for ",data['skuBundleId']    
115
        print "Unable to compute deal for ",data['skuBundleId']