Subversion Repositories SmartDukaan

Rev

Rev 14133 | Rev 14157 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 14133 Rev 14147
Line 1... Line 1...
1
import pymongo
1
import pymongo
2
from dtr.utils.utils import to_java_date
2
from dtr.utils.utils import to_java_date
3
from datetime import datetime, timedelta
3
from datetime import datetime, timedelta
4
from dtr.utils import AmazonPriceOnlyScraper
4
from dtr.utils import AmazonPriceOnlyScraper
-
 
5
from multiprocessing.dummy import Pool as ThreadPool 
5
 
6
 
6
con = None
7
con = None
7
scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
8
scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
8
 
9
 
9
 
10
 
Line 16... Line 17...
16
        except Exception, e:
17
        except Exception, e:
17
            print e
18
            print e
18
            return None
19
            return None
19
    return con
20
    return con
20
 
21
 
21
def scrapeAmazon():
22
def populate():
-
 
23
    toScrapMap = {}
22
    bestSellers = list(get_mongo_connection().Catalog.MasterData.find({'rank':{'$gt':0}}))
24
    bestSellers = list(get_mongo_connection().Catalog.MasterData.find({'rank':{'$gt':0}}))
23
    for bestSeller in bestSellers: 
25
    for bestSeller in bestSellers: 
24
        amazonBestSellers = list(get_mongo_connection().Catalog.MasterData.find({'skuBundleId':bestSeller['skuBundleId'],'source_id':1}))
26
        amazonBestSellers = list(get_mongo_connection().Catalog.MasterData.find({'skuBundleId':bestSeller['skuBundleId'],'source_id':1}))
25
        for data in amazonBestSellers:
27
        for data in amazonBestSellers:
26
            inStock = 0
28
            if not toScrapMap.has_key(data['_id']):
27
            print str(data['identifier'])
29
                toScrapMap[data['_id']] = data
28
            if data['identifier'] is None or len(data['identifier'].strip())==0:
30
    for k, y in toScrapMap.iteritems():
-
 
31
        print k,
29
                print "continue"
32
        print '\t',
30
                continue
33
        print y
-
 
34
    pool = ThreadPool(50)
-
 
35
    pool.map(scrapeAmazon,toScrapMap.values())
-
 
36
    pool.close()
-
 
37
    pool.join()
-
 
38
    print "joining threads"
31
            
39
        
-
 
40
 
-
 
41
def scrapeAmazon(data):
-
 
42
    inStock = 0
-
 
43
    print str(data['identifier'])
-
 
44
    if data['identifier'] is None or len(data['identifier'].strip())==0:
32
            try:
45
        return
-
 
46
    
-
 
47
    try:
33
                if data['priceUpdatedOn'] > to_java_date(datetime.now() - timedelta(minutes=5)):
48
        if data['priceUpdatedOn'] > to_java_date(datetime.now() - timedelta(minutes=5)):
34
                    print "sku id is already updated",data['_id'] 
49
            print "sku id is already updated",data['_id'] 
35
                    continue
50
            return
36
            except:
51
    except:
37
                pass
52
        pass
-
 
53
    
-
 
54
    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(data['identifier'].strip())
-
 
55
    print url
-
 
56
    lowestPrice = 0.0
-
 
57
    lowestPrice = scraperAmazon.read(url)
-
 
58
    print lowestPrice
-
 
59
    if lowestPrice > 0:
-
 
60
        inStock = 1
-
 
61
    print lowestPrice
-
 
62
    print inStock
-
 
63
    if lowestPrice > 0:
-
 
64
        get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice,'updatedOn':to_java_date(datetime.now()),'priceUpdatedOn':to_java_date(datetime.now()),'in_stock':inStock}}, multi=True)
-
 
65
        get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice , 'in_stock':inStock}}, multi=True)
-
 
66
    else:
-
 
67
        get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'updatedOn':to_java_date(datetime.now()),'in_stock':inStock,'priceUpdatedOn':to_java_date(datetime.now())}}, multi=True)
-
 
68
        get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'in_stock':inStock}}, multi=True)
-
 
69
        
-
 
70
    try:
-
 
71
        recomputeDeal(data['skuBundleId'])
-
 
72
    except:
-
 
73
        print "Unable to compute deal for ",data['skuBundleId']    
38
            
74
            
39
            url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(data['identifier'].strip())
-
 
40
            print url
-
 
41
            lowestPrice = 0.0
-
 
42
            lowestPrice = scraperAmazon.read(url)
-
 
43
            print lowestPrice
-
 
44
            if lowestPrice > 0:
-
 
45
                inStock = 1
-
 
46
            print lowestPrice
-
 
47
            print inStock
-
 
48
            if lowestPrice > 0:
-
 
49
                get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice,'updatedOn':to_java_date(datetime.now()),'priceUpdatedOn':to_java_date(datetime.now()),'in_stock':inStock}}, multi=True)
-
 
50
                get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'available_price':lowestPrice , 'in_stock':inStock}}, multi=True)
-
 
51
            else:
-
 
52
                get_mongo_connection().Catalog.MasterData.update({'_id':data['_id']}, {'$set' : {'updatedOn':to_java_date(datetime.now()),'in_stock':inStock,'priceUpdatedOn':to_java_date(datetime.now())}}, multi=True)
-
 
53
                get_mongo_connection().Catalog.Deals.update({'_id':data['_id']}, {'$set' : {'in_stock':inStock}}, multi=True)
-
 
54
                
-
 
55
            try:
-
 
56
                recomputeDeal(data['skuBundleId'])
-
 
57
            except:
-
 
58
                print "Unable to compute deal for ",data['skuBundleId']
-
 
59
        
75
        
60
 
76
 
61
def recomputeDeal(skuBundleId):
77
def recomputeDeal(skuBundleId):
62
    """Lets recompute deal for this bundle"""
78
    """Lets recompute deal for this bundle"""
63
    print "Recomputing for bundleId",skuBundleId
79
    print "Recomputing for bundleId",skuBundleId
Line 88... Line 104...
88
        get_mongo_connection().Catalog.Deals.update({ '_id' : bestOne['_id'] }, {'$set':{'showDeal':1 }})
104
        get_mongo_connection().Catalog.Deals.update({ '_id' : bestOne['_id'] }, {'$set':{'showDeal':1 }})
89
    if len(toUpdate) > 0:
105
    if len(toUpdate) > 0:
90
        get_mongo_connection().Catalog.Deals.update({ '_id' : { "$in": toUpdate } }, {'$set':{'showDeal':0 }},upsert=False, multi=True)
106
        get_mongo_connection().Catalog.Deals.update({ '_id' : { "$in": toUpdate } }, {'$set':{'showDeal':0 }},upsert=False, multi=True)
91
 
107
 
92
def main():
108
def main():
-
 
109
    populate()
93
    scrapeAmazon()
110
    #scrapeAmazon()
94
            
111
            
95
if __name__=='__main__':
112
if __name__=='__main__':
96
    main()
113
    main()