Subversion Repositories SmartDukaan

Rev

Rev 14157 | Rev 14174 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 14157 Rev 14170
Line 1... Line 1...
1
import pymongo
1
import pymongo
2
from dtr.utils.utils import to_java_date
2
from dtr.utils.utils import to_java_date
3
from datetime import datetime, timedelta
3
from datetime import datetime, timedelta
4
from dtr.utils import AmazonPriceOnlyScraper
4
from dtr.utils import AmazonPriceOnlyScraper
5
from multiprocessing.dummy import Pool as ThreadPool 
5
from multiprocessing.dummy import Pool as ThreadPool
-
 
6
from multiprocessing import cpu_count
6
 
7
 
7
con = None
8
con = None
8
scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
-
 
9
 
-
 
10
 
9
 
11
def get_mongo_connection(host='localhost', port=27017):
10
def get_mongo_connection(host='localhost', port=27017):
12
    global con
11
    global con
13
    if con is None:
12
    if con is None:
14
        print "Establishing connection %s host and port %d" %(host,port)
13
        print "Establishing connection %s host and port %d" %(host,port)
Line 29... Line 28...
29
                toScrapMap[data['_id']] = data
28
                toScrapMap[data['_id']] = data
30
    for k, y in toScrapMap.iteritems():
29
    for k, y in toScrapMap.iteritems():
31
        print k,
30
        print k,
32
        print '\t',
31
        print '\t',
33
        print y
32
        print y
34
    pool = ThreadPool(50)
33
    pool = ThreadPool(cpu_count() * 2)
35
    pool.map(scrapeAmazon,toScrapMap.values())
34
    pool.map(scrapeAmazon,toScrapMap.values())
36
    pool.close()
35
    pool.close()
37
    pool.join()
36
    pool.join()
38
    print "joining threads"
37
    print "joining threads"
-
 
38
    print datetime.now()
39
        
39
        
40
 
40
 
41
def scrapeAmazon(data):
41
def scrapeAmazon(data):
42
    inStock = 0
42
    inStock = 0
43
    print str(data['identifier'])
43
    print str(data['identifier'])
Line 52... Line 52...
52
        pass
52
        pass
53
    
53
    
54
    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(data['identifier'].strip())
54
    url = "http://www.amazon.in/gp/offer-listing/%s/ref=olp_sort_ps"%(data['identifier'].strip())
55
    print url
55
    print url
56
    lowestPrice = 0.0
56
    lowestPrice = 0.0
-
 
57
    scraperAmazon = AmazonPriceOnlyScraper.AmazonScraper()
57
    lowestPrice = scraperAmazon.read(url)
58
    lowestPrice = scraperAmazon.read(url)
58
    print lowestPrice
59
    print lowestPrice
59
    if lowestPrice > 0:
60
    if lowestPrice > 0:
60
        inStock = 1
61
        inStock = 1
61
    print lowestPrice
62
    print lowestPrice