Subversion Repositories SmartDukaan

Rev

Rev 5507 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 5507 Rev 5761
Line 12... Line 12...
12
def startScraper():
12
def startScraper():
13
    ScraperAgent().start()
13
    ScraperAgent().start()
14
 
14
 
15
def buildIndex():
15
def buildIndex():
16
    #price_data = getPhonePricesJSON(url)
16
    #price_data = getPhonePricesJSON(url)
17
    f = open('/tmp/price-comp-dashboard/primary-crawl.json')
17
    f = open('/usr/price-comp-dashboard/primary-crawl.json')
18
    price_data = json.load(f)
18
    price_data = json.load(f)
19
    print len(price_data)
19
    print len(price_data)
20
    indexer = IndexBuilder(price_data = price_data, new_index = True)
20
    indexer = IndexBuilder(price_data = price_data, new_index = True)
21
    indexer.build()
21
    indexer.build()
22
 
22
 
Line 33... Line 33...
33
        
33
        
34
        elif cmd == 'scrape':
34
        elif cmd == 'scrape':
35
            startScraper()
35
            startScraper()
36
        
36
        
37
        elif cmd == 'scrapep':
37
        elif cmd == 'scrapep':
38
            f = open('/tmp/price-comp-dashboard/urls.json')
38
            f = open('/usr/price-comp-dashboard/urls.json')
39
            data = {}
39
            data = {}
40
            
40
            
41
            for entityId, sourcenurl in json.load(f).iteritems():
41
            for entityId, sourcenurl in json.load(f).iteritems():
42
                for source, url in sourcenurl.iteritems():
42
                for source, url in sourcenurl.iteritems():
43
                    print entityId, source, url
43
                    print entityId, source, url
Line 48... Line 48...
48
                    if entityId in data:
48
                    if entityId in data:
49
                        data[entityId][source] = productData
49
                        data[entityId][source] = productData
50
                    else:
50
                    else:
51
                        data[entityId] = {source: productData}
51
                        data[entityId] = {source: productData}
52
            
52
            
53
            fw = open('/tmp/price-comp-dashboard/secondary-crawl.json', 'w')
53
            fw = open('/usr/price-comp-dashboard/secondary-crawl.json', 'w')
54
            json.dump(data, fw, indent = 4)
54
            json.dump(data, fw, indent = 4)
55
        
55
        
56
        elif cmd == 'index':
56
        elif cmd == 'index':
57
            buildIndex()
57
            buildIndex()
58
        
58