Subversion Repositories SmartDukaan

Rev

Rev 4198 | Rev 5507 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4039 varun.gupt 1
'''
2
Created on 16-Sep-2011
3
 
4
@author: Varun Gupta
5
'''
5291 varun.gupt 6
import sys, json
4039 varun.gupt 7
from Clients.GAEServletClient import url, clearPriceData, initJobQueue, getPhonePricesJSON
8
from ScraperAgent import ScraperAgent
9
from PyLucene.IndexBuilder import IndexBuilder
10
 
11
def startScraper():
    '''
    Create a ScraperAgent instance and start it.
    '''
    agent = ScraperAgent()
    agent.start()
13
 
14
def buildIndex():
    '''
    Build a fresh index from the locally stored crawl dump.

    Loads the JSON document at /tmp/price-comp-dashboard/primary-crawl.json,
    prints the number of top-level entries it contains and passes the parsed
    data to IndexBuilder (with new_index = True), whose build() is then run.

    Errors raised by open() or json.load() propagate to the caller.
    '''
    # NOTE: fetching the data from the servlet is disabled; the local dump
    # below is used instead.
    #price_data = getPhonePricesJSON(url)

    # The with-block guarantees the file handle is closed, even when
    # json.load() fails on malformed content.
    with open('/tmp/price-comp-dashboard/primary-crawl.json') as f:
        price_data = json.load(f)

    # A parenthesised single-argument print gives the same output on
    # Python 2 and Python 3.
    print(len(price_data))

    indexer = IndexBuilder(price_data = price_data, new_index = True)
    indexer.build()
21
 
22
if __name__ == '__main__':
    # Entry point: dispatch on the first command line argument.
    # Supported commands: clean, init, scrape, index.
    try:
        # Only the argv lookup is guarded. An IndexError raised inside one of
        # the commands must not be mistaken for a missing command line param.
        cmd = sys.argv[1].strip()
    except IndexError as e:
        print(e)
        print('ERROR: Command line param must be specified. Options: clean, init, scrape, index')
    else:
        print(cmd)

        if cmd == 'clean':
            clearPriceData(url)

        elif cmd == 'init':
            initJobQueue(url)

        elif cmd == 'scrape':
            startScraper()

        elif cmd == 'index':
            buildIndex()

        else:
            # Report unrecognised commands instead of silently doing nothing.
            print('ERROR: Unknown command: ' + cmd + '. Options: clean, init, scrape, index')