Rev 4198 | Rev 5507 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
'''
Created on 16-Sep-2011

@author: Varun Gupta

Command-line entry point. The first command-line argument selects one action:

    clean   -> clearPriceData(url)
    init    -> initJobQueue(url)
    scrape  -> startScraper()
    index   -> buildIndex()
'''
import sys
import json

from Clients.GAEServletClient import url, clearPriceData, initJobQueue, getPhonePricesJSON
from ScraperAgent import ScraperAgent
from PyLucene.IndexBuilder import IndexBuilder


def startScraper():
    '''Create a ScraperAgent and call its start() method.'''
    ScraperAgent().start()


def buildIndex(price_data_path='/tmp/price-comp-dashboard/primary-crawl.json'):
    '''
    Load price data from a JSON file and build an index from it.

    The parsed JSON is handed to IndexBuilder together with new_index=True,
    and IndexBuilder.build() is then invoked. The number of top-level entries
    in the parsed data is printed before indexing.

    price_data_path -- path of the JSON file to read; defaults to the
                       primary crawl dump used by the original script.

    Errors from open() (file missing or unreadable) and from json.load()
    (content is not valid JSON) are not caught here.
    '''
    # Alternative data source left disabled in the original script:
    #   price_data = getPhonePricesJSON(url)

    # 'with' guarantees the file handle is released even if parsing fails.
    with open(price_data_path) as f:
        price_data = json.load(f)

    print(len(price_data))
    indexer = IndexBuilder(price_data=price_data, new_index=True)
    indexer.build()


def _runCommand(cmd):
    '''Run the action named by cmd; return False if cmd is not recognised.'''
    if cmd == 'clean':
        clearPriceData(url)
    elif cmd == 'init':
        initJobQueue(url)
    elif cmd == 'scrape':
        startScraper()
    elif cmd == 'index':
        buildIndex()
    else:
        return False
    return True


if __name__ == '__main__':
    # Only the argv lookup is guarded: an IndexError raised from inside one of
    # the commands must propagate instead of being reported as a missing
    # command-line parameter.
    try:
        cmd = sys.argv[1].strip()
    except IndexError as e:
        print(e)
        print('ERROR: Command line param must be specified. Options: clean, init, scrape, index')
    else:
        print(cmd)
        if not _runCommand(cmd):
            # Report an unrecognised command rather than silently doing nothing.
            print('ERROR: Unknown command: %s. Options: clean, init, scrape, index' % cmd)