'''
Created on 25-Aug-2011

@author: Varun Gupta
'''
import sched, time

import ScraperLoader, Utils
from Clients import GAEServletClient


class ScraperAgent:

    def __init__(self):
        self.schedular = sched.scheduler(time.time, time.sleep)
        self.time_to_sleep = 2
        self.current_job = None
        # Payload posted back to the GAE servlet after every run.
        self.data = {'id': None, 'job_id': None, 'source': None,
                     'phone_prices': None, 'next_url': None}
        print "ScraperAgent initiated at %f" % time.time()

    def work(self):
        # Scrape the current job (if any), post the results, fetch the next
        # job, and reschedule this method.
        if Utils.isValidRule(self.current_job):
            print 'Working on new job'
            url = self.current_job['url'] if 'url' in self.current_job else None
            print 'URL: ', url
            scraper = ScraperLoader.getScraper(self.current_job['source'])
            scraper.setUrl(url)
            scraper.scrape()
            phone_prices = scraper.getPhones()
            next_url = scraper.getNextUrl()
            self.data['id'] = self.current_job['assigneeId']
            self.data['job_id'] = self.current_job['id']
            self.data['source'] = self.current_job['source']
            self.data['phone_prices'] = phone_prices
            self.data['next_url'] = next_url
        print 'Posting data:', self.data
        self.current_job = GAEServletClient.postDataAndGetNewJob(self.data)
        self.data['job_id'] = None
        self.data['source'] = None
        self.data['phone_prices'] = None
        self.data['next_url'] = None
        print 'New job: ', self.current_job
        self.schedular.enter(int(self.current_job['timetowait']), 1, self.work, ())

    def start(self):
        self.schedular.enter(self.time_to_sleep, 1, self.work, ())
        self.schedular.run()


ScraperAgent().start()
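# ---------------------------------------------------------------------------
# The imported modules (ScraperLoader, Utils, Clients.GAEServletClient) are
# not shown on this page. Below is a minimal sketch of the interface this
# agent assumes of them; the stub names and bodies are hypothetical
# placeholders for illustration, not the project's real implementations.
# ---------------------------------------------------------------------------

# ScraperLoader.getScraper(source) is expected to return an object exposing
# the four methods called in work():
class ExampleScraper(object):

    def setUrl(self, url):
        self.url = url            # page to scrape on the next scrape() call

    def scrape(self):
        pass                      # fetch and parse self.url

    def getPhones(self):
        return []                 # scraped phone/price records

    def getNextUrl(self):
        return None               # next page to crawl, or None when done

# Utils.isValidRule(job) must accept None (the very first run posts an empty
# payload just to obtain a job) and return a boolean.
#
# GAEServletClient.postDataAndGetNewJob(data) must POST the payload to the
# GAE servlet and return the next job dict, which work() expects to contain
# 'source', 'assigneeId', 'id', 'timetowait' and optionally 'url'.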