Subversion Repositories SmartDukaan

Rev

Rev 4039 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

'''
Created on 25-Aug-2011

@author: Varun Gupta
'''
import sched, time
import ScraperLoader, Utils
from Clients import GAEServletClient

class ScraperAgent:
    """Polling worker that scrapes phone prices for server-assigned jobs.

    Each cycle (see work()) optionally scrapes the current job, posts the
    result payload through GAEServletClient.postDataAndGetNewJob(), stores
    the job returned by that call and re-schedules itself.

    Attributes:
        schedular: sched.scheduler driven by time.time / time.sleep, so all
            delays given to it are in seconds.
        time_to_sleep: delay in seconds before the first cycle; also used as
            the fallback delay when a job carries no usable 'timetowait'.
        current_job: the job most recently returned by the server, or None
            before the first request.
        data: payload posted to the server on every cycle.
    """
    
    def __init__(self):
        """Create the scheduler and an empty result payload."""
        self.schedular = sched.scheduler(time.time, time.sleep)
        self.time_to_sleep = 2
        self.current_job = None
        self.data = {'id': None, 'job_id': None, 'source': None, 'phone_prices': None, 'next_url': None}
        print("ScraperAgent initiated at %f" % time.time())
        
    def work(self):
        """Run one cycle: scrape, report, fetch the next job, re-schedule.

        The scrape step only runs when Utils.isValidRule() accepts the
        current job; otherwise the payload is posted as it currently stands
        (job fields are None after every post).
        """
        job = self.current_job
        
        if Utils.isValidRule(job):
            print('Working on new job')
            
            url = job['url'] if 'url' in job else None
            # Single-argument print() behaves the same on Python 2 and 3;
            # the '%s %s' form reproduces the old "print a, b" output.
            print('%s %s' % ('URL: ', url))
            scraper = ScraperLoader.getScraper(job['source'])
            scraper.setUrl(url)
            scraper.scrape()
            phone_prices = scraper.getPhones()
            next_url = scraper.getNextUrl()
            
            self.data['id'] = job['assigneeId']
            self.data['job_id'] = job['id']
            self.data['source'] = job['source']
            self.data['phone_prices'] = phone_prices
            self.data['next_url'] = next_url
            
        print('%s %s' % ('Posting data:', self.data))
        
        self.current_job = GAEServletClient.postDataAndGetNewJob(self.data)
        
        # Clear the per-job fields so the next post does not resend them.
        # 'id' is deliberately left as is.
        # NOTE(review): presumably 'id' identifies this agent to the server
        # across requests - confirm against the servlet.
        self.data['job_id'] = None
        self.data['source'] = None
        self.data['phone_prices'] = None
        self.data['next_url'] = None
        
        print('%s %s' % ('New job: ', self.current_job))
        
        self.schedular.enter(self._next_delay(), 1, self.work, ())
    
    def _next_delay(self):
        """Return the number of seconds to wait before the next cycle.

        Uses the current job's 'timetowait' value. When there is no job, the
        key is missing or the value cannot be converted to an int, falls back
        to self.time_to_sleep so that the polling loop keeps running instead
        of dying with an exception raised inside the scheduler.
        """
        try:
            return int(self.current_job['timetowait'])
        except (TypeError, KeyError, ValueError):
            print('No usable timetowait in job, retrying in %s seconds' % (self.time_to_sleep,))
            return self.time_to_sleep
    
    def start(self):
        """Schedule the first cycle and block while the scheduler runs."""
        self.schedular.enter(self.time_to_sleep, 1, self.work, ())
        self.schedular.run()

# Script entry point: build the agent and hand control to its scheduler.
# This runs whenever the module is executed or imported.
_agent = ScraperAgent()
_agent.start()