Subversion Repositories SmartDukaan

Rev

Rev 4039 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3232 varun.gupt 1
'''
Created on 25-Aug-2011

@author: Varun Gupta
'''
6
import sched, time
7
import ScraperLoader, Utils
8
from Clients import GAEServletClient
9
 
10
class ScraperAgent:
    '''
    Polling worker that scrapes jobs handed out through GAEServletClient.

    Each call to work() scrapes the current job (when Utils.isValidRule
    accepts it), posts the collected data via
    GAEServletClient.postDataAndGetNewJob, stores the job returned by that
    call and schedules the next call to work().
    '''

    def __init__(self):
        '''Create the scheduler, the default delay and an empty result payload.'''
        self.schedular = sched.scheduler(time.time, time.sleep)
        # Delay (seconds) before the first cycle; also used as the fallback
        # delay when a job carries no usable 'timetowait'.
        self.time_to_sleep = 2
        self.current_job = None
        self.data = {'id': None, 'job_id': None, 'source': None, 'phone_prices': None, 'next_url': None}
        # Single-argument print(...) emits the same text under Python 2 and 3.
        print("ScraperAgent initiated at %f" % time.time())

    def _next_delay(self):
        '''
        Return the number of seconds to wait before the next work() cycle.

        Uses the current job's 'timetowait' when it is present and
        convertible to int; otherwise falls back to self.time_to_sleep so a
        missing or malformed job does not stop the polling loop.
        '''
        try:
            return int(self.current_job['timetowait'])
        except (TypeError, KeyError, ValueError):
            # TypeError: job is None / not subscriptable, or value is None.
            # KeyError: no 'timetowait' key. ValueError: non-numeric text.
            return self.time_to_sleep

    def work(self):
        '''
        Run one cycle: scrape the current job if valid, post the payload,
        fetch the next job and re-schedule this method.
        '''
        job = self.current_job

        if Utils.isValidRule(job):
            print('Working on new job')

            url = job.get('url')
            print('URL:  %s' % (url,))
            scraper = ScraperLoader.getScraper(job['source'])
            scraper.setUrl(url)
            scraper.scrape()
            phone_prices = scraper.getPhones()
            next_url = scraper.getNextUrl()

            self.data['id'] = job['assigneeId']
            self.data['job_id'] = job['id']
            self.data['source'] = job['source']
            self.data['phone_prices'] = phone_prices
            self.data['next_url'] = next_url

        print('Posting data: %s' % (self.data,))

        self.current_job = GAEServletClient.postDataAndGetNewJob(self.data)

        # Clear the per-job fields; 'id' is intentionally left in place so it
        # is sent again with the next post.
        self.data['job_id'] = None
        self.data['source'] = None
        self.data['phone_prices'] = None
        self.data['next_url'] = None

        print('New job:  %s' % (self.current_job,))

        self.schedular.enter(self._next_delay(), 1, self.work, ())

    def start(self):
        '''Queue the first work() cycle after time_to_sleep seconds and run the scheduler.'''
        self.schedular.enter(self.time_to_sleep, 1, self.work, ())
        self.schedular.run()
54
 
55
# Build the agent and start its polling loop as soon as this module runs.
_agent = ScraperAgent()
_agent.start()