Subversion Repositories SmartDukaan

Rev

Rev 5291 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4198 varun.gupt 1
'''
2
Created on 05-Dec-2011
3
@author: Varun Gupta
4
'''
5
 
6
class URL(object):
    '''
    A URL paired with a short name identifying the site it points to.

    Attributes:
        url: the value passed to the constructor, stored unchanged.
        source: the result of URL.getSource(url); None when no source
            name can be derived.
    '''

    def __init__(self, url):
        self.url = url
        self.source = URL.getSource(url)

    @staticmethod
    def getSource(url):
        '''
        Return the second dot-separated component of url, stripped of
        surrounding whitespace, or None when it cannot be extracted.

        For 'http://www.flipkart.com/mobiles/all/' this yields 'flipkart'.
        The extraction is purely positional, so it only produces a site
        name for URLs shaped like '<scheme>://www.<site>.<tld>/...'; a URL
        without the 'www.' prefix yields whatever follows the first dot.
        '''
        try:
            return str(url.split('.')[1].strip())
        except (AttributeError, IndexError, TypeError, UnicodeError):
            # Best effort by design: a non-string value, a value with no
            # dot in it, or text that cannot be converted to str has no
            # usable source name.
            return None

    def __str__(self):
        return '%s (%s)' % (self.url, self.source)

    def __repr__(self):
        # Keeps containers of URL objects readable when they are printed.
        return 'URL(%r)' % (self.url,)
21
 
22
 
23
class URLQueue:
    '''
    First-in, first-out queue of URL objects.

    New entries are appended to the end of the `urls` list and `get`
    removes entries from the front. Every operation prints the queue
    state to standard output.
    '''

    # Listing URLs the queue is seeded with when no explicit list is given.
    DEFAULT_URLS = (
        'http://www.flipkart.com/mobiles/all/',
        'http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192',
        'http://www.infibeam.com/Mobiles/search',
        'http://www.homeshop18.com/gsm-handsets/category:3027/',
        'http://www.flipkart.com/mobiles/tablet-20278',
        'http://www.letsbuy.com/mobile-phones-tablets-c-254_393?perpage=192',
        'http://www.homeshop18.com/ipads-2f-tablets/category:8937/',
        'http://www.adexmart.com/modules/coremanager/modules/filtersearch/filtersearch.json.php?act=filter&ident=16&page=1&perpage=1000&orderby=newest&orderway=desc',
    )

    def __init__(self, urls=None):
        '''
        Build the queue.

        urls: optional iterable of URL objects and/or URL strings to start
            with. When omitted, the queue is seeded with DEFAULT_URLS.
        '''
        seeds = self.DEFAULT_URLS if urls is None else urls
        self.urls = [
            seed if isinstance(seed, URL) else URL(seed) for seed in seeds
        ]

    def enqueue(self, url):
        '''
        Append url to the end of the queue; None is silently ignored.

        url: a URL object, or a value that is wrapped in URL(...) first.
        '''
        if url is None:
            return

        # isinstance also accepts subclasses of URL, which an exact class
        # comparison would wrap in a second URL object.
        new_url = url if isinstance(url, URL) else URL(url)

        print('Enqueue %s' % (new_url,))
        self.urls.append(new_url)
        print('New URL set: %s' % (self.urls,))

    def get(self):
        '''
        Remove and return the entry at the front of the queue.

        Returns None when the queue is empty.
        '''
        print('Count of URLs in queue: %d' % len(self.urls))

        # `urls` stays a plain list because it is a public attribute.
        url = self.urls.pop(0) if self.urls else None

        print('Poping %s' % (url,))
        print('New URL set: %s' % (self.urls,))
        return url