Subversion Repositories SmartDukaan

Rev

Rev 4198 | Rev 5639 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4198 varun.gupt 1
'''
Created on 05-Dec-2011
@author: Varun Gupta
'''
5
 
6
class URL:
    '''
    A URL string paired with the "source" name derived from its host.

    Attributes:
        url    -- the value passed to the constructor, stored unchanged
        source -- result of URL.getSource(url); None when no name can be derived
    '''

    def __init__(self, url):
        self.url = url
        self.source = URL.getSource(url)

    @staticmethod
    def getSource(url):
        '''
        Return the source name encoded in the host part of *url*, or None.

        For a host with three or more dot-separated labels the second label
        is returned ('http://www.flipkart.com/mobiles/all/' -> 'flipkart').
        For a host with exactly two labels the first one is returned
        ('http://flipkart.com/mobiles' -> 'flipkart'). None is returned when
        the host has fewer than two labels or *url* is not a text string.
        '''
        try:
            text = url.strip()

            # Only treat '://' as a scheme separator when it appears before
            # any path/query/fragment character; otherwise it belongs to an
            # embedded URL (e.g. '...?next=http://other.com').
            scheme, separator, remainder = text.partition('://')
            if not separator or any(char in scheme for char in '/?#'):
                remainder = text[2:] if text.startswith('//') else text

            # Keep only the authority: everything before path, query, fragment.
            authority = remainder
            for delimiter in '/?#':
                authority = authority.split(delimiter, 1)[0]

            # Drop optional 'user:password@' prefix and ':port' suffix.
            host = authority.rsplit('@', 1)[-1].split(':', 1)[0]

            labels = host.split('.')
            if len(labels) >= 3:
                label = labels[1]
            elif len(labels) == 2:
                label = labels[0]
            else:
                return None

            label = label.strip()
            return str(label) if label else None
        except (AttributeError, TypeError, UnicodeError):
            # Non-string input (None, numbers, bytes on Python 3) or a label
            # that cannot be converted with str().
            return None

    def __str__(self):
        return '%s (%s)' % (self.url, self.source)

    def __unicode__(self):
        return '%s (%s)' % (self.url, self.source)

    def __repr__(self):
        # Readable form for debugging output, e.g. when a list of URLs is printed.
        return 'URL(%r)' % (self.url,)
4198 varun.gupt 24
 
25
class URLQueue:
    '''
    First-in, first-out list of URL objects, pre-filled with seed URLs.

    The pending entries are kept in the public list attribute ``urls``.
    Every operation prints its progress to standard output.
    '''

    # URLs loaded into a new queue when no explicit seed list is given.
    DEFAULT_SEED_URLS = (
        'http://www.flipkart.com/mobiles/all/',
        'http://www.snapdeal.com/json/product/get/search/175/0/20?q=&sort=plrty&keyword=',
        'http://www.infibeam.com/Mobiles/search',
        'http://www.homeshop18.com/gsm-handsets/category:3027/',
        'http://www.flipkart.com/mobiles/tablet-20278',
        'http://www.snapdeal.com/json/product/get/search/133/0/20?q=&sort=plrty&keyword=',
        'http://www.homeshop18.com/ipads-2f-tablets/category:8937/',
        # Disabled seed, kept for reference:
        #'http://www.adexmart.com/modules/coremanager/modules/filtersearch/filtersearch.json.php?act=filter&ident=16&page=1&perpage=1000&orderby=newest&orderway=desc',
    )

    def __init__(self, seed_urls=None):
        '''
        Create the queue.

        seed_urls -- optional iterable of URL objects or URL strings; when
                     omitted, DEFAULT_SEED_URLS is used.
        '''
        if seed_urls is None:
            seed_urls = self.DEFAULT_SEED_URLS
        self.urls = [self._as_url(seed) for seed in seed_urls]

    @staticmethod
    def _as_url(url):
        '''Return *url* unchanged if it is already a URL, else wrap it in one.'''
        return url if isinstance(url, URL) else URL(url)

    def enqueue(self, url):
        '''
        Append *url* (a URL object or a URL string) to the end of the queue.

        None is ignored.
        '''
        if url is None:
            return

        new_url = self._as_url(url)

        # Single-argument print calls produce the same text on Python 2 and 3.
        print('Enqueue %s' % (new_url,))
        self.urls.append(new_url)
        print('New URL set: %s' % (self.urls,))

    def get(self):
        '''
        Remove and return the URL at the front of the queue.

        Returns None when the queue is empty.
        '''
        print('Count of URLs in queue: %s' % (len(self.urls),))

        url = self.urls.pop(0) if self.urls else None

        print('Poping %s' % (url,))
        print('New URL set: %s' % (self.urls,))
        return url