| 4198 |
varun.gupt |
1 |
'''
|
|
|
2 |
Created on 05-Dec-2011
|
|
|
3 |
@author: Varun Gupta
|
|
|
4 |
'''
|
|
|
5 |
|
|
|
6 |
class URL:
    """A URL value paired with a short "source" label derived from it.

    Attributes (both set in ``__init__``):
      url    -- the value passed to the constructor, stored unchanged.
      source -- the result of ``URL.getSource(url)``; ``None`` when no
                label could be derived.
    """

    def __init__(self, url):
        """Store *url* and derive its source label."""
        self.url = url
        self.source = URL.getSource(url)

    @staticmethod
    def getSource(url):
        """Return the second dot-separated piece of *url*, or ``None``.

        The URL is split on ``'.'`` and the piece at index 1 is returned
        with surrounding whitespace stripped, e.g.
        ``'http://www.flipkart.com/mobiles/all/'`` gives ``'flipkart'``.

        ``None`` is returned when *url* has no ``'.'``, is not a text
        string, or the piece cannot be converted with ``str()``.
        """
        try:
            return str(url.split('.')[1].strip())
        except (AttributeError, IndexError, TypeError, UnicodeError):
            # AttributeError: url is None or has no split()/strip().
            # IndexError:     url contains no '.'.
            # TypeError:      url is a bytes-like value that rejects a text separator.
            # UnicodeError:   str() could not encode the piece.
            return None

    def __str__(self):
        """Return ``'<url> (<source>)'``."""
        return '%s (%s)' % (self.url, self.source)

    def __repr__(self):
        """Return a readable form so URL objects are legible inside containers."""
        return 'URL(%r)' % (self.url,)
|
|
|
21 |
|
|
|
22 |
|
|
|
23 |
class URLQueue:
    """First-in, first-out queue of URL objects, pre-loaded with seed URLs.

    The pending items are held in the list attribute ``urls``; ``enqueue``
    appends to its end and ``get`` removes from its front. Both methods
    print progress messages to standard output.
    """

    def __init__(self):
        """Create the queue holding the fixed list of seed URLs."""
        self.urls = [
            URL('http://www.flipkart.com/mobiles/all/'),
            URL('http://www.letsbuy.com/mobile-phones-mobiles-c-254_88?perpage=192'),
            URL('http://www.infibeam.com/Mobiles/search'),
            URL('http://www.homeshop18.com/gsm-handsets/category:3027/'),
            URL('http://www.flipkart.com/mobiles/tablet-20278'),
            URL('http://www.letsbuy.com/mobile-phones-tablets-c-254_393?perpage=192'),
            URL('http://www.homeshop18.com/ipads-2f-tablets/category:8937/'),
            URL('http://www.adexmart.com/modules/coremanager/modules/filtersearch/filtersearch.json.php?act=filter&ident=16&page=1&perpage=1000&orderby=newest&orderway=desc')
        ]

    def enqueue(self, url):
        """Append *url* to the end of the queue.

        *url* may be a URL instance (stored as-is) or any other value,
        which is wrapped in ``URL(url)``. ``None`` is ignored silently.
        """
        if url is None:
            return

        # isinstance also accepts URL subclasses, which an exact-class
        # comparison would wrongly wrap in a second URL object.
        new_url = url if isinstance(url, URL) else URL(url)

        # Single-argument print form: emits the same text as the
        # two-operand print statement and is also valid on Python 3.
        print('Enqueue %s' % (new_url,))
        self.urls.append(new_url)
        print('New URL set: %s' % (self.urls,))

    def get(self):
        """Remove and return the item at the front of the queue.

        Returns ``None`` when the queue is empty.
        """
        print('Count of URLs in queue: %s' % (len(self.urls),))
        try:
            url = self.urls.pop(0)
        except IndexError:
            # Empty queue: report "nothing to fetch" instead of raising.
            url = None

        print('Poping %s' % (url,))
        print('New URL set: %s' % (self.urls,))
        return url
|