Subversion Repositories SmartDukaan

Rev

Rev 6166 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4198 varun.gupt 1
'''
2
Created on 05-Dec-2011
3
@author: Varun Gupta
4
'''
5639 amar.kumar 5
from Scrapers.TradusScraper import TradusScraper
6
from Scrapers.SulekhaScraper import SulekhaScraper
4198 varun.gupt 7
 
8
class URL:
    '''
    A crawl target: the raw URL together with the source name derived from it.

    Attributes:
        url    -- the value passed to the constructor, stored unchanged.
        source -- result of URL.getSource(url); None when it cannot be derived.
    '''

    def __init__(self, url):
        self.url = url
        self.source = URL.getSource(url)

    @staticmethod
    def getSource(url):
        '''
        Return the second dot-separated piece of `url`, stripped of
        surrounding whitespace, as a str.

        Example: 'http://www.flipkart.com/mobiles/all/' -> 'flipkart'.

        Returns None when the piece cannot be extracted (no '.' in the
        value, the value is not a string, or it cannot be converted to str).
        '''
        try:
            second_piece = url.split('.')[1]
            source = str(second_piece.strip())
        except Exception:
            # Deliberately best-effort: any malformed input yields None
            # instead of aborting the caller.
            source = None
        return source

    def _describe(self):
        # Single definition of the human-readable form shared by
        # __str__ and __unicode__.
        return '%s (%s)' % (self.url, self.source)

    def __str__(self):
        return self._describe()

    def __unicode__(self):
        return self._describe()

    def __repr__(self):
        # Containers print their items with repr(); without this a list of
        # URL objects shows only opaque instance addresses.
        return 'URL(%r)' % (self.url,)
4198 varun.gupt 26
 
27
class URLQueue:
    '''
    First-in, first-out queue of URL objects, pre-loaded with a fixed set of
    seed URLs.

    Attributes:
        urls -- list of pending URL objects; the head of the queue is index 0.
    '''

    # Seed URLs in the order they are queued on construction.
    _SEED_URLS = (
        'http://www.tradus.com/search/tradus_search/?query=camera&cat=7668&page=0',
        'http://www.flipkart.com/mobiles/all/',
        'http://www.snapdeal.com/json/product/get/search/175/0/20?q=&sort=plrty&keyword=',
        'http://www.tradus.com/search/tradus_search/?query=mobile&cat=7844&page=0',
        'http://www.infibeam.com/Mobiles/search?page=1',
        'http://www.homeshop18.com/gsm-handsets/category:3027/',
        'http://www.tradus.com/search/tradus_search/?query=camera&cat=7671&page=0',
        'http://www.flipkart.com/mobiles/tablet-20278',
        'http://www.homeshop18.com/ipads-2f-tablets/category:8937/',
        'http://www.flipkart.com/cameras/all-camcorder/',
        'http://www.tradus.com/search/tradus_search/?query=mobile&cat=7759&page=0',
        'http://www.snapdeal.com/json/product/get/search/133/0/20?q=&sort=plrty&keyword=',
        'http://mobiles.sulekha.com/common/common.aspx?type=mobileofferslist&makeId=0&modelId=0&pageNo=',
        'http://www.tradus.com/search/tradus_search/?query=camera&cat=7670&page=0',
        'http://www.homeshop18.com/digital-cameras/category:3178/',
        'http://www.flipkart.com/cameras/all-slr',
        'http://www.infibeam.com/Cameras/search?page=1',
        'http://www.tradus.com/search/tradus_search/?query=tablets&cat=7762&page=0',
        'http://www.flipkart.com/cameras/all-point-shoot',
        'http://www.homeshop18.com/digital-slrs/category:3188/',
        # NOTE(review): identical to the cat=7759 entry above, so that page is
        # queued twice -- confirm whether a different category was intended.
        'http://www.tradus.com/search/tradus_search/?query=mobile&cat=7759&page=0',
        # Disabled:
        # 'http://www.adexmart.com/modules/coremanager/modules/filtersearch/filtersearch.json.php?act=filter&ident=16&page=1&perpage=1000&orderby=newest&orderway=desc'
    )

    def __init__(self):
        # Side effect: resets the class-level page counters of both scrapers
        # every time a queue is built.
        TradusScraper.currentPage = 1
        SulekhaScraper.currentPage = 1
        self.urls = [URL(seed) for seed in self._SEED_URLS]

    def enqueue(self, url):
        '''
        Append `url` to the tail of the queue.

        `url` may be a URL instance (queued as is) or any other value, which
        is wrapped in a new URL. None is ignored. Returns None.
        '''
        if url is None:
            return

        # isinstance also accepts URL subclasses instead of re-wrapping them.
        new_url = url if isinstance(url, URL) else URL(url)

        # Single-argument parenthesised print emits the same text on
        # Python 2 and also parses on Python 3.
        print('Enqueue %s' % (new_url,))
        self.urls.append(new_url)
        print('New URL set: %s' % (self.urls,))

    def get(self):
        '''
        Remove and return the URL at the head of the queue, or None when the
        queue is empty.
        '''
        print('Count of URLs in queue: %s' % (len(self.urls),))
        try:
            url = self.urls.pop(0)
        except IndexError:
            # Empty queue.
            url = None

        print('Poping %s' % (url,))
        print('New URL set: %s' % (self.urls,))
        return url