Subversion Repositories SmartDukaan

Rev

Rev 5761 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4198 varun.gupt 1
'''
2
Created on 05-Dec-2011
3
@author: Varun Gupta
4
'''
5639 amar.kumar 5
from Scrapers.TradusScraper import TradusScraper
6
from Scrapers.SulekhaScraper import SulekhaScraper
4198 varun.gupt 7
 
8
class URL:
    '''
    A crawl target: the raw URL plus a short label for the site it points to.

    Attributes:
        url: the value passed to the constructor, stored unchanged.
        source: the label computed by getSource(url), or None when no
            label could be derived.
    '''

    def __init__(self, url):
        self.url = url
        self.source = URL.getSource(url)

    @staticmethod
    def getSource(url):
        '''
        Return the second dot-separated piece of url, stripped of whitespace.

        For 'http://www.tradus.com/...' this is 'tradus'; for
        'http://mobiles.sulekha.com/...' it is 'sulekha'. The split is applied
        to the whole string rather than to the host name only, so a URL
        without a leading sub-domain (e.g. 'http://flipkart.com/a.b') yields a
        piece that includes part of the path ('com/a').

        Returns None when url has no '.' in it, is not a string, or the
        label cannot be converted with str().
        '''
        try:
            return str(url.split('.')[1].strip())
        except Exception:
            # Deliberately best-effort: a URL whose source cannot be derived
            # is still a valid queue entry, it just carries source=None.
            return None

    def __str__(self):
        return '%s (%s)' % (self.url, self.source)

    def __unicode__(self):
        return '%s (%s)' % (self.url, self.source)

    def __repr__(self):
        # Lists of URL objects are rendered with repr(); without this method
        # they show up as opaque instance addresses in debug output.
        return 'URL(%r)' % (self.url,)
4198 varun.gupt 26
 
27
class URLQueue:
    '''
    First-in, first-out queue of URL objects, pre-loaded with a fixed list of
    start pages.

    Attributes:
        urls: list of URL objects; get() removes from the front and
            enqueue() appends to the back.
    '''

    def __init__(self):
        '''
        Reset the class-level currentPage attribute of TradusScraper and
        SulekhaScraper to 1 and load the start URLs.
        '''
        TradusScraper.currentPage = 1
        SulekhaScraper.currentPage = 1
        # The order below is the order in which get() hands the URLs out.
        self.urls = [
            URL('http://www.tradus.com/search/tradus_search/?query=camera&cat=7668&page=0'),
            URL('http://www.flipkart.com/mobiles/all/'),
            URL('http://www.snapdeal.com/json/product/get/search/175/0/20?q=&sort=plrty&keyword='),
            URL('http://www.infibeam.com/Mobiles/search?page=1'),
            URL('http://www.tradus.com/search/tradus_search/?query=camera&cat=7671&page=0'),
            URL('http://www.flipkart.com/mobiles/tablet-20278'),
            URL('http://www.homeshop18.com/gsm-handsets/category:3027/'),
            URL('http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756&page=0'),
            URL('http://www.homeshop18.com/ipads-2f-tablets/category:8937/'),
            URL('http://www.flipkart.com/cameras/all-camcorder/'),
            URL('http://www.snapdeal.com/json/product/get/search/133/0/20?q=&sort=plrty&keyword='),
            URL('http://mobiles.sulekha.com/common/common.aspx?type=mobileofferslist&makeId=0&modelId=0&pageNo='),
            URL('http://www.tradus.com/search/tradus_search/?query=camera&cat=7670&page=0'),
            URL('http://www.flipkart.com/cameras/all-slr'),
            URL('http://www.homeshop18.com/digital-cameras/category:3178/'),
            URL('http://www.infibeam.com/Cameras/search?page=1'),
            URL('http://www.tradus.com/search/tradus_search/?query=tablets&filters=cat:7756&cat:7762&page=0'),
            URL('http://www.flipkart.com/cameras/all-point-shoot'),
            URL('http://www.homeshop18.com/digital-slrs/category:3188/')
            # Not queued (was commented out in the original list):
            #URL('http://www.adexmart.com/modules/coremanager/modules/filtersearch/filtersearch.json.php?act=filter&ident=16&page=1&perpage=1000&orderby=newest&orderway=desc')
        ]

    def enqueue(self, url):
        '''
        Append url to the back of the queue; None is ignored.

        url may be a URL object (or an instance of a URL subclass), which is
        stored as-is, or any other value, which is wrapped in URL(url).
        '''
        if url is None:
            return

        # isinstance rather than an exact class comparison, so that a URL
        # subclass instance is stored directly instead of being re-wrapped.
        new_url = url if isinstance(url, URL) else URL(url)

        # Single-argument parenthesised print: same output as the two-item
        # print statement on Python 2, and also valid on Python 3.
        print('Enqueue %s' % (new_url,))
        self.urls.append(new_url)
        print('New URL set: %s' % (self.urls,))

    def get(self):
        '''
        Remove and return the URL at the front of the queue.

        Returns None when the queue is empty.
        '''
        print('Count of URLs in queue: %d' % len(self.urls))

        # The emptiness check makes pop(0) safe, so no IndexError handling
        # is needed.
        url = self.urls.pop(0) if self.urls else None

        print('Poping %s' % (url,))
        print('New URL set: %s' % (self.urls,))
        return url