Subversion Repositories SmartDukaan

Rev

Rev 5639 | Rev 6166 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4198 varun.gupt 1
'''
2
Created on 05-Dec-2011
3
@author: Varun Gupta
4
'''
5639 amar.kumar 5
from Scrapers.TradusScraper import TradusScraper
6
from Scrapers.SulekhaScraper import SulekhaScraper
4198 varun.gupt 7
 
8
class URL:
    '''
    A URL paired with a short label derived from its text.

    Attributes:
        url: the value passed to the constructor, stored unchanged.
        source: the result of URL.getSource(url); None when no label could
            be derived.
    '''

    def __init__(self, url):
        self.url = url
        self.source = URL.getSource(url)

    @staticmethod
    def getSource(url):
        '''
        Return the text between the first and second '.' in url, stripped
        of surrounding whitespace.

        For 'http://www.flipkart.com/mobiles/all/' this is 'flipkart'.
        Returns None when url contains no '.', or when the lookup fails for
        any other reason (for example url is None).
        '''
        try:
            return str(url.split('.')[1].strip())
        except Exception:
            # Deliberately best-effort: any input that cannot be split and
            # indexed is reported as "no source" instead of raising.
            return None

    def __str__(self):
        return '%s (%s)' % (self.url, self.source)

    def __unicode__(self):
        return '%s (%s)' % (self.url, self.source)

    def __repr__(self):
        # Without this, printing a list of URL objects shows only instance
        # addresses, which makes queue diagnostics unreadable.
        return 'URL(%r)' % (self.url,)
4198 varun.gupt 26
 
27
class URLQueue:
    '''
    First-in, first-out queue of URL objects, pre-loaded with a fixed list
    of start URLs.

    Attributes:
        urls: list of pending URL objects; get() removes from the front and
            enqueue() appends to the back.
    '''

    def __init__(self):
        # Creating a queue also resets the class-level page counters on both
        # scrapers. NOTE(review): presumably this restarts their pagination
        # for a new crawl -- confirm against the scraper classes.
        TradusScraper.currentPage = 1
        SulekhaScraper.currentPage = 1
        self.urls = [
            URL('http://www.flipkart.com/mobiles/all/'),
            URL('http://www.snapdeal.com/json/product/get/search/175/0/20?q=&sort=plrty&keyword='),
            URL('http://www.infibeam.com/Mobiles/search'),
            URL('http://www.homeshop18.com/gsm-handsets/category:3027/'),
            URL('http://www.flipkart.com/mobiles/tablet-20278'),
            URL('http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756'),
            URL('http://www.snapdeal.com/json/product/get/search/133/0/20?q=&sort=plrty&keyword='),
            URL('http://www.homeshop18.com/ipads-2f-tablets/category:8937/'),
            URL('http://mobiles.sulekha.com/common/common.aspx?type=mobileofferslist&makeId=0&modelId=0&pageNo='),
            URL('http://www.tradus.com/search/tradus_search/?query=tablets&filters=cat:7756&cat:7762')
            #URL('http://www.adexmart.com/modules/coremanager/modules/filtersearch/filtersearch.json.php?act=filter&ident=16&page=1&perpage=1000&orderby=newest&orderway=desc')
        ]

    def enqueue(self, url):
        '''
        Append url to the back of the queue.

        url may be a URL instance, which is stored as given, or any other
        value, which is wrapped in URL(url). None is ignored.
        '''
        if url is None:
            return

        # isinstance() also accepts URL subclasses; an exact class comparison
        # would wrap such an instance in a second URL whose source is None.
        new_url = url if isinstance(url, URL) else URL(url)

        # Single-argument print(...) produces the same output as the
        # Python 2 print statement and also parses under Python 3.
        print('Enqueue %s' % (new_url,))
        self.urls.append(new_url)
        print('New URL set: %s' % (self.urls,))

    def get(self):
        '''
        Remove and return the URL at the front of the queue.

        Returns None when the queue is empty.
        '''
        print('Count of URLs in queue: %s' % len(self.urls))

        # One emptiness check is enough; pop(0) cannot raise IndexError on a
        # non-empty list.
        url = self.urls.pop(0) if self.urls else None

        print('Poping %s' % (url,))
        print('New URL set: %s' % (self.urls,))
        return url