# Subversion Repositories SmartDukaan
#
# Rev 5761 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

'''
Created on 05-Dec-2011
@author: Varun Gupta
'''
from Scrapers.TradusScraper import TradusScraper
from Scrapers.SulekhaScraper import SulekhaScraper

class URL:
    '''
    A URL string paired with a short source label derived from it.

    Attributes:
        url    -- the value passed to the constructor, stored unchanged
        source -- result of URL.getSource(url); None when no label
                  could be extracted
    '''

    def __init__(self, url):
        self.url = url
        self.source = URL.getSource(url)

    @staticmethod
    def getSource(url):
        '''
        Return the text between the first and second '.' of url, with
        surrounding whitespace stripped and converted with str().

        Any failure while extracting it (for example url has no '.',
        or url is not a string at all) yields None instead of raising.
        '''
        try:
            second_piece = url.split('.')[1]
            label = str(second_piece.strip())
        except Exception:
            label = None
        return label

    def _describe(self):
        # Single place that builds the "<url> (<source>)" display text
        # shared by __str__ and __unicode__.
        fields = (self.url, self.source)
        return '%s (%s)' % fields

    def __str__(self):
        return self._describe()

    def __unicode__(self):
        return self._describe()

class URLQueue:
    '''
    First-in, first-out queue of URL objects, pre-loaded with a fixed
    list of seed URLs.

    The queue is kept in self.urls, a plain list: enqueue() appends at
    the end and get() removes from the front. Both methods print
    progress messages to standard output.
    '''

    def __init__(self):
        '''
        Set currentPage to 1 on TradusScraper and SulekhaScraper and
        fill the queue with the seed URLs.
        '''
        # Class-level attributes on the scrapers, so this affects every
        # user of those classes, not just this queue.
        TradusScraper.currentPage = 1
        SulekhaScraper.currentPage = 1
        self.urls = [
            URL('http://www.tradus.com/search/tradus_search/?query=camera&cat=7668&page=0'),
            URL('http://www.flipkart.com/mobiles/all/'),
            URL('http://www.snapdeal.com/json/product/get/search/175/0/20?q=&sort=plrty&keyword='),
            URL('http://www.infibeam.com/Mobiles/search?page=1'),
            URL('http://www.tradus.com/search/tradus_search/?query=camera&cat=7671&page=0'),
            URL('http://www.flipkart.com/mobiles/tablet-20278'),
            URL('http://www.homeshop18.com/gsm-handsets/category:3027/'),
            URL('http://www.tradus.com/search/tradus_search/?query=mobile&filters=cat:7756&page=0'),
            URL('http://www.homeshop18.com/ipads-2f-tablets/category:8937/'),
            URL('http://www.flipkart.com/cameras/all-camcorder/'),
            URL('http://www.snapdeal.com/json/product/get/search/133/0/20?q=&sort=plrty&keyword='),
            # NOTE(review): ends with an empty 'pageNo=' -- presumably
            # the Sulekha scraper appends the page number; confirm there.
            URL('http://mobiles.sulekha.com/common/common.aspx?type=mobileofferslist&makeId=0&modelId=0&pageNo='),
            URL('http://www.tradus.com/search/tradus_search/?query=camera&cat=7670&page=0'),
            URL('http://www.flipkart.com/cameras/all-slr'),
            URL('http://www.homeshop18.com/digital-cameras/category:3178/'),
            URL('http://www.infibeam.com/Cameras/search?page=1'),
            URL('http://www.tradus.com/search/tradus_search/?query=tablets&filters=cat:7756&cat:7762&page=0'),
            URL('http://www.flipkart.com/cameras/all-point-shoot'),
            URL('http://www.homeshop18.com/digital-slrs/category:3188/'),
            # Disabled seed:
            #URL('http://www.adexmart.com/modules/coremanager/modules/filtersearch/filtersearch.json.php?act=filter&ident=16&page=1&perpage=1000&orderby=newest&orderway=desc')
        ]

    def enqueue(self, url):
        '''
        Append url to the end of the queue.

        url -- a URL instance (stored as is), any other value (wrapped
               in URL first), or None (ignored; nothing is queued or
               printed).
        '''
        if url is None:
            return

        # isinstance rather than an exact class comparison, so instances
        # of URL subclasses are queued unchanged instead of being passed
        # to URL() as if they were strings.
        new_url = url if isinstance(url, URL) else URL(url)

        # Single-argument print(...) produces the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Enqueue %s' % (new_url,))
        self.urls.append(new_url)
        print('New URL set: %s' % (self.urls,))

    def get(self):
        '''
        Remove and return the URL at the front of the queue.

        Returns None when the queue is empty.
        '''
        print('Count of URLs in queue: %s' % (len(self.urls),))
        try:
            # Just attempt the pop: one IndexError handler covers the
            # empty queue, including a list emptied by someone else
            # after the count above was printed.
            url = self.urls.pop(0)
        except IndexError:
            url = None

        print('Poping %s' % (url,))
        print('New URL set: %s' % (self.urls,))
        return url