# Subversion Repositories SmartDukaan
#
# Rev
#
# Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed

'''
Created on 20-Sep-2011

@author: Varun Gupta
'''
import urllib2

class BaseScraper:
    '''
    Fetches the raw body of a URL, retrying a bounded number of times when
    the server answers with an HTTP error.

    Attributes:
        count_trials: number of failed attempts made by the most recent call
            to read(). It is reset to 0 at the start of every call.
    '''

    # Total number of attempts read() makes before giving up.
    MAX_TRIALS = 3

    # Chrome 13 User-Agent string sent with every request.
    USER_AGENT = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.218 Safari/535.1'

    def __init__(self):
        self.count_trials = 0

    def read(self, url):
        '''
        Return the response body for `url`.

        The request is attempted up to MAX_TRIALS times; only
        urllib2.HTTPError triggers a retry. Each failure is reported on
        stdout.

        Raises:
            urllib2.HTTPError: the error from the last attempt, once every
                attempt has failed. (Previously this path fell through to an
                unbound local and surfaced as UnboundLocalError.)
            Any other exception raised while opening or reading the URL
            propagates immediately without a retry.
        '''
        request = urllib2.Request(url)
        request.add_header('User-Agent', self.USER_AGENT)

        # The retry budget is per call: without this reset, failures from
        # earlier calls would use up the attempts available to later ones.
        self.count_trials = 0

        while True:
            try:
                return self._fetch(request)
            except urllib2.HTTPError as e:
                self.count_trials += 1
                # Single-argument parenthesised form prints the same text
                # as the print statement would.
                print('ERROR: %s' % (e,))
                if self.count_trials >= self.MAX_TRIALS:
                    # Out of attempts: surface the real HTTP error.
                    raise
                print('Retrying')

    def _fetch(self, request):
        '''Perform one attempt: open `request` and return the body read from it.'''
        response = urllib2.build_opener().open(request)
        try:
            return response.read()
        finally:
            # Release the underlying connection/file even if read() fails.
            response.close()

if __name__ == '__main__':
    # Manual smoke run: fetch one listing page and dump the raw response
    # body to stdout.
    page = BaseScraper().read('http://www.flipkart.com/mobiles/all')
    print(page)