Subversion Repositories SmartDukaan

Rev

Rev 4039 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4039 varun.gupt 1
'''
2
Created on 20-Sep-2011
3
 
4
@author: Varun Gupta
5
'''
6
import urllib2
7
 
8
class BaseScraper:
    '''
    Minimal HTTP fetcher with a bounded retry on HTTP errors.

    Attributes:
        count_trials: number of failed attempts made by the most recent
            (or currently running) call to read().
    '''

    # Maximum number of attempts made by a single read() call.
    MAX_TRIALS = 3

    # Browser-like User-Agent sent with every request.
    USER_AGENT = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.218 Safari/535.1'

    def __init__(self):
        self.count_trials = 0

    def read(self, url):
        '''
        Fetch url and return the raw response body.

        The request is attempted up to MAX_TRIALS times; only
        urllib2.HTTPError triggers a retry, any other exception
        propagates immediately.

        Args:
            url: the URL to open.

        Returns:
            The response body exactly as returned by the response's read().

        Raises:
            urllib2.HTTPError: the error from the last attempt, once
                MAX_TRIALS attempts have all failed.
        '''
        request = urllib2.Request(url)
        request.add_header('User-Agent', self.USER_AGENT)
        opener = urllib2.build_opener()

        # The retry budget is per call: without this reset, failures from
        # earlier reads would permanently use up the retries of later ones.
        self.count_trials = 0

        while True:
            try:
                response = opener.open(request)
                try:
                    return response.read()
                finally:
                    # Release the underlying connection/file promptly.
                    response.close()

            except urllib2.HTTPError as e:
                # Two spaces after the colon keep the log text identical to
                # the former "print 'ERROR: ', e" statement.
                print('ERROR:  %s' % (e,))
                self.count_trials += 1

                if self.count_trials >= self.MAX_TRIALS:
                    # Out of attempts: surface the real HTTP error instead of
                    # falling through to an unassigned result variable.
                    raise

                print('Retrying')
28
 
29
if __name__ == '__main__':
    # Manual smoke test: fetch one listing page and dump the raw response
    # body to stdout.
    print(BaseScraper().read('http://www.flipkart.com/mobiles/all'))