Subversion Repositories SmartDukaan

Rev

Rev 11934 | Rev 12198 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 11934 Rev 12197
Line 31... Line 31...
31
        request.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.218 Safari/535.1')
31
        request.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.218 Safari/535.1')
32
        opener = urllib2.build_opener()
32
        opener = urllib2.build_opener()
33
        response_data = ""
33
        response_data = ""
34
        try:
34
        try:
35
            response_data = opener.open(request).read()
35
            response_data = opener.open(request).read()
-
 
36
            print "Fetched response from flipkart for %s" %(url)
36
            
37
            
37
        except urllib2.HTTPError as e:
38
        except urllib2.HTTPError as e:
38
            print 'ERROR: ', e
39
            print 'ERROR: ', e
39
            print 'Retrying'
40
            print 'Retrying'
40
            self.count_trials += 1
41
            self.count_trials += 1
Line 44... Line 45...
44
        
45
        
45
        self.response_data=response_data
46
        self.response_data=response_data
46
    
47
    
47
    def createData(self):
48
    def createData(self):
48
        self.soup = strip_tags(self.response_data,invalid_tags)
49
        self.soup = strip_tags(self.response_data,invalid_tags)
-
 
50
        self.response_data =None
49
        return self.scrape(self.soup)
51
        return self.scrape(self.soup)
50
    
52
    
51
    
53
    
52
    def scrape(self,soup):
54
    def scrape(self,soup):
53
        sellerData = soup.findAll("div" , {"class" : "a-row a-spacing-mini olpOffer"})
55
        sellerData = soup.findAll("div" , {"class" : "a-row a-spacing-mini olpOffer"})