Subversion Repositories SmartDukaan

Rev

Rev 12410 | Rev 12412 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 12410 Rev 12411
Line 29... Line 29...
29
        self.count_trials = 0
29
        self.count_trials = 0
30
    
30
    
31
    def read(self, urls, findStore):
31
    def read(self, urls, findStore):
32
        returnMap = {}
32
        returnMap = {}
33
        print datetime.datetime.now()
33
        print datetime.datetime.now()
34
        header = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.218 Safari/535.1'}
34
        header = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.218 Safari/535.1',
-
 
35
                  'Accept-Charset' : 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
-
 
36
                  'Accept-Encoding':'gzip,deflate,sdch'
-
 
37
                  }
-
 
38
 
35
        rs = (grequests.get(u, headers=header) for u in urls)
39
        rs = (grequests.get(u, headers=header) for u in urls)
36
        for x in grequests.map(rs):
40
        for x in grequests.map(rs):
37
            soup = strip_tags(x.text,invalid_tags)
41
            soup = strip_tags(x.text,invalid_tags)
38
            print soup
-
 
39
            for tag in soup.findAll(True):
42
            for tag in soup.findAll(True):
40
                if tag.name in invalid_tags:
43
                if tag.name in invalid_tags:
41
                    s = ""
44
                    s = ""
42
        
45
        
43
                    for c in tag.contents:
46
                    for c in tag.contents: