Subversion Repositories SmartDukaan

Rev

Rev 12765 | Rev 12821 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 12765 Rev 12766
Line 7... Line 7...
7
    def __init__(self):
7
    def __init__(self):
8
        self.count_trials = 0
8
        self.count_trials = 0
9
        self.redirectCount = 0
9
        self.redirectCount = 0
10
    
10
    
11
    def read(self, url):
11
    def read(self, url):
12
        print url.replace('http://www.flipkart.com','163.53.77.21')
12
        url = url.replace('www.flipkart.com','163.53.77.21')
13
        print url
13
        print url
14
        request = urllib2.Request(url)
14
        request = urllib2.Request(url)
15
        request.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux i686; rv:31.0) Gecko/20100101 Firefox/31.0')
15
        request.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux i686; rv:31.0) Gecko/20100101 Firefox/31.0')
16
        opener = urllib2.build_opener()
16
        opener = urllib2.build_opener()
17
        response_data = ""
17
        response_data = ""
-
 
18
        redirect_url = ""
18
        try:
19
        try:
19
            response = urllib2.urlopen(request)
20
            response = urllib2.urlopen(request)
20
            response_data = response.read()
21
            response_data = response.read()
21
            print "Fetched response from flipkart for %s" %(url)
22
            print "Fetched response from flipkart for %s" %(url)
22
            redirect_url = response.url
23
            redirect_url = response.url
Line 42... Line 43...
42
        for x in table_rows:
43
        for x in table_rows:
43
            print x
44
            print x
44
    
45
    
45
    def createData(self,url, redirect_url):
46
    def createData(self,url, redirect_url):
46
        print "Creating soup from flipkart data for %s" %(url)
47
        print "Creating soup from flipkart data for %s" %(url)
-
 
48
        redirect_url = redirect_url.replace('www.flipkart.com','163.53.77.21')
47
        print redirect_url
49
        print "Redirect url is %s"%(redirect_url)
48
        page=self.response_data.decode("utf-8")
50
        page=self.response_data.decode("utf-8")
49
        self.soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)
51
        self.soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)
50
        page = None
52
        page = None
51
        self.response_data = None
53
        self.response_data = None
52
        print "Soup created from flipkart data for %s" %(url)
54
        print "Soup created from flipkart data for %s" %(url)
53
        if (url==redirect_url):
55
        if (url==redirect_url):
54
            return self.scrape(self.soup,url)
56
            return self.scrape(self.soup,url)
55
        else:
57
        else:
56
            print self.redirectCount
58
            print self.redirectCount
57
            self.redirectCount+=1
59
            self.redirectCount+=1
58
            if self.redirectCount >4:
60
            if self.redirectCount >5:
59
                raise
61
                raise
60
            return self.read(url)
62
            return self.read(url)
61
            
63
            
62
            
64
            
63
    
65