Subversion Repositories SmartDukaan

Rev

Rev 15906 | Rev 16096 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 15906 Rev 16004
Line 1... Line 1...
1
import urllib2
1
import urllib2
2
from BeautifulSoup import BeautifulSoup, NavigableString
2
from BeautifulSoup import BeautifulSoup, NavigableString
3
from dtr.utils.utils import fetchResponseUsingProxy, removePriceFormatting
3
from dtr.utils.utils import fetchResponseUsingProxy, removePriceFormatting, \
-
 
4
transformUrl
4
import re
5
import re
5
 
6
 
6
invalid_tags = ['b', 'i', 'u']
7
invalid_tags = ['b', 'i', 'u']
7
bestSellers = []
8
bestSellers = []
8
 
9
 
Line 36... Line 37...
36
        self.count_trials = 0
37
        self.count_trials = 0
37
        self.livePricing = livePricing
38
        self.livePricing = livePricing
38
    
39
    
39
    def read(self, url):
40
    def read(self, url):
40
        response_data = ""
41
        response_data = ""
-
 
42
        url = transformUrl(url,5)
-
 
43
        print url
41
        try:
44
        try:
42
            response_data = fetchResponseUsingProxy(url, headers=headers, proxy=False)
45
            response_data = fetchResponseUsingProxy(url, headers=headers, proxy=False)
43
        except Exception as e:
46
        except Exception as e:
44
            print 'ERROR: ', e
47
            print 'ERROR: ', e
45
            print 'Retrying'
48
            print 'Retrying'
Line 83... Line 86...
83
 
86
 
84
if __name__ == '__main__':
87
if __name__ == '__main__':
85
    import datetime
88
    import datetime
86
    print datetime.datetime.now()
89
    print datetime.datetime.now()
87
    scraper = ShopCluesScraper()
90
    scraper = ShopCluesScraper()
88
    print scraper.read('http://m.shopclues.com/samsung-galaxy-note-4-white-colour-32gb-ready-stock.html')
91
    print scraper.read('http://shopclues.com/samsung-galaxy-note-4-13.html')
89
    print datetime.datetime.now()
92
    print datetime.datetime.now()
90
93