# Rev 15265 | Rev 17182 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
import datetime
import json
import re
import traceback
from sys import exit

from pyquery import PyQuery

from dtr.utils.utils import fetchResponseUsingProxy, transformUrl

# Browser-like request headers passed to fetchResponseUsingProxy on every fetch.
headers = {
    'User-agent': 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Connection': 'keep-alive',
    'Accept-Encoding': 'gzip,deflate,sdch',
}


class FlipkartProductPageScraper:
    """Fetch a Flipkart product page and extract price and stock data.

    Attributes set by this class:
        count_trials: number of failed fetch attempts during the most
            recent ``read()`` call.
        redirectCount: initialised to 0 and not otherwise used here.
        response_data: raw response of the last fetch (set by ``read()``).
    """

    # Maximum number of fetch attempts made by a single read() call.
    MAX_TRIALS = 3

    def __init__(self):
        self.count_trials = 0
        self.redirectCount = 0

    def read(self, url):
        """Fetch ``url`` through the proxy helper and parse the page.

        The URL is passed through ``transformUrl(url, 2)`` exactly once and
        then fetched up to ``MAX_TRIALS`` times; every failure is printed
        together with its traceback.

        Args:
            url: product page URL.

        Returns:
            The dict produced by ``createData``.

        Raises:
            Whatever ``createData`` raises. NOTE(review): when every fetch
            attempt fails, parsing still proceeds on an empty response (as it
            always has), so the caller sees a parse error rather than the
            fetch error -- confirm whether callers rely on that.
        """
        response_data = ""
        # The redirect URL is not captured (the original capture was
        # commented out), so this is always the empty string.
        redirect_url = ""
        url = transformUrl(url, 2)

        # The retry budget is per call; previously the counter was never
        # reset, so an instance that had failed three times stopped retrying.
        self.count_trials = 0
        while self.count_trials < self.MAX_TRIALS:
            try:
                # TODO: quick fix -- per the original note this should come
                # from configuration ("need to add it conf").
                response_data = fetchResponseUsingProxy(url, headers, proxy=True)
                print("Fetched response from flipkart for %s" % (url))
                break
            except Exception as e:
                traceback.print_exc()
                # Two spaces after the colon reproduce the output of the
                # original Python 2 statement ``print 'ERROR: ', e``.
                print('ERROR:  %s' % (e,))
                print('Retrying')
                self.count_trials += 1

        self.response_data = response_data
        print(datetime.datetime.now())
        return self.createData(url, redirect_url)

    def createData(self, url, redirect_url):
        """Extract prices and the stock flag from ``self.response_data``.

        Args:
            url: accepted for interface compatibility; not used.
            redirect_url: accepted for interface compatibility; not used.

        Returns:
            dict with keys:
                'lowestSp': lowest ``sellingPrice`` found in the seller
                    table, or the buy-box price when the seller table cannot
                    be parsed.
                'inStock': 1 or 0. It is 1 whenever the seller table parsed;
                    otherwise it is 0 only if a ``div.out-of-stock`` element
                    is present.
                'buyBoxPrice': float value of the ``data-evar48`` attribute
                    of ``span.selling-price.omniture-field``.

        Raises:
            Propagates errors from parsing the page or converting the
            buy-box price to ``float`` (that lookup is deliberately outside
            the best-effort block below).
        """
        pq = PyQuery(self.response_data)
        buyBoxPrice = float(pq('span.selling-price.omniture-field').attr['data-evar48'])

        try:
            sellerJson = pq('div.seller-table-wrap').attr['data-config']
            listings = json.loads(sellerJson)['dataModel']
            # min() selects the same first-minimal entry that a stable sort
            # followed by [0] would, without sorting the whole list.
            cheapest = min(listings, key=lambda k: k['priceInfo'].get('sellingPrice', 0))
            sellingPrice = float(cheapest['priceInfo']['sellingPrice'])
        except Exception:
            # Not able to parse the seller wrap section, probably due to only
            # a single seller option: fall back to the buy-box price.
            # ``Exception`` (rather than a bare except) keeps this
            # best-effort without swallowing KeyboardInterrupt/SystemExit.
            sellingPrice = buyBoxPrice
            inStock = 0 if len(pq('div.out-of-stock')) > 0 else 1
        else:
            inStock = 1

        return {'lowestSp': sellingPrice, 'inStock': inStock, 'buyBoxPrice': buyBoxPrice}


if __name__ == '__main__':
    # Manual smoke run: time a single scrape of a sample product page.
    print(datetime.datetime.now())
    scraper = FlipkartProductPageScraper()
    print(scraper.read('http://www.flipkart.com/redmi-2/p/itme8ygtcfax6w39'))
    print(datetime.datetime.now())