# Rev 17013 | Blame | Compare with Previous | Last modification | View Log | RSS feed
from dtr.utils.utils import fetchResponseUsingProxyimport reimport datetimefrom pyquery import PyQueryimport tracebackimport jsonimport urllib2import gzipimport StringIOheaders = {'User-Agent':'Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19','Accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8','Accept-Language' : 'en-US,en;q=0.8','Accept-Charset' : 'ISO-8859-1,utf-8;q=0.7,*;q=0.3','Connection':'keep-alive','Accept-Encoding' : 'gzip,deflate,sdch','Host' : 'm.homeshop18.com'}class HomeShop18Scraper:def __init__(self, livePricing=None):self.count_trials = 0self.livePricing = livePricingdef read(self, url):response_data = ""try:response_data = fetchResponseUsingProxy(url,headers=headers,livePricing=self.livePricing,proxy=True)except Exception as e:print 'ERROR: ', eprint 'Retrying'self.count_trials += 1if self.count_trials < 5:return self.read(url)self.response_data=response_dataif "Server Busy" in self.response_data:self.count_trials += 1return self.read(url)return self.createData()def createData(self):#print self.response_datapq = PyQuery(self.response_data)tag = pq.find("script")#for tag inrequiredJson = Nonefor val in str(tag).split('\n'):if 'hs18Cache.addProductItems(' in val:requiredJson = json.loads(val.strip().split('hs18Cache.addProductItems(')[1].split(');')[0])thumbnail = 'http://stat.homeshop18.com/homeshop18'+str(requiredJson['imageUrl'])inStock = 0totalStock = 0sellingPrice = 0shippingCharge = 0for item in requiredJson.get('itemList'):totalStock = totalStock + long(item['stockQuantity'])if sellingPrice ==0:sellingPrice = long(item.get('sellingPrice'))if item.get('shippingCharge') is not None and shippingCharge ==0:shippingCharge = long(item.get('shippingCharge'))if totalStock>0:inStock = 1print inStock, sellingPrice, shippingCharge, thumbnailreturn 
{'productId':str(requiredJson['productId']),'price':sellingPrice,'inStock':inStock,'shippingCharge':shippingCharge,'thumbnail':thumbnail}if __name__ == '__main__':print datetime.datetime.now()scraper = HomeShop18Scraper()print scraper.read('http://m.homeshop18.com/product.mobi?productId=32998885')#print scraper.read('http://www.homeshop18.com/spice-full-touch-dual-sim-phone-m6112/mobiles/mobile-phones/product:32866119/cid:3027/')#print scraper.read('http://m.homeshop18.com/product/stock.mobi?zipCode=110001&productId=32866119')print datetime.datetime.now()