# Rev 12256 | Blame | Compare with Previous | Last modification | View Log | RSS feed
"""Scrape SKU details from Amazon Seller Central using mechanize and BeautifulSoup.

Python 2 script: signs in through the Seller Central login form, opens a
product-summary search URL and prints the (asin, inventory, price) triple
read from the first SKU row of the result table.

Single-argument ``print(...)`` calls are used throughout; they behave exactly
like the equivalent Python 2 print statements.
"""
import cookielib
import gzip
import re

import mechanize
from BeautifulSoup import BeautifulSoup


class SellerCentralScraper(object):
    """Log in to Seller Central and read SKU details from its listing page."""

    # NOTE(review): credentials are hard-coded in source. They are kept only as
    # backward-compatible defaults for login(); prefer passing them explicitly
    # and moving these values out of version control.
    DEFAULT_USERNAME = "kshitij.sood@saholic.com"
    DEFAULT_PASSWORD = "pioneer"

    # Matches the id attribute of the table rows that carry SKU data.
    _SKU_ROW_ID = re.compile('sku-.*')

    def _banner(self, message):
        """Print *message* framed by a row of asterisks above and below."""
        print("********************")
        print(message)
        print("********************")

    def getBrowserObject(self):
        """Return a new ``mechanize.Browser`` set up to look like a desktop browser.

        The browser gets its own LWP cookie jar, has HTTP-EQUIV, redirect and
        Referer handling enabled, ignores robots.txt, has all debug output
        switched off and handles Refresh headers with ``max_time=1``.
        """
        br = mechanize.Browser(factory=mechanize.RobustFactory())
        br.set_cookiejar(cookielib.LWPCookieJar())

        br.set_handle_equiv(True)
        br.set_handle_redirect(True)
        br.set_handle_referer(True)
        br.set_handle_robots(False)

        br.set_debug_http(False)
        br.set_debug_redirects(False)
        br.set_debug_responses(False)

        br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

        # Accept-Encoding advertises gzip, deflate and sdch, but only gzip
        # bodies are decoded afterwards (see ungzipResponse).
        br.addheaders = [
            ('User-agent',
             'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
            ('Accept-Encoding', 'gzip,deflate,sdch'),
            ('Accept-Language', 'en-US,en;q=0.8'),
            ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
        ]
        return br

    def login(self, url, username=None, password=None):
        """Open *url*, fill in the ``signinWidget`` form and submit it.

        Args:
            url: Address of the page that contains the sign-in form.
            username: Value for the form's ``username`` field; defaults to
                ``DEFAULT_USERNAME`` when omitted.
            password: Value for the form's ``password`` field; defaults to
                ``DEFAULT_PASSWORD`` when omitted.

        Returns:
            The ``mechanize.Browser`` used for the submission, so later
            requests can reuse its cookie jar. The response to the form
            submission itself is neither decompressed nor inspected, so a
            failed login is not detected here.
        """
        if username is None:
            username = self.DEFAULT_USERNAME
        if password is None:
            password = self.DEFAULT_PASSWORD

        br = self.getBrowserObject()
        # The page is requested twice and the first response is discarded.
        # NOTE(review): presumably this primes session cookies - confirm
        # before removing the extra request.
        br.open(url)
        response = br.open(url)
        self.ungzipResponse(response, br)

        br.select_form(name="signinWidget")
        br.form['username'] = username
        br.form['password'] = password
        br.submit()
        self._banner("Attempting to Login")
        return br

    def requestSku(self, br, skuUrl):
        """Fetch *skuUrl* with *br* and return the SKU details found on the page.

        Args:
            br: A browser as returned by ``login``.
            skuUrl: URL of the listing/search page to open.

        Returns:
            The ``(asin, inventory, ourPrice)`` tuple from ``getSkuDetails``,
            or ``('', 0, 0)`` when the page cannot be parsed.
        """
        self._banner("Requesting SKU Details")
        response = br.open(skuUrl)
        self.ungzipResponse(response, br)
        page = response.read()
        try:
            return self.getSkuDetails(br, page)
        except Exception:
            # Best effort: an unexpected page layout yields the sentinel
            # tuple instead of an error. KeyboardInterrupt and SystemExit
            # are deliberately not swallowed.
            return '', 0, 0

    def getSkuDetails(self, br, page):
        """Parse *page* and return details of the first SKU row.

        Looks up the table with class ``data-display manageTable``, takes its
        first row whose id matches ``sku-.*`` and reads the values from it.

        Args:
            br: Unused; kept so existing callers keep working.
            page: HTML text of the listing page.

        Returns:
            ``(asin, inventory, ourPrice)`` as text taken from the page; no
            numeric conversion is done. ``asin`` is the text of cell 5.
            ``inventory`` and ``ourPrice`` come from the ``value`` attribute
            of the row's ``inv`` / ``price`` inputs, falling back to the text
            of cells 8 and 10 when the input is missing.

        Raises:
            AttributeError, IndexError: When the table, the SKU row or the
                expected cells are not present.
        """
        soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
        table = soup.find("table", {"class": "data-display manageTable"})
        first_row = table.findAll("tr", {"id": self._SKU_ROW_ID})[0]
        cells = first_row.findAll("td")

        # Cell 4 was previously read as the SKU to derive an 'FBA' prefix
        # flag; that flag was never used or returned, so it is not computed.
        asin = cells[5].text
        inventory = self._inputValueOrCell(first_row, cells, "inv", 8)
        ourPrice = self._inputValueOrCell(first_row, cells, "price", 10)
        return asin, inventory, ourPrice

    def _inputValueOrCell(self, row, cells, inputName, cellIndex):
        """Return the ``value`` of the row's *inputName* element, else the cell text."""
        inputTag = row.find(attrs={"name": inputName})
        try:
            return inputTag['value']
        except (TypeError, KeyError):
            # TypeError: no such element (find returned None);
            # KeyError: the element has no value attribute.
            return cells[cellIndex].text

    def ungzipResponse(self, r, b):
        """Decompress a gzip-encoded response in place and re-install it on the browser.

        Does nothing unless the ``Content-Encoding`` header equals ``gzip``;
        a response without that header is left untouched instead of raising.

        Args:
            r: mechanize response; its body is replaced by the decompressed
                data and its ``Content-type`` header is overwritten with
                ``text/html; charset=utf-8``.
            b: Browser on which the updated response is set via
                ``set_response``.
        """
        headers = r.info()
        # .get() rather than indexing: a missing header must not raise.
        if headers.get('Content-Encoding') != 'gzip':
            return

        self._banner("Deflating gzip response")
        gz = gzip.GzipFile(fileobj=r, mode='rb')
        try:
            html = gz.read()
        finally:
            gz.close()
        headers["Content-type"] = "text/html; charset=utf-8"
        r.set_data(html)
        b.set_response(r)


def main():
    """Log in to Seller Central and print the details for one hard-coded search URL."""
    print("Opening Seller Central login page")
    login_url = "https://sellercentral.amazon.in/gp/homepage.html"
    sc = SellerCentralScraper()
    br = sc.login(login_url)
    sku_url = "https://sellercentral.amazon.in/myi/search/ProductSummary?keyword=2287"
    print(sc.requestSku(br, sku_url))


if __name__ == "__main__":
    main()