# Rev 9167 | Blame | Compare with Previous | Last modification | View Log | RSS feed
from BeautifulSoup import BeautifulSoupimport mechanizeimport sysimport cookielibfrom time import sleepdef getBrowserObject():br = mechanize.Browser(factory=mechanize.RobustFactory())cj = cookielib.LWPCookieJar()br.set_cookiejar(cj)br.set_handle_equiv(True)br.set_handle_redirect(True)br.set_handle_referer(True)br.set_handle_robots(False)br.set_debug_http(False)br.set_debug_redirects(False)br.set_debug_responses(False)br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)br.addheaders = [('User-agent','Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),('Accept-Encoding', 'gzip,deflate,sdch'),('Accept-Language', 'en-US,en;q=0.8'),('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3')]return brdef login(url):br = getBrowserObject()br.open(url)response = br.open(url)ungzipResponse(response, br)#html = response.read()#print htmlbr.select_form(name="signinWidget")br.form['username'] = "amit.gupta@shop2020.in"br.form['password'] = "shop2020"response = br.submit()print "********************"print "Attempting to Login"print "********************"#ungzipResponse(response, br)return brdef requestReport(br,reportUrl):print "********************"print "Requesting Product Report"print "********************"br.open(reportUrl)return brdef checkStatus(br,statusUrl):response = br.open(statusUrl)ungzipResponse(response, br)page = response.read()return br, pagedef getReportBatchId(br,page):print "********************"print "Fetching Batch Id for request"print "********************"soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)table_rows = soup.find("tr" , {"class" : "list-row-even"})batchId = table_rows.findAll("td")return br , batchId[1].textdef downloadReport(br,batchId,statusUrl):print "********************"print "Check request status"print "********************"response = 
br.open(statusUrl)ungzipResponse(response, br)page = response.read()soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)table_rows_even = soup.findAll("tr" , {"class" : "list-row-even"})table_rows_odd = soup.findAll("tr" , {"class" : "list-row-odd"})for tr in table_rows_even:row = tr.findAll("td")if row[1].text==batchId and row[4].text=='Ready':return br , row[5].find("a", {"class" : "buttonImage"})for tr in table_rows_odd:row = tr.findAll("td")if row[1].text==batchId and row[4].text=='Ready':return br , row[5].find("a", {"class" : "buttonImage"})return br,Nonedef fetchFile(link,br,batchId):print "********************"print "Writing file to disk"print "********************"response = br.open(link)print br.response().info()print br.response().readungzipResponse(response,br)output = open(batchId+'.txt','wb')output.write(br.response().get_data())output.close()def ungzipResponse(r,b):headers = r.info()if headers['Content-Encoding']=='gzip':import gzipprint "********************"print "Deflating gzip response"print "********************"gz = gzip.GzipFile(fileobj=r, mode='rb')html = gz.read()gz.close()headers["Content-type"] = "text/html; charset=utf-8"r.set_data( html )b.set_response(r)def main():print "Opening Seller Central login page"login_url = "https://sellercentral.amazon.in/gp/homepage.html"br = login(login_url)report_url = "https://sellercentral.amazon.in/gp/upload-download-utils/requestReport.html?ie=UTF8&type=nemoProductReports"br = requestReport(br,report_url)status_url="https://sellercentral.amazon.in/gp/upload-download-utils/reportStatusData.html"br, page = checkStatus(br,status_url)br, batchId = getReportBatchId(br,page)print "*********************************"print "Batch Id for request is ",batchIdprint "*********************************"ready = FalseretryCount = 0while not ready:if retryCount == 6:print "File not available for download after multiple retries"sys.exit(1)br, download_link = downloadReport(br,batchId,status_url)if 
download_link is not None:ready= Truecontinueprint "File not ready for download yet.Will try again after 10 minutes."retryCount+=1sleep(600)fetchFile(download_link['href'],br,batchId)if __name__ == "__main__":main()