Blame | Last modification | View Log | RSS feed
#!/usr/bin/python2 -W ignoreimport mechanizeimport cookielibimport urllibimport optparseimport sysdef scrape(start,end):baseUrl = 'http://115.112.177.176/'br = mechanize.Browser(factory=mechanize.RobustFactory()) # because of bad html tags in the html...cj = cookielib.LWPCookieJar() #cookie containerbr.set_cookiejar(cj)br.set_handle_equiv(True)br.set_handle_redirect(True) #handling redirects due to jsbr.set_handle_referer(True)br.set_handle_robots(False)br.set_debug_http(True)br.set_debug_redirects(True)br.set_debug_responses(True)br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)br.addheaders = [('User-agent','Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),('Accept-Encoding', 'gzip,deflate,sdch'),('Accept-Language', 'en-US,en;q=0.8'),('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3')]br.open(baseUrl)br.select_form(name="aspnetForm")br.form['ctl00$ContentPlaceHolder1$txtUserID'] = "SAHOLIC001"br.form['ctl00$ContentPlaceHolder1$txtpassword'] = "Saholic123$"#form.ctl00_ContentPlaceHolder1_txtpassword = 'Saholic123$'response = br.submit()br.select_form(name="aspnetForm")#print form#form.ctl00_ContentPlaceHolder1_txtUserID = 'SAHOLIC001'br.form['ctl00$ContentPlaceHolder1$ddlClient'] = ["14"]#form.ctl00_ContentPlaceHolder1_txtpassword = 'Saholic123$'response = br.submit()content = response.read()r=br.open(baseUrl+'frm_Sales_Declaration_Download.aspx')html = r.read()form=br.select_form(name="aspnetForm")br.form.set_all_readonly(False)print "Printing controls..........**********"for control in br.form.controls:print controlprint "type=%s, name=%s value=%s" % (control.type, control.name, br[control.name])br.form['ctl00$ContentPlaceHolder1$ddl_DateType'] = ["1"]br.form['ctl00$ContentPlaceHolder1$txt_startdate'] = startbr.form['ctl00$ContentPlaceHolder1$txt_enddate'] = endbr.submit("ctl00$ContentPlaceHolder1$btn_submit")print 'Response...\n'print br.response().info()print br.response().readprint br.response().get_data()output = open('sales_dec.xls','wb')output.write(br.response().get_data())output.close()print 'still alive...\n'for prop, value in vars(br.response()).iteritems():print 'Property:', prop, ', Value: ', valueprint '\ndir(br.response())\n'for each in dir(br.response()):print eachprint '\nresponse info...\n'print br.response().info()print '\nresponse geturl\n'print br.response().geturl()br.open("http://115.112.177.176/frm_Home.aspx")form=br.select_form(name="aspnetForm")br.form.set_all_readonly(False)br.form.new_control('text','__EVENTARGUMENT',{'value':''})br.form.new_control('text','__EVENTTARGET',{'value':''})br.form.fixup()br["__EVENTTARGET"][0] = 'ctl00$lnkLogout'br["__EVENTARGUMENT"][0] = ''response = br.submit()print reponse.read()def main():parser = optparse.OptionParser()usage = "Usage: %prog -S [startDate] -E [endDate]"parser.add_option("-S", "--start", dest="startDate",default="",type="string",help="Start Date")parser.add_option("-E", "--end", dest="endDate",default="",type="string",help="End Date")(options, args) = parser.parse_args()if ( not(options.startDate) or not(options.endDate) ):print "Arguments missing...."print "Example: UniversalInsurance.py -S 12/03/2013 -E 18/03/2013"print "Use: -h / --help to get help."sys.exit(1)scrape(options.startDate,options.endDate)if __name__ == "__main__":main()