| 7107 |
kshitij.so |
1 |
#!/usr/bin/python2 -W ignore
|
|
|
2 |
|
|
|
3 |
import mechanize
|
|
|
4 |
import cookielib
|
|
|
5 |
import urllib
|
|
|
6 |
import optparse
|
|
|
7 |
import sys
|
|
|
8 |
|
|
|
9 |
def _build_browser():
    """Return a mechanize browser configured for the portal session."""
    # RobustFactory is used because the portal's HTML contains bad tags.
    browser = mechanize.Browser(factory=mechanize.RobustFactory())
    browser.set_cookiejar(cookielib.LWPCookieJar())  # cookie container
    browser.set_handle_equiv(True)
    browser.set_handle_redirect(True)  # handling redirects due to js
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)
    # Wire-level tracing: every request/response is echoed by mechanize.
    browser.set_debug_http(True)
    browser.set_debug_redirects(True)
    browser.set_debug_responses(True)
    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    browser.addheaders = [
        ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip,deflate,sdch'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]
    return browser


def scrape(start, end):
    """Download the sales declaration report for the given date range.

    Steps performed against the portal at ``baseUrl``:

    1. log in through the ``aspnetForm`` form,
    2. choose client ``"14"`` on the following form,
    3. open ``frm_Sales_Declaration_Download.aspx``, fill in the date type
       and the start/end dates and press the submit button,
    4. write the response body to ``sales_dec.xls`` in the current
       working directory and dump diagnostics about it to stdout,
    5. post back the ``ctl00$lnkLogout`` link from ``frm_Home.aspx``.

    Args:
        start: Text placed verbatim in the start-date field. The format is
            whatever the portal expects (presumably dd/mm/yyyy -- confirm).
        end: Text placed verbatim in the end-date field, same format.

    Returns:
        None. The report is written to ``sales_dec.xls``.

    Print calls use the single-argument parenthesised form so the function
    body is valid under both Python 2 and Python 3 with identical output.
    """
    baseUrl = 'http://115.112.177.176/'
    br = _build_browser()

    # --- login ---------------------------------------------------------
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or environment variables.
    br.open(baseUrl)
    br.select_form(name="aspnetForm")
    br.form['ctl00$ContentPlaceHolder1$txtUserID'] = "SAHOLIC001"
    br.form['ctl00$ContentPlaceHolder1$txtpassword'] = "Saholic123$"
    br.submit()

    # --- client selection ------------------------------------------------
    br.select_form(name="aspnetForm")
    br.form['ctl00$ContentPlaceHolder1$ddlClient'] = ["14"]
    br.submit()

    # --- report request --------------------------------------------------
    br.open(baseUrl + 'frm_Sales_Declaration_Download.aspx')
    br.select_form(name="aspnetForm")
    # Lift the read-only flag so every control on the form can be assigned.
    br.form.set_all_readonly(False)
    print("Printing controls..........**********")
    for control in br.form.controls:
        print(control)
        print("type=%s, name=%s value=%s" % (control.type, control.name, br[control.name]))
    br.form['ctl00$ContentPlaceHolder1$ddl_DateType'] = ["1"]
    br.form['ctl00$ContentPlaceHolder1$txt_startdate'] = start
    br.form['ctl00$ContentPlaceHolder1$txt_enddate'] = end
    br.submit("ctl00$ContentPlaceHolder1$btn_submit")

    # --- save and dump the response --------------------------------------
    report = br.response()
    payload = report.get_data()
    print('Response...\n')
    print(report.info())
    print(payload)
    # The context manager closes the file even if the write fails.
    with open('sales_dec.xls', 'wb') as output:
        output.write(payload)

    print('still alive...\n')

    for prop, value in vars(report).items():
        print('Property: %s , Value:  %s' % (prop, value))

    print('\ndir(br.response())\n')
    for each in dir(report):
        print(each)

    print('\nresponse info...\n')
    print(report.info())

    print('\nresponse geturl\n')
    print(report.geturl())

    # --- logout ----------------------------------------------------------
    br.open(baseUrl + 'frm_Home.aspx')
    br.select_form(name="aspnetForm")
    br.form.set_all_readonly(False)
    # Add the postback fields by hand with their final values, so no
    # lookup-and-assign on the freshly added controls is needed.
    br.form.new_control('text', '__EVENTARGUMENT', {'value': ''})
    br.form.new_control('text', '__EVENTTARGET', {'value': 'ctl00$lnkLogout'})
    br.form.fixup()
    response = br.submit()
    print(response.read())
|
|
|
88 |
|
|
|
89 |
|
|
|
90 |
def main():
    """Parse the command line and run the scrape for the requested dates.

    Options:
        -S / --start: start date string (stored as ``startDate``).
        -E / --end:   end date string (stored as ``endDate``).

    Both options are required. When either is missing or empty, a short
    hint is printed and the process exits with status 1. Otherwise the two
    strings are passed unchanged to ``scrape``.
    """
    usage = "Usage: %prog -S [startDate] -E [endDate]"
    # Hand the usage text to the parser so -h/--help actually shows it.
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-S", "--start", dest="startDate", default="", type="string", help="Start Date")
    parser.add_option("-E", "--end", dest="endDate", default="", type="string", help="End Date")
    options, _args = parser.parse_args()
    if not (options.startDate and options.endDate):
        print("Arguments missing....")
        print("Example: UniversalInsurance.py -S 12/03/2013 -E 18/03/2013")
        print("Use: -h / --help to get help.")
        sys.exit(1)
    scrape(options.startDate, options.endDate)
|
|
|
102 |
|
|
|
103 |
# Script entry point: run the command-line driver only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|
105 |
|