Subversion Repositories SmartDukaan

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
7107 kshitij.so 1
#!/usr/bin/python2 -W ignore
2
 
3
import mechanize
4
import cookielib
5
import urllib
6
import optparse
7
import sys
8
 
9
def scrape(start, end):
    """Download the sales declaration report for a date range.

    Logs in through the site's ``aspnetForm``, selects client ``"14"``,
    submits the sales-declaration download form with the given dates, writes
    the response body to ``sales_dec.xls`` in the current working directory,
    and finally posts the logout link back from the home page. HTTP debug
    traces and assorted diagnostics are printed to stdout along the way.

    Args:
        start: Start date, written verbatim into the ``txt_startdate`` field.
            The example printed by ``main`` looks like dd/mm/yyyy -- the
            format the server expects should be confirmed.
        end: End date, written verbatim into the ``txt_enddate`` field.

    Raises:
        Nothing is caught here; any error raised by ``mechanize`` (network
        failure, missing form or control) or by the file write propagates.
    """
    baseUrl = 'http://115.112.177.176/'
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or the environment.
    userId = "SAHOLIC001"
    password = "Saholic123$"

    # RobustFactory because of bad html tags in the html...
    br = mechanize.Browser(factory=mechanize.RobustFactory())
    br.set_cookiejar(cookielib.LWPCookieJar())  # cookie container
    br.set_handle_equiv(True)
    br.set_handle_redirect(True)  # handling redirects due to js
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.set_debug_http(True)
    br.set_debug_redirects(True)
    br.set_debug_responses(True)
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    br.addheaders = [
        ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip,deflate,sdch'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]

    # Step 1: log in.
    br.open(baseUrl)
    br.select_form(name="aspnetForm")
    br.form['ctl00$ContentPlaceHolder1$txtUserID'] = userId
    br.form['ctl00$ContentPlaceHolder1$txtpassword'] = password
    br.submit()

    # Step 2: the page returned after login asks for a client; pick "14".
    br.select_form(name="aspnetForm")
    br.form['ctl00$ContentPlaceHolder1$ddlClient'] = ["14"]
    br.submit()

    # Step 3: fill in and submit the sales declaration download form.
    br.open(baseUrl + 'frm_Sales_Declaration_Download.aspx')
    br.select_form(name="aspnetForm")
    br.form.set_all_readonly(False)
    print("Printing controls..........**********")
    for control in br.form.controls:
        print(control)
        print("type=%s, name=%s value=%s" % (control.type, control.name, br[control.name]))
    br.form['ctl00$ContentPlaceHolder1$ddl_DateType'] = ["1"]
    br.form['ctl00$ContentPlaceHolder1$txt_startdate'] = start
    br.form['ctl00$ContentPlaceHolder1$txt_enddate'] = end
    br.submit("ctl00$ContentPlaceHolder1$btn_submit")

    # Step 4: dump the response for debugging and save it to disk.
    print('Response...\n')
    print(br.response().info())
    data = br.response().get_data()
    print(data)
    # 'with' guarantees the file is closed even if the write fails.
    with open('sales_dec.xls', 'wb') as output:
        output.write(data)

    print('still alive...\n')

    for prop, value in vars(br.response()).items():
        print('Property: %s , Value:  %s' % (prop, value))

    print('\ndir(br.response())\n')
    for each in dir(br.response()):
        print(each)

    print('\nresponse info...\n')
    print(br.response().info())

    print('\nresponse geturl\n')
    print(br.response().geturl())

    # Step 5: log out by submitting the home page form with the logout link
    # as the event target (the two event fields are added to the form by hand).
    br.open(baseUrl + 'frm_Home.aspx')
    br.select_form(name="aspnetForm")
    br.form.set_all_readonly(False)
    br.form.new_control('text', '__EVENTARGUMENT', {'value': ''})
    br.form.new_control('text', '__EVENTTARGET', {'value': ''})
    br.form.fixup()
    # Assign the control values directly: indexing the current (string) value
    # and assigning to element 0 does not update the control.
    br.form["__EVENTTARGET"] = 'ctl00$lnkLogout'
    br.form["__EVENTARGUMENT"] = ''
    response = br.submit()
    print(response.read())
88
 
89
 
90
def main():
    """Parse the command line and run the scrape for the requested dates.

    Both ``-S/--start`` and ``-E/--end`` are required. If either is missing
    or empty, a short hint is printed and the process exits with status 1;
    otherwise the two values are passed unchanged to ``scrape``.
    """
    usage = "Usage: %prog -S [startDate] -E [endDate]"
    # Hand the usage text to the parser so that -h/--help actually shows it.
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-S", "--start", dest="startDate", default="",
                      type="string", help="Start Date")
    parser.add_option("-E", "--end", dest="endDate", default="",
                      type="string", help="End Date")
    options, _positional = parser.parse_args()

    if not options.startDate or not options.endDate:
        print("Arguments missing....")
        print("Example: UniversalInsurance.py -S 12/03/2013 -E 18/03/2013")
        print("Use: -h / --help to get help.")
        sys.exit(1)

    scrape(options.startDate, options.endDate)
102
 
103
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
105