Subversion Repositories SmartDukaan

Rev

Rev 9167 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
9167 kshitij.so 1
from BeautifulSoup import BeautifulSoup
2
import mechanize
3
import sys
4
import cookielib
5
from time import sleep
6
 
7
def getBrowserObject():
    """Create the mechanize browser used for every request in this script.

    The browser is given a fresh ``LWPCookieJar``, has http-equiv, redirect
    and referer handling switched on, robots.txt handling and all HTTP
    debugging switched off, follows refreshes with ``max_time=1`` and sends
    a fixed set of Chrome-like request headers.

    Returns:
        The configured ``mechanize.Browser`` instance.
    """
    browser = mechanize.Browser(factory=mechanize.RobustFactory())
    browser.set_cookiejar(cookielib.LWPCookieJar())

    # (setter, value) pairs, applied in the order listed.
    toggles = (
        (browser.set_handle_equiv, True),
        (browser.set_handle_redirect, True),
        (browser.set_handle_referer, True),
        (browser.set_handle_robots, False),
        (browser.set_debug_http, False),
        (browser.set_debug_redirects, False),
        (browser.set_debug_responses, False),
    )
    for configure, enabled in toggles:
        configure(enabled)

    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    request_headers = [
        ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip,deflate,sdch'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]
    browser.addheaders = request_headers
    return browser
27
 
28
def login(url, username="amit.gupta@shop2020.in", password="shop2020"):
    """Open the sign-in page at *url* and submit its ``signinWidget`` form.

    Args:
        url: Address of the page that contains the sign-in form.
        username: Value written to the form's ``username`` field.
        password: Value written to the form's ``password`` field.

    Returns:
        The browser object the form was submitted with, so the caller can
        keep issuing requests through it. The submit result is not
        inspected here, so a rejected login is not detected by this function.
    """
    # NOTE(review): the default credentials are kept only so existing
    # ``login(url)`` callers keep working; they should be moved out of the
    # source (environment/config) and passed in explicitly.
    br = getBrowserObject()

    # The page is requested twice, exactly as before. Presumably the first
    # hit only establishes cookies/redirects -- TODO confirm before removing.
    br.open(url)
    response = br.open(url)
    ungzipResponse(response, br)

    br.select_form(name="signinWidget")
    br.form['username'] = username
    br.form['password'] = password

    # Announce the attempt before the network call so the message is shown
    # even when the submit itself fails.
    print("********************")
    print("Attempting to Login")
    print("********************")
    br.submit()
    return br
44
 
45
def requestReport(br,reportUrl):
    """Ask for a new product report by opening *reportUrl*.

    Args:
        br: Browser object exposing ``open(url)``.
        reportUrl: URL whose retrieval triggers the report request.

    Returns:
        The same browser object that was passed in.
    """
    separator = "********************"
    print(separator)
    print("Requesting Product Report")
    print(separator)
    br.open(reportUrl)
    return br
51
 
52
def checkStatus(br,statusUrl):
    """Fetch the page at *statusUrl* and return its body.

    Args:
        br: Browser object used to open the URL.
        statusUrl: Address of the status page.

    Returns:
        A ``(br, page)`` tuple: the browser that was passed in and the body
        read from the response after ``ungzipResponse`` has processed it.
    """
    status_response = br.open(statusUrl)
    # Decompress in place first so read() yields the usable page body.
    ungzipResponse(status_response, br)
    return br, status_response.read()
57
 
58
def getReportBatchId(br,page):
    """Extract a batch id from the report status page.

    The id is taken from the second ``<td>`` of the first ``<tr>`` whose
    class is ``list-row-even``.

    Args:
        br: Browser object; returned unchanged.
        page: HTML of the status page.

    Returns:
        A ``(br, batch_id)`` tuple, where ``batch_id`` is the text of that
        table cell.

    Raises:
        ValueError: If the page has no ``list-row-even`` row, or that row
            has fewer than two cells. Previously this surfaced as an opaque
            AttributeError/IndexError.
    """
    print("********************")
    print("Fetching Batch Id for request")
    print("********************")
    soup = BeautifulSoup(page,convertEntities=BeautifulSoup.HTML_ENTITIES)
    first_row = soup.find("tr" , {"class" : "list-row-even"})
    if first_row is None:
        raise ValueError("No 'list-row-even' row found on the report status page")
    cells = first_row.findAll("td")
    if len(cells) < 2:
        raise ValueError("Report status row has no batch id cell")
    return br , cells[1].text
66
 
67
def downloadReport(br,batchId,statusUrl):
    """Look up the download link for *batchId* on the status page.

    Args:
        br: Browser object used to open the status page.
        batchId: Batch id text to match against the second cell of a row.
        statusUrl: Address of the status page.

    Returns:
        ``(br, link)`` where ``link`` is the ``<a class="buttonImage">``
        element found in the sixth cell of the first matching row whose
        fifth cell reads ``Ready``, or ``(br, None)`` when no row matches.
    """
    for line in ("********************", "Check request status", "********************"):
        print(line)

    response = br.open(statusUrl)
    ungzipResponse(response, br)
    soup = BeautifulSoup(response.read(),convertEntities=BeautifulSoup.HTML_ENTITIES)

    even_rows = soup.findAll("tr" , {"class" : "list-row-even"})
    odd_rows = soup.findAll("tr" , {"class" : "list-row-odd"})

    # All even rows are examined before any odd row, as before.
    for table_row in list(even_rows) + list(odd_rows):
        cells = table_row.findAll("td")
        if cells[1].text != batchId or cells[4].text != 'Ready':
            continue
        return br , cells[5].find("a", {"class" : "buttonImage"})

    return br,None
88
 
9176 amit.gupta 89
def fetchFile(link,br,batchId):
    """Download *link* and save the body as ``<batchId>.txt``.

    Args:
        link: URL of the file to download.
        br: Browser object used for the request.
        batchId: Batch id; used as the output file name stem.

    The file is written in binary mode, relative to the current working
    directory.
    """
    print("********************")
    print("Writing file to disk")
    print("********************")
    response = br.open(link)
    print(br.response().info())
    # The former ``print br.response().read`` was dropped: without call
    # parentheses it only printed the bound-method repr.
    ungzipResponse(response,br)
    # ``with`` guarantees the file is closed even if the write fails.
    with open(batchId+'.txt','wb') as output:
        output.write(br.response().get_data())
100
 
101
 
102
def ungzipResponse(r,b):
    """Decompress a gzip-encoded response in place and give it to the browser.

    Args:
        r: Response object providing ``info()``, ``read()`` and
            ``set_data()``.
        b: Browser object; ``set_response(r)`` is called on it once the
            body has been decompressed.

    A response whose ``Content-Encoding`` header is missing or is not
    ``gzip`` is left untouched and the browser is not modified.
    """
    headers = r.info()
    # Use .get() rather than []: header objects may raise KeyError for a
    # missing header, which made every uncompressed response crash here.
    encoding = headers.get('Content-Encoding') or ''
    if encoding.strip().lower() != 'gzip':
        return

    import gzip
    print("********************")
    print("Deflating gzip response")
    print("********************")
    gz = gzip.GzipFile(fileobj=r, mode='rb')
    try:
        html = gz.read()
    finally:
        gz.close()
    headers["Content-type"] = "text/html; charset=utf-8"
    r.set_data( html )
    b.set_response(r)
115
 
116
 
117
 
118
 
119
def main():
    """Log in, request a product report, wait for it and save it to disk.

    Steps: log in via the Seller Central home page, open the report request
    URL, read the batch id from the status page, then poll the status page
    until a download link for that batch id appears and fetch the file.

    The status page is checked at most ``max_attempts`` times with
    ``retry_delay_seconds`` between checks. If the report is still not
    available after the last check the process exits with status 1.
    """
    login_url = "https://sellercentral.amazon.in/gp/homepage.html"
    report_url = "https://sellercentral.amazon.in/gp/upload-download-utils/requestReport.html?ie=UTF8&type=nemoProductReports"
    status_url = "https://sellercentral.amazon.in/gp/upload-download-utils/reportStatusData.html"
    max_attempts = 6
    retry_delay_seconds = 600

    print("Opening Seller Central login page")
    br = login(login_url)
    br = requestReport(br,report_url)
    br, page = checkStatus(br,status_url)
    br, batchId = getReportBatchId(br,page)
    print("*********************************")
    print("Batch Id for request is  %s" % batchId)
    print("*********************************")

    download_link = None
    for attempt in range(1, max_attempts + 1):
        br, download_link = downloadReport(br,batchId,status_url)
        if download_link is not None:
            break
        # Only wait when another check will actually follow; the old loop
        # slept a further 10 minutes after the final check and then gave up
        # without looking again.
        if attempt < max_attempts:
            print("File not ready for download yet.Will try again after 10 minutes.")
            sleep(retry_delay_seconds)
    else:
        print("File not available for download after multiple retries")
        sys.exit(1)

    fetchFile(download_link['href'],br,batchId)
9167 kshitij.so 145
 
9176 amit.gupta 146
 
9167 kshitij.so 147
# Run the report download only when this file is executed as a script.
if __name__ == "__main__":
    main()