| 9167 |
kshitij.so |
1 |
from BeautifulSoup import BeautifulSoup
|
|
|
2 |
import mechanize
|
|
|
3 |
import sys
|
|
|
4 |
import cookielib
|
|
|
5 |
from time import sleep
|
|
|
6 |
|
|
|
7 |
def getBrowserObject():
    """Build and return a configured ``mechanize.Browser``.

    The browser gets a fresh ``LWPCookieJar``, has equiv/redirect/referer
    handling switched on, robots.txt handling and all debug output switched
    off, follows refreshes with ``max_time=1`` and sends a fixed set of
    Chrome-like request headers.
    """
    browser = mechanize.Browser(factory=mechanize.RobustFactory())
    browser.set_cookiejar(cookielib.LWPCookieJar())

    # Features that are switched on.
    for enable in (browser.set_handle_equiv,
                   browser.set_handle_redirect,
                   browser.set_handle_referer):
        enable(True)

    # Features that are switched off (robots.txt handling and debug output).
    for disable in (browser.set_handle_robots,
                    browser.set_debug_http,
                    browser.set_debug_redirects,
                    browser.set_debug_responses):
        disable(False)

    refresh_processor = mechanize._http.HTTPRefreshProcessor()
    browser.set_handle_refresh(refresh_processor, max_time=1)

    request_headers = [
        ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip,deflate,sdch'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]
    browser.addheaders = request_headers
    return browser
|
|
|
27 |
|
|
|
28 |
def login(url, username="amit.gupta@shop2020.in", password="shop2020"):
    """Open *url*, fill in the ``signinWidget`` form and submit it.

    Args:
        url: Address of the page that carries the sign-in form.
        username: Value written to the form's ``username`` field.
        password: Value written to the form's ``password`` field.

    Returns:
        The browser object created by ``getBrowserObject()`` after the
        form has been submitted.

    SECURITY: the default credentials are kept only so existing
    ``login(url)`` callers keep working; they should be supplied by the
    caller (e.g. from configuration) instead of living in source control.
    """
    br = getBrowserObject()
    # NOTE(review): the page is requested twice, exactly as before. The
    # first request may exist only to collect cookies - confirm before
    # removing it.
    br.open(url)
    response = br.open(url)
    # The browser advertises gzip support, so the body may need inflating
    # before mechanize can parse the form out of it.
    ungzipResponse(response, br)

    # Announce the attempt before it happens, not after the submit.
    print("********************")
    print("Attempting to Login")
    print("********************")

    br.select_form(name="signinWidget")
    br.form['username'] = username
    br.form['password'] = password
    br.submit()
    return br
|
|
|
44 |
|
|
|
45 |
def requestReport(br, reportUrl):
    """Print a banner, open *reportUrl* with *br* and return *br*.

    The response of the request is discarded; only the browser object is
    handed back to the caller.
    """
    banner = "********************"
    print(banner)
    print("Requesting Product Report")
    print(banner)
    br.open(reportUrl)
    return br
|
|
|
51 |
|
|
|
52 |
def checkStatus(br, statusUrl):
    """Open *statusUrl* and return ``(br, page)``.

    The response is passed through ``ungzipResponse`` first, and ``page``
    is whatever ``response.read()`` yields afterwards.
    """
    status_response = br.open(statusUrl)
    ungzipResponse(status_response, br)
    return br, status_response.read()
|
|
|
57 |
|
|
|
58 |
def getReportBatchId(br, page):
    """Extract a batch id from the report status page.

    The id is taken from the text of the second ``<td>`` of the first
    ``<tr class="list-row-even">`` found in *page*.

    Args:
        br: Browser object; returned unchanged.
        page: HTML of the status page.

    Returns:
        ``(br, batch_id)``.

    Raises:
        ValueError: if the page has no ``list-row-even`` row, or that row
            has fewer than two cells (previously an opaque
            ``AttributeError`` / ``IndexError``).
    """
    print("********************")
    print("Fetching Batch Id for request")
    print("********************")
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)

    # find() returns None when nothing matches, e.g. when an unexpected
    # page (such as a login page) came back instead of the status table.
    first_row = soup.find("tr", {"class": "list-row-even"})
    if first_row is None:
        raise ValueError("No 'list-row-even' row found on the report status page")

    cells = first_row.findAll("td")
    if len(cells) < 2:
        raise ValueError("Report status row does not contain a batch id cell")
    return br, cells[1].text
|
|
|
66 |
|
|
|
67 |
def downloadReport(br, batchId, statusUrl):
    """Look up the download link for *batchId* on the status page.

    Opens *statusUrl*, parses the table rows and returns
    ``(br, link_tag)`` for the first row whose second cell text equals
    *batchId* and whose fifth cell text is ``'Ready'``. ``link_tag`` is the
    result of searching the sixth cell for ``<a class="buttonImage">``.
    Returns ``(br, None)`` when no row matches.
    """
    banner = "********************"
    print(banner)
    print("Check request status")
    print(banner)

    status_response = br.open(statusUrl)
    ungzipResponse(status_response, br)
    soup = BeautifulSoup(status_response.read(),
                         convertEntities=BeautifulSoup.HTML_ENTITIES)

    # All even rows are scanned before any odd row.
    for row_class in ("list-row-even", "list-row-odd"):
        for table_row in soup.findAll("tr", {"class": row_class}):
            cells = table_row.findAll("td")
            if cells[1].text == batchId and cells[4].text == 'Ready':
                return br, cells[5].find("a", {"class": "buttonImage"})

    return br, None
|
|
|
88 |
|
|
|
89 |
def fetchFile(link, br, outputPath='1.txt'):
    """Download *link* with *br* and write the body to *outputPath*.

    Args:
        link: URL of the file to download.
        br: Browser object used for the request.
        outputPath: Destination file, opened in binary write mode.
            Defaults to ``'1.txt'``, the previously hard-coded name.
    """
    print("********************")
    print("Writing file to disk")
    print("********************")
    response = br.open(link)
    print(br.response().info())
    # A stray debug line that printed the bound ``read`` method object
    # (missing call parentheses) used to sit here; it has been removed.
    ungzipResponse(response, br)
    # ``with`` closes the file even if fetching or writing the data fails.
    with open(outputPath, 'wb') as output:
        output.write(br.response().get_data())
|
|
|
100 |
|
|
|
101 |
|
|
|
102 |
def ungzipResponse(r, b):
    """Inflate a gzip-encoded response in place and install it on *b*.

    Args:
        r: Response object; must provide ``info()``, ``read()`` and
            ``set_data()``.
        b: Browser object; must provide ``set_response()``.

    When the ``Content-Encoding`` header of *r* is ``gzip`` the body is
    decompressed, the ``Content-type`` header is overwritten with
    ``text/html; charset=utf-8``, the decompressed data is stored on *r*
    and *r* is set as the browser's current response. Responses without
    that header, or with another encoding, are left untouched.
    """
    headers = r.info()
    # .get() avoids a KeyError when the server did not compress the body
    # and therefore sent no Content-Encoding header at all.
    encoding = headers.get('Content-Encoding')
    if encoding is None or encoding.lower() != 'gzip':
        return

    import gzip
    print("********************")
    print("Deflating gzip response")
    print("********************")
    gz = gzip.GzipFile(fileobj=r, mode='rb')
    try:
        html = gz.read()
    finally:
        gz.close()
    headers["Content-type"] = "text/html; charset=utf-8"
    r.set_data(html)
    b.set_response(r)
|
|
|
115 |
|
|
|
116 |
|
|
|
117 |
|
|
|
118 |
|
|
|
119 |
def main():
    """Log in, request a product report, poll for it and download it.

    Steps: log in via ``login``, open the report request URL, read the
    status page to obtain the batch id, then poll the status page up to
    six times, sleeping 600 seconds between polls, until a download link
    is available. The process exits with status 1 when no link shows up;
    otherwise the link's ``href`` is passed to ``fetchFile``.
    """
    print("Opening Seller Central login page")
    login_url = "https://sellercentral.amazon.in/gp/homepage.html"
    br = login(login_url)

    report_url = "https://sellercentral.amazon.in/gp/upload-download-utils/requestReport.html?ie=UTF8&type=nemoProductReports"
    br = requestReport(br, report_url)

    status_url = "https://sellercentral.amazon.in/gp/upload-download-utils/reportStatusData.html"
    br, page = checkStatus(br, status_url)
    br, batchId = getReportBatchId(br, page)
    print("*********************************")
    # Two spaces on purpose: reproduces the old ``print "... ", batchId``
    # output (trailing space in the literal plus the print separator).
    print("Batch Id for request is  %s" % batchId)
    print("*********************************")

    max_attempts = 6
    download_link = None
    for attempt in range(1, max_attempts + 1):
        br, download_link = downloadReport(br, batchId, status_url)
        if download_link is not None:
            break
        # Only announce and wait when another poll will actually follow;
        # previously the last failure still slept before giving up.
        if attempt < max_attempts:
            print("File not ready for download yet.Will try again after 10 minutes.")
            sleep(600)

    if download_link is None:
        print("File not available for download after multiple retries")
        sys.exit(1)

    fetchFile(download_link['href'], br)


if __name__ == "__main__":
    main()
|