# Subversion Repositories SmartDukaan
#
# Rev
#
# Rev 9167 | Blame | Compare with Previous | Last modification | View Log | RSS feed

from BeautifulSoup import BeautifulSoup
import mechanize
import sys
import cookielib
from time import sleep

def getBrowserObject():
    """Create and configure the mechanize browser used by this script.

    The browser is given a fresh LWP cookie jar, has http-equiv, redirect
    and referer handling switched on, robots.txt handling and all HTTP
    debugging switched off, refresh handling enabled with ``max_time=1``,
    and a fixed set of Chrome-like request headers.

    Returns:
        The configured ``mechanize.Browser`` instance.
    """
    request_headers = [
        ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip,deflate,sdch'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]

    browser = mechanize.Browser(factory=mechanize.RobustFactory())
    browser.set_cookiejar(cookielib.LWPCookieJar())

    # Protocol behaviour.
    browser.set_handle_equiv(True)
    browser.set_handle_redirect(True)
    browser.set_handle_referer(True)
    browser.set_handle_robots(False)

    # Keep mechanize's wire-level debugging quiet.
    browser.set_debug_http(False)
    browser.set_debug_redirects(False)
    browser.set_debug_responses(False)

    browser.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    browser.addheaders = request_headers
    return browser

def login(url, username="amit.gupta@shop2020.in", password="shop2020"):
    """Open the sign-in page at *url* and submit its ``signinWidget`` form.

    Args:
        url: Address of the page that contains the sign-in form.
        username: Value placed in the form's ``username`` field.
        password: Value placed in the form's ``password`` field.

    Returns:
        The browser object that performed the sign-in, for use in later
        requests.

    The outcome of the submission is not inspected here.
    """
    # NOTE(review): the default credentials live in source control; callers
    # should pass their own and the defaults should move to configuration.
    br = getBrowserObject()
    # NOTE(review): the page is requested twice -- presumably the first hit
    # only seeds cookies. Confirm before collapsing into a single request.
    br.open(url)
    response = br.open(url)
    ungzipResponse(response, br)
    br.select_form(name="signinWidget")
    br.form['username'] = username
    br.form['password'] = password
    # Announce the attempt before submitting, not after it has happened.
    print("********************")
    print("Attempting to Login")
    print("********************")
    br.submit()
    return br

def requestReport(br, reportUrl):
    """Open *reportUrl* with the given browser and return the browser.

    Args:
        br: Browser object used to issue the request.
        reportUrl: Address that is opened to request the product report.

    Returns:
        The same browser object that was passed in.
    """
    for line in ("********************",
                 "Requesting Product Report",
                 "********************"):
        print(line)
    br.open(reportUrl)
    return br

def checkStatus(br, statusUrl):
    """Fetch the status page and return its body.

    Args:
        br: Browser object used to issue the request.
        statusUrl: Address of the status page.

    Returns:
        A ``(br, page)`` tuple: the browser that was passed in and the
        body read from the response after ``ungzipResponse`` has
        processed it.
    """
    status_response = br.open(statusUrl)
    ungzipResponse(status_response, br)
    body = status_response.read()
    return br, body

def getReportBatchId(br, page):
    """Extract a batch id from the status page markup.

    The id is the text of the second ``<td>`` in the first ``<tr>`` whose
    class is ``list-row-even``.

    Args:
        br: Browser object; passed through untouched.
        page: Markup of the status page.

    Returns:
        A ``(br, batch_id)`` tuple.
    """
    for line in ("********************",
                 "Fetching Batch Id for request",
                 "********************"):
        print(line)
    document = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    first_even_row = document.find("tr", {"class": "list-row-even"})
    cells = first_even_row.findAll("td")
    batch_cell = cells[1]
    return br, batch_cell.text

def downloadReport(br, batchId, statusUrl):
    """Look on the status page for a ready download matching *batchId*.

    Rows with class ``list-row-even`` are examined first, then rows with
    class ``list-row-odd``. A row matches when its second cell's text
    equals *batchId* and its fifth cell's text is ``'Ready'``.

    Args:
        br: Browser object used to fetch the status page.
        batchId: Batch id text to look for.
        statusUrl: Address of the status page.

    Returns:
        A ``(br, link)`` tuple. ``link`` is the result of searching the
        matching row's sixth cell for an ``<a class="buttonImage">``
        element, or ``None`` when no row matches.
    """
    for line in ("********************",
                 "Check request status",
                 "********************"):
        print(line)

    status_response = br.open(statusUrl)
    ungzipResponse(status_response, br)
    document = BeautifulSoup(status_response.read(),
                             convertEntities=BeautifulSoup.HTML_ENTITIES)

    even_rows = document.findAll("tr", {"class": "list-row-even"})
    odd_rows = document.findAll("tr", {"class": "list-row-odd"})

    # Even rows are scanned in full before any odd row is considered.
    for table_row in even_rows + odd_rows:
        cells = table_row.findAll("td")
        if cells[1].text != batchId or cells[4].text != 'Ready':
            continue
        return br, cells[5].find("a", {"class": "buttonImage"})

    return br, None
            
def fetchFile(link, br, batchId):
    """Download *link* and write the response body to ``<batchId>.txt``.

    Args:
        link: Address of the file to download.
        br: Browser object used to issue the request.
        batchId: Batch id text; used as the stem of the output file name,
            which is created relative to the current working directory.

    The response headers are printed before the body is saved.
    """
    print("********************")
    print("Writing file to disk")
    print("********************")
    response = br.open(link)
    print(br.response().info())
    ungzipResponse(response, br)
    # The context manager closes the file even if the write fails.
    with open(batchId + '.txt', 'wb') as output:
        output.write(br.response().get_data())
    

def ungzipResponse(r, b):
    """Decompress a gzip-encoded response in place.

    When the response's ``Content-Encoding`` header is ``gzip``, the body
    is decompressed, the ``Content-type`` header is set to
    ``text/html; charset=utf-8``, the decompressed data is stored back on
    the response and the response is installed on the browser. Responses
    without that header, or with any other encoding, are left untouched.

    Args:
        r: Response object providing ``info()``, ``read()`` and
            ``set_data()``.
        b: Browser object providing ``set_response()``.
    """
    headers = r.info()
    # Use .get(): indexing a header object raises KeyError on Python 2 when
    # the server did not send Content-Encoding at all.
    encoding = headers.get('Content-Encoding') or ''
    if encoding.strip().lower() != 'gzip':
        return

    import gzip
    print("********************")
    print("Deflating gzip response")
    print("********************")
    gz = gzip.GzipFile(fileobj=r, mode='rb')
    try:
        html = gz.read()
    finally:
        gz.close()
    headers["Content-type"] = "text/html; charset=utf-8"
    r.set_data(html)
    b.set_response(r)




def main():
    """Sign in, request a product report, wait for it and save it to disk.

    The status page is polled up to six times, ten minutes apart. If the
    report is still not ready after the last poll the process exits with
    status 1.
    """
    max_attempts = 6
    retry_delay_seconds = 600

    print("Opening Seller Central login page")
    login_url = "https://sellercentral.amazon.in/gp/homepage.html"
    br = login(login_url)
    report_url = "https://sellercentral.amazon.in/gp/upload-download-utils/requestReport.html?ie=UTF8&type=nemoProductReports"
    br = requestReport(br, report_url)
    status_url = "https://sellercentral.amazon.in/gp/upload-download-utils/reportStatusData.html"
    br, page = checkStatus(br, status_url)
    br, batchId = getReportBatchId(br, page)
    print("*********************************")
    print("%s %s" % ("Batch Id for request is ", batchId))
    print("*********************************")

    download_link = None
    for attempt in range(1, max_attempts + 1):
        br, download_link = downloadReport(br, batchId, status_url)
        if download_link is not None:
            break
        if attempt == max_attempts:
            # Give up right away; sleeping again before exiting only
            # delays the failure.
            print("File not available for download after multiple retries")
            sys.exit(1)
        print("File not ready for download yet.Will try again after 10 minutes.")
        sleep(retry_delay_seconds)

    fetchFile(download_link['href'], br, batchId)
    
    
# Run the report download only when this file is executed as a script.
if __name__ == "__main__":
    main()