Subversion Repositories SmartDukaan

Rev

Rev 12256 | Blame | Compare with Previous | Last modification | View Log | RSS feed

from BeautifulSoup import BeautifulSoup
import mechanize
import re

class SellerCentralScraper:
    """Log in to Amazon Seller Central and read SKU details from its pages.

    Every method works on a ``mechanize.Browser`` created by
    ``getBrowserObject``. Status messages are written to stdout with
    single-argument ``print(...)`` calls, which produce the same output as
    the Python 2 print statement.
    """

    # NOTE(review): credentials are embedded in source control. They are kept
    # as defaults so existing ``login(url)`` callers keep working; prefer
    # passing ``username``/``password`` from configuration instead.
    DEFAULT_USERNAME = "kshitij.sood@saholic.com"
    DEFAULT_PASSWORD = "pioneer"

    def getBrowserObject(self):
        """Build and return a mechanize browser with browser-like headers.

        The browser gets a fresh cookie jar, follows redirects and
        http-equiv/refresh directives (refresh wait capped at 1 second),
        ignores robots.txt and has all debug output disabled.
        """
        import cookielib
        br = mechanize.Browser(factory=mechanize.RobustFactory())
        cj = cookielib.LWPCookieJar()
        br.set_cookiejar(cj)
        br.set_handle_equiv(True)
        br.set_handle_redirect(True)
        br.set_handle_referer(True)
        br.set_handle_robots(False)
        br.set_debug_http(False)
        br.set_debug_redirects(False)
        br.set_debug_responses(False)

        br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

        # The server is told gzip is acceptable, which is why responses are
        # passed through ungzipResponse() before being parsed.
        br.addheaders = [
            ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
            ('Accept-Encoding', 'gzip,deflate,sdch'),
            ('Accept-Language', 'en-US,en;q=0.8'),
            ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
        ]
        return br

    def login(self, url, username=None, password=None):
        """Open ``url``, fill in the ``signinWidget`` form and submit it.

        Args:
            url: address of the page that carries the sign-in form.
            username: account name; defaults to ``DEFAULT_USERNAME``.
            password: account password; defaults to ``DEFAULT_PASSWORD``.

        Returns:
            The browser used for the login, for use in later requests.
            The outcome of the submit is not inspected here.
        """
        if username is None:
            username = self.DEFAULT_USERNAME
        if password is None:
            password = self.DEFAULT_PASSWORD

        br = self.getBrowserObject()
        # NOTE(review): the page is opened twice and the first response is
        # discarded; presumably the first hit only primes cookies/redirects.
        # Kept as-is - confirm against the live site before removing.
        br.open(url)
        response = br.open(url)
        self.ungzipResponse(response, br)
        br.select_form(name="signinWidget")
        br.form['username'] = username
        br.form['password'] = password
        print("********************")
        print("Attempting to Login")
        print("********************")
        br.submit()
        return br

    def requestSku(self, br, skuUrl):
        """Fetch ``skuUrl`` with ``br`` and return the first SKU's details.

        Returns:
            ``(asin, inventory, ourPrice)`` as produced by ``getSkuDetails``,
            or ``('', 0, 0)`` when the page cannot be parsed.
        """
        print("********************")
        print("Requesting SKU Details")
        print("********************")
        response = br.open(skuUrl)
        self.ungzipResponse(response, br)
        page = response.read()
        try:
            return self.getSkuDetails(br, page)
        except Exception:
            # Best effort: an unexpected page layout yields the empty result.
            # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit
            # still propagate.
            return '', 0, 0

    def getSkuDetails(self, br, page):
        """Parse the manage-inventory table in ``page``.

        Only the first row whose id matches ``sku-.*`` is read. The ASIN is
        the text of cell 5. Inventory and price come from the ``inv`` and
        ``price`` input fields when the row has them, otherwise from the
        text of cells 8 and 10.

        Args:
            br: unused; kept so existing callers keep working.
            page: HTML text of the product summary page.

        Returns:
            ``(asin, inventory, ourPrice)``.

        Raises:
            AttributeError or IndexError when the expected table, row or
            cells are missing (``requestSku`` turns these into its fallback).
        """
        soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
        table = soup.find("table", {"class": "data-display manageTable"})
        skuRows = table.findAll("tr", {"id": re.compile('sku-.*')})
        firstRow = skuRows[0]
        cells = firstRow.findAll("td")

        asin = cells[5].text
        inventory = self._inputValueOrCellText(firstRow, cells, "inv", 8)
        ourPrice = self._inputValueOrCellText(firstRow, cells, "price", 10)
        return asin, inventory, ourPrice

    def _inputValueOrCellText(self, row, cells, inputName, cellIndex):
        """Return the ``value`` of the named input in ``row``, else a cell's text."""
        inputTag = row.find(attrs={"name": inputName})
        try:
            return inputTag['value']
        except (TypeError, KeyError):
            # TypeError: no such input (find() returned None);
            # KeyError: the input has no ``value`` attribute.
            return cells[cellIndex].text

    def ungzipResponse(self, r, b):
        """Decompress ``r`` in place when it is gzip-encoded.

        When the ``Content-Encoding`` header is ``gzip`` the body is
        decompressed, the content type is set to UTF-8 HTML, the data is
        stored back on ``r`` and ``r`` is installed as the current response
        of browser ``b``. Responses without that header, or with any other
        encoding, are left untouched.
        """
        headers = r.info()
        # .get() rather than indexing: the header is absent on uncompressed
        # responses and indexing would raise KeyError there.
        encoding = headers.get('Content-Encoding') or ''
        if encoding.strip().lower() != 'gzip':
            return

        import gzip
        print("********************")
        print("Deflating gzip response")
        print("********************")
        gz = gzip.GzipFile(fileobj=r, mode='rb')
        try:
            html = gz.read()
        finally:
            gz.close()
        headers["Content-type"] = "text/html; charset=utf-8"
        r.set_data(html)
        b.set_response(r)




def main():
    """Log in to Seller Central and print the details found for keyword 2287."""
    homepage = "https://sellercentral.amazon.in/gp/homepage.html"
    summary_page = "https://sellercentral.amazon.in/myi/search/ProductSummary?keyword=2287"

    print("Opening Seller Central login page")
    scraper = SellerCentralScraper()
    session = scraper.login(homepage)

    # requestSku returns an (asin, inventory, price) tuple; print it as-is.
    details = scraper.requestSku(session, summary_page)
    print(details)
    
    
# Run the sample scrape only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()