Rev 9236 | Blame | Compare with Previous | Last modification | View Log | RSS feed
"""Scrape the 91mobiles merchant panel and look up Saholic entity ids.

Python 2 script. Requires BeautifulSoup 3 and mechanize.
"""
from BeautifulSoup import BeautifulSoup
import mechanize
import urllib2
import sys
import urllib
import cookielib
import gzip

# Redirect endpoint used to reach the Saholic product page for a mobile id.
REDIRECT_URL = ("http://www.91mobiles.com/redir.php?origin=detail&mobileid=%s"
                "&storename=saholic.com&sf=&storeflag=1&cc=")

# Marker that ends the interesting part of a product link.
AFFILIATE_MARKER = "?afid"


def getBrowserObject():
    """Return a mechanize browser with a cookie jar and browser-like headers."""
    br = mechanize.Browser(factory=mechanize.RobustFactory())
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.set_handle_equiv(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.set_debug_http(False)
    br.set_debug_redirects(False)
    br.set_debug_responses(False)
    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)
    # NOTE(review): deflate and sdch are advertised here, but ungzipResponse
    # only decodes gzip bodies -- confirm the site never answers with those.
    br.addheaders = [
        ('User-agent',
         'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 '
         '(KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept',
         'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip,deflate,sdch'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]
    return br


def login(url):
    """Submit the form named "login" found at *url*.

    Returns the browser object so later requests reuse its cookie jar.
    """
    br = getBrowserObject()
    # The page is deliberately requested twice, as the script always did.
    # NOTE(review): the first request presumably only primes the session
    # cookies -- confirm before removing it.
    br.open(url)
    response = br.open(url)
    ungzipResponse(response, br)
    br.select_form(name="login")
    # NOTE(review): 'unmae' looks like a typo for 'uname'; it is kept because
    # it must match the field name used by the remote form -- verify.
    # SECURITY: credentials are hard-coded in source; they should be moved to
    # configuration or the environment.
    br.form['unmae'] = "saholic"
    br.form['pword'] = "2020shop"
    response = br.submit()
    print("********************")
    print("Attempting to Login")
    print("********************")
    ungzipResponse(response, br)
    return br


def _parsePrice(text):
    """Convert a price cell such as "Rs.12,345" into an int."""
    return int(text.replace("Rs.", '').replace(",", ''))


def fetchItemDetails(merchant_url, br):
    """Read the merchant listing and return one list per table row.

    Each row list holds, in order and when the row has enough children:
    the product id taken from the last mid="..." attribute of child 4,
    the cheapest price (child 8) and the Saholic price (child 16), both
    as ints.

    Raises ValueError when child 4 carries no mid="..." attribute or when a
    price cell is not numeric.
    """
    response = br.open(merchant_url)
    ungzipResponse(response, br)
    page = response.read().decode("utf-8")
    soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
    data = []
    for tr in soup.findAll("tr", {"class": "gradeX"}):
        new = []
        # Positions count every child node of the row (which may include
        # text nodes between cells), not just table columns.
        for position, t in enumerate(tr, 1):
            if position == 4:
                print("********************")
                te = str(t)
                start = te.rindex("mid=\"") + len("mid=\"")
                end = te.rindex("\">", start)
                parse = te[start:end]
                print("Product name field : %s" % parse)
                new.append(parse)
            if position == 8:
                print("********************")
                print("Cheapest price : %s" % t.text)
                new.append(_parsePrice(t.text))
            if position == 16:
                print("********************")
                print("Saholic price : %s" % t.text)
                new.append(_parsePrice(t.text))
        data.append(new)
    print("******************************")
    print("Data Populated from 91 Mobiles")
    print("******************************")
    return data


def _extractEntityId(url):
    """Return the entity id embedded in *url*, or None without the marker.

    The id is the text after the last "-" in the part of the URL that
    precedes the last "?afid"; with no "-" that whole part is returned.
    """
    end = url.rfind(AFFILIATE_MARKER)
    if end == -1:
        return None
    our_url = url[:end]
    return our_url[our_url.rfind("-") + 1:]


def getSaholicEntityId(data):
    """Print the Saholic entity id behind every product row in *data*.

    *data* is the list returned by fetchItemDetails; only the first element
    of each row (the product id) is used.
    """
    br = getBrowserObject()
    for i in data:
        if not i:
            # Row had too few children to yield a product id.
            continue
        response = br.open(REDIRECT_URL % (i[0],))
        ungzipResponse(response, br)
        page = response.read().decode("utf-8")
        soup = BeautifulSoup(page, convertEntities=BeautifulSoup.HTML_ENTITIES)
        for a in soup.findAll('a', href=True):
            entityId = _extractEntityId(str(a['href']))
            if entityId is None:
                # Not an affiliate link; previously this raised ValueError.
                continue
            print("*****************************")
            print("Trying to fetch entity id....")
            print("EntityId : %s" % entityId)
            print("*****************************")


def ungzipResponse(r, b):
    """Replace a gzip-encoded body of response *r* with its decoded form.

    The decoded response is installed on browser *b*. Responses that are not
    gzip-encoded, including those with no Content-Encoding header at all,
    are left untouched.
    """
    headers = r.info()
    # .get() instead of [] because indexing a missing header raises KeyError
    # on the rfc822-style message objects Python 2 returns.
    encoding = headers.get('Content-Encoding') or ''
    if encoding.strip().lower() != 'gzip':
        return
    print("********************")
    print("Deflating gzip response")
    print("********************")
    gz = gzip.GzipFile(fileobj=r, mode='rb')
    try:
        html = gz.read()
    finally:
        gz.close()
    headers["Content-type"] = "text/html; charset=utf-8"
    r.set_data(html)
    b.set_response(r)


def main():
    """Log in, collect the item rows and print their Saholic entity ids."""
    print("Opening 91 Mobiles merchant login page")
    login_url = "http://www.91mobiles.com/91merchants/login.php"
    merchant_url = "http://www.91mobiles.com/91merchants/manage_merchants.php"
    br = login(login_url)
    itemData = fetchItemDetails(merchant_url, br)
    getSaholicEntityId(itemData)


if __name__ == "__main__":
    main()