WebSVN – SmartDukaan – Blame – /trunk/runutils/rankcheck.py

Rev	Author	Line No.	Line
1746	vikas	1	`#!/usr/bin/python`
		2
		3	`"""`
		4
		5	`This script takes a Domain, a Search String and a Google Locale, then reports the position`
		6	`(and results page) at which the Domain appears in that Locale's Google results for the Search String.`
		7
		8
		9	`Usage example:`
		10
		11	`rankcheck {domain} {searchstring} {locale}`
		12
		13
		14	`Output example:`
		15
		16	`rankcheck geekology.co.za 'bash scripting' .co.za`
		17	`- The domain 'geekology.co.za' is listed in position 2 (page 1) for the search 'bash+scripting' on google.co.za`
		18
		19	`"""`
		20
# Module metadata. The "$Revision: ... $" / "$Date: ... $" strings look like
# version-control keyword expansions -- presumably maintained by the VCS, not by hand.
__author__ = "Willem van Zyl (willem@geekology.co.za)"
__version__ = "$Revision: 1.5 $"
__date__ = "$Date: 2009/02/10 12:10:24 $"
__license__ = "GPLv3"
		25
		26	`import sys, pycurl, re`
		27	`import datetime`
		28
# Search strings to be monitored; main() runs one rank lookup per entry.
SEARCH_STRINGS = ['spice qt 68']
# Some initial setup.
# User-Agent header sent with every request (set via pycurl.USERAGENT).
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 6.0)'
# USER_AGENT = 'Mozilla/5.0'
# Domain whose position in the result list is being looked for.
FIND_DOMAIN = 'www.saholic.com'
# Suffix appended to 'http://www.google' to form the search host.
LOCALE = '.co.in'
# Number of result pages fetched per search string before giving up.
MAX_PAGE = 1
# Results requested per fetched page ('&num='); also the step of the '&start=' offset.
NUM_PER_PAGE = 100
		38
		39	`# define class to store result:`
		40	`class RankCheck:`
		41	`def __init__(self):`
		42	`self.contents = ''`
		43
		44	`def body_callback(self, buf):`
		45	`self.contents = self.contents + buf`
		46
		47
		48	`def main():`
		49	`for search_string in SEARCH_STRINGS:`
		50	`find_google_position(search_string)`
		51
		52	`def init_curl(rankRequest, rankCheck):`
		53	`# set up curl:`
		54	`rankRequest.setopt(pycurl.USERAGENT, USER_AGENT)`
		55	`rankRequest.setopt(pycurl.FOLLOWLOCATION, 1)`
		56	`rankRequest.setopt(pycurl.AUTOREFERER, 1)`
		57	`rankRequest.setopt(pycurl.WRITEFUNCTION, rankCheck.body_callback)`
		58	`rankRequest.setopt(pycurl.COOKIEFILE, '')`
		59	`rankRequest.setopt(pycurl.HTTPGET, 1)`
		60	`rankRequest.setopt(pycurl.REFERER, '')`
		61
		62	`def search_page(page, page_url):`
		63	`# instantiate curl and result objects:`
		64	`rankRequest = pycurl.Curl()`
		65	`rankCheck = RankCheck();`
		66	`init_curl(rankRequest, rankCheck)`
		67	`rankRequest.setopt(pycurl.URL, page_url + '&start=' + str(page * NUM_PER_PAGE))`
		68	`rankRequest.perform()`
		69	`# close curl:`
		70	`rankRequest.close()`
		71
		72	`# collect the search results`
		73	`html = rankCheck.contents`
		74	`counter = page*NUM_PER_PAGE`
		75	`result = 0`
		76
		77	`url=unicode(r'(<h3 class="r"><a href=")((https?):((//))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)')`
		78	`for google_result in re.finditer(url, html):`
		79	`# print m.group()`
		80	`this_url = google_result.group()`
		81	`this_url = this_url[23:]`
		82	`counter += 1`
		83
		84	`google_url_regex = re.compile("((https?):((//))+([\w\d:#@%/;$()~_?\+-=\\\.&])" + FIND_DOMAIN + "+([\w\d:#@%/;$()~_?\+-=\\\.&]))")`
		85	`google_url_regex_result = google_url_regex.match(this_url)`
		86	`if google_url_regex_result:`
		87	`result = counter`
		88	`break`
		89
		90	`return result`
		91
		92	`def find_google_position(search_string):`
		93	`ENGINE_URL = 'http://www.google' + LOCALE + '/search?q=' + search_string.replace(' ', '+') + '&num=' + str(NUM_PER_PAGE)`
		94	`# print ENGINE_URL`
		95
		96	`# run curl:`
		97	`for i in range(0, MAX_PAGE):`
		98	`result = search_page(i, ENGINE_URL)`
		99	`if result != 0:`
		100	`break`
		101
		102	`# show results`
		103	`if result == 0:`
		104	`print "%s, %s, %d, %d" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M"), search_string, NUM_PER_PAGEMAX_PAGE, MAX_PAGENUM_PER_PAGE/10)`
		105	`else:`
		106	`print "%s, %s, %d, %d" % (datetime.datetime.now().strftime("%Y-%m-%d %H:%M"), search_string, result, result/10 + 1)`
		107
		108
# Run main. This call is unconditional, so it also executes when the module is imported.
main()

Subversion Repositories SmartDukaan

(root)/trunk/runutils/rankcheck.py – Rev 1746