# Repository viewer residue (not part of the script): Rev 1786 | Rev 1789 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
#!/usr/bin/python"""This script accepts Domain, Search String and Google Locale arguments, then returnswhich Search String results page for the Google Locale the Domain appears on.Usage example:rankcheck {domain} {searchstring} {locale}Output example:rankcheck geekology.co.za 'bash scripting' .co.za- The domain 'geekology.co.za' is listed in position 2 (page 1) for the search 'bash+scripting' on google.co.za"""__author__ = "Willem van Zyl (willem@geekology.co.za)"__version__ = "$Revision: 1.5 $"__date__ = "$Date: 2009/02/10 12:10:24 $"__license__ = "GPLv3"import sys, pycurl, reimport datetime# Search Strings to be monitoredSEARCH_STRINGS = ['3g mobile phone','3g mobile phones','all mobile phones','android phone','android phones','best mobile phone','best smart phone','BLACKBERRY 8520 Unlocked Gemini','BLACKBERRY 9300 Curve 3G Curve 3G','BLACKBERRY 9780 Bold','BLACKBERRY 9800 Torch ','blackberry mobile phones','blackberry phones','business mobile phones','buy mobile phones','buy mobile phones online','cheap mobile phone','cheap mobile phones','cheapest mobile phone','cheapest mobile phones','compare mobile phone','compare mobile phones','Dell M01M Streak','Dell M01M Streak Black','dual sim mobile phone','HTC A7272 Desire Z','HTC A9191 Desire HD','htc mobile phone','htc mobile phones','htc phones','HTC S710E Incredible S','htc smart','latest mobile phone','latest mobile phones','LG A165 ','LG GS108 ','LG GS155 ','LG GU220 ','LG GX200 ','LG GX300 ','LG LG P520 ','lg mobile phone','lg mobile phones','LG P350 Optimus ME','LG P500 Optimus','lg phones','LG S310 ','LG T300 Cookie Joy','Micromax Q7 ','Micromax X-226+ ','Micromax X-265 ','Micromax X-410 ','mobile phone','mobile phone compare','mobile phone comparison','mobile phone models','mobile phone price','mobile phone prices','mobile phone reviews','mobile phones','mobile phones comparison','mobile phones prices','Motorola EX115 Starling','Motorola MB502 Charm ','motorola mobile phones','new mobile phones','Nokia 1280 
','Nokia 1616 ','Nokia 2700c ','Nokia 5130c ','Nokia 5233 ','Nokia C1-01 ','Nokia C1-02 ','Nokia C2-01 ','Nokia C3-00 ','Nokia C3-01 ','Nokia C5-00 ','Nokia C5-03 ','Nokia C6-00 ','Nokia C6-01 ','Nokia C7-00 ','Nokia E5-00 ','Nokia E7-00 ','Nokia N8 ','Nokia X-2 ','Nokia X2-01 ','Nokia X3-02 ','qwerty mobile phones','Samsung B7510 Galaxy Pro','Samsung B7722 Star DUOS ','Samsung C3010s ','Samsung C3200 Monte Bar ','Samsung C3222 Qwerty','Samsung C3303 Champ ','Samsung C3303i Champ Mega Cam','Samsung C3530 Metro','Samsung E1081','Samsung E1160','Samsung E1252','Samsung E2152-M with 1GB MMC','Samsung E2652 Champ Duos','Samsung i9003 Galaxy S (4GB) ','samsung mobile','samsung mobile phone','samsung mobile phones','Samsung P1000-Basic Galaxy Tablet ','Samsung P1000-Basic Galaxy Tablet Chic White','samsung phones','Samsung S3310i Metro with HS and 2Gb','Samsung S3353 Trevi ','Samsung S3850 Corby-II','Samsung S5253 wo Card Wave Series (525)','Samsung S5263 Star-II','Samsung S5333 wo Card Wave -Side Slider','Samsung S5570 Galaxy POP ','Samsung S8530 Wave 2','samsung smart phone','smart mobile phone','smart mobile phones','smart phone','smartphones','Sony Ericsson E15i Xperia X8 ','Sony Ericsson E15i Xperia X8 Xperia','Sony Ericsson E15i Xperia X8 Xperia Dark Blue','Sony Ericsson E15i Xperia X8 Xperia Swing Pink','Sony Ericsson LT15i ARC','Sony Ericsson LT15i ARC Midnight Blue','Sony Ericsson LT15i ARC Misty Silver','sony ericsson mobile phone','sony ericsson mobile phones','sony ericsson phones','Sony Ericsson R800i Xperia Play','Sony Ericsson R800i Xperia Play Black','Sony Ericsson W100i Spiro ','Sony Ericsson W100i Spiro Contrast Black','Sony Ericsson W100i Spiro Stealth Black','Sony Ericsson W100i Spiro Sunset Pink','Sony Ericsson W150i Yendo ','Sony Ericsson W150i Yendo Black & Red','Sony Ericsson W150i Yendo White Blue','Sony Ericsson W20i Zylo','Sony Ericsson W20i Zylo Chacha Silver','Sony Ericsson W20i Zylo Meteorite White','Sony Ericsson W20i Zylo Swing Pink','Sony 
Ericsson W20i Zylo ZAZZ BLACK','Sony Ericsson X10 Mini Pro Mini Pro (U20i)','Sony Ericsson Xperia Play R800i','Spice G6550 ','Spice G6550 Black','Spice M5100 ','Spice M5100 Black','Spice M5570 ','Spice M5570 Black','Spice M5570 Blue','Spice M5570 Red','Spice M6350 ','Spice M6350 Black & Golden','Spice M6460 ','Spice M6460 Brown','Spice M6460 Gray','Spice M9000 ','Spice M9000 Black','Spice MI310 ','Spice MI310 Brown','Spice M4250 ','Spice M4250 Black + Red','Spice M4580 DV ','Spice M4580 DV Black & Golden','Spice M5056 ','Spice M5056 Black','Spice M5161n ','Spice M5161n Black','Spice M5161n Black & Golden','Spice M5170 ','Spice M5170 Grey Black','Spice M5262 ','Spice M5262 Black','Spice M5262 Black Blue','Spice M5454 ','Spice M5454 Silver Blue','Spice M5454 Silver Red','Spice M5750 ','Spice M5750 Black & Red','Spice M6363 ','Spice M6363 Black-Orange','Spice M6464 ','Spice M6464 Black','Spice QT58 Mini ','Spice QT58 Mini Black','Spice QT58 Mini Red','Spice QT58 Mini White','Spice QT61 Transforme Transformer','Spice QT61 Transforme Transformer Black','Spice QT68 ','Spice QT68 Black','Spice S1200 ','Spice S1200 Black','top mobile phones','touch screen mobile phones','touch screen phones']# some initial setup:USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 6.0)'# USER_AGENT = 'Mozilla/5.0'FIND_DOMAIN = 'www.saholic.com'LOCALE = '.co.in'MAX_PAGE = 1NUM_PER_PAGE = 100# define class to store result:class RankCheck:def __init__(self):self.contents = ''def body_callback(self, buf):self.contents = self.contents + bufdef main():for search_string in SEARCH_STRINGS:find_google_position(search_string)def init_curl(rankRequest, rankCheck):# set up curl:rankRequest.setopt(pycurl.USERAGENT, USER_AGENT)rankRequest.setopt(pycurl.FOLLOWLOCATION, 1)rankRequest.setopt(pycurl.AUTOREFERER, 1)rankRequest.setopt(pycurl.WRITEFUNCTION, rankCheck.body_callback)rankRequest.setopt(pycurl.COOKIEFILE, '')rankRequest.setopt(pycurl.HTTPGET, 1)rankRequest.setopt(pycurl.REFERER, '')def search_page(page, 
page_url):# instantiate curl and result objects:rankRequest = pycurl.Curl()rankCheck = RankCheck();init_curl(rankRequest, rankCheck)rankRequest.setopt(pycurl.URL, page_url + '&start=' + str(page * NUM_PER_PAGE))rankRequest.perform()# close curl:rankRequest.close()# collect the search resultshtml = rankCheck.contentscounter = page*NUM_PER_PAGEresult = 0url=unicode(r'(<h3 class="r"><a href=")((https?):((//))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)')for google_result in re.finditer(url, html):# print m.group()this_url = google_result.group()this_url = this_url[23:]counter += 1google_url_regex = re.compile("((https?):((//))+([\w\d:#@%/;$()~_?\+-=\\\.&])*" + FIND_DOMAIN + "+([\w\d:#@%/;$()~_?\+-=\\\.&])*)")google_url_regex_result = google_url_regex.match(this_url)if google_url_regex_result:result = counterbreakreturn resultdef find_google_position(search_string):ENGINE_URL = 'http://www.google' + LOCALE + '/search?q=' + search_string.replace(' ', '+') + '&num=' + str(NUM_PER_PAGE)# print ENGINE_URL# run curl:for i in range(0, MAX_PAGE):result = search_page(i, ENGINE_URL)if result != 0:breakf = open('/var/lib/tomcat6/webapps/db/googleranks/rank-' + datetime.datetime.now().strftime("%Y-%m-%d") + '.txt', 'a')# show resultsif result == 0:f.write("{0:s}, {1:s}, {2:d}, {3:d}\n".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M"), search_string, NUM_PER_PAGE*MAX_PAGE, MAX_PAGE*NUM_PER_PAGE/10))else:f.write("{0:s}, {1:s}, {2:d}, {3:d}\n".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M"), search_string, result, result/10 + 1))# Run Mainmain()