Subversion Repositories SmartDukaan

Rev

Rev 1789 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
1746 vikas 1
#!/usr/bin/python
2
 
3
"""
4
 
1789 vikas 5
 This script appends the date of the run, the search string, and the position and page number
 where the first saholic result appears in Google search results to a daily rank file.
1746 vikas 7
 
8
"""
9
 
10
import sys, pycurl, re
11
import datetime
12
 
13
# Search Strings to be monitored
1782 vikas 14
SEARCH_STRINGS = [
15
'3g mobile phone',
16
'3g mobile phones',
17
'all mobile phones',
18
'android phone',
19
'android phones',
20
'best mobile phone',
21
'best smart phone',
22
'BLACKBERRY 8520 Unlocked Gemini',
23
'BLACKBERRY 9300 Curve 3G Curve 3G',
24
'BLACKBERRY 9780 Bold',
25
'BLACKBERRY 9800 Torch ',
26
'blackberry mobile phones',
27
'blackberry phones',
28
'business mobile phones',
29
'buy mobile phones',
30
'buy mobile phones online',
31
'cheap mobile phone',
32
'cheap mobile phones',
33
'cheapest mobile phone',
34
'cheapest mobile phones',
35
'compare mobile phone',
36
'compare mobile phones',
37
'Dell M01M Streak',
38
'Dell M01M Streak Black',
39
'dual sim mobile phone',
40
'HTC A7272 Desire Z',
41
'HTC A9191 Desire HD',
42
'htc mobile phone',
43
'htc mobile phones',
44
'htc phones',
45
'HTC S710E Incredible S',
46
'htc smart',
47
'latest mobile phone',
48
'latest mobile phones',
49
'LG A165 ',
50
'LG GS108 ',
51
'LG GS155 ',
52
'LG GU220 ',
53
'LG GX200 ',
54
'LG GX300 ',
55
'LG LG P520 ',
56
'lg mobile phone',
57
'lg mobile phones',
58
'LG P350 Optimus ME',
59
'LG P500 Optimus',
60
'lg phones',
61
'LG S310 ',
62
'LG T300 Cookie Joy',
63
'Micromax Q7 ',
64
'Micromax X-226+ ',
65
'Micromax X-265 ',
66
'Micromax X-410 ',
67
'mobile phone',
68
'mobile phone compare',
69
'mobile phone comparison',
70
'mobile phone models',
71
'mobile phone price',
72
'mobile phone prices',
73
'mobile phone reviews',
74
'mobile phones',
75
'mobile phones comparison',
76
'mobile phones prices',
77
'Motorola EX115 Starling',
78
'Motorola MB502 Charm ',
79
'motorola mobile phones',
80
'new mobile phones',
81
'Nokia 1280 ',
82
'Nokia 1616 ',
83
'Nokia 2700c ',
84
'Nokia 5130c ',
85
'Nokia 5233 ',
86
'Nokia C1-01 ',
87
'Nokia C1-02 ',
88
'Nokia C2-01 ',
89
'Nokia C3-00 ',
90
'Nokia C3-01 ',
91
'Nokia C5-00 ',
92
'Nokia C5-03 ',
93
'Nokia C6-00 ',
94
'Nokia C6-01 ',
95
'Nokia C7-00 ',
96
'Nokia E5-00 ',
97
'Nokia E7-00 ',
98
'Nokia N8 ',
99
'Nokia X-2 ',
100
'Nokia X2-01 ',
101
'Nokia X3-02 ',
102
'qwerty mobile phones',
103
'Samsung B7510 Galaxy Pro',
104
'Samsung B7722 Star DUOS ',
105
'Samsung C3010s ',
106
'Samsung C3200 Monte Bar ',
107
'Samsung C3222 Qwerty',
108
'Samsung C3303 Champ ',
109
'Samsung C3303i Champ Mega Cam',
110
'Samsung C3530 Metro',
111
'Samsung E1081',
112
'Samsung E1160',
113
'Samsung E1252',
114
'Samsung E2152-M with 1GB MMC',
115
'Samsung E2652 Champ Duos',
116
'Samsung i9003 Galaxy S (4GB) ',
117
'samsung mobile',
118
'samsung mobile phone',
119
'samsung mobile phones',
120
'Samsung P1000-Basic Galaxy Tablet ',
121
'Samsung P1000-Basic Galaxy Tablet  Chic White',
122
'samsung phones',
123
'Samsung S3310i Metro with HS and 2Gb',
124
'Samsung S3353 Trevi ',
125
'Samsung S3850 Corby-II',
126
'Samsung S5253 wo Card Wave Series (525)',
127
'Samsung S5263 Star-II',
128
'Samsung S5333 wo Card Wave -Side Slider',
129
'Samsung S5570 Galaxy POP ',
130
'Samsung S8530 Wave 2',
131
'samsung smart phone',
132
'smart mobile phone',
133
'smart mobile phones',
134
'smart phone',
135
'smartphones',
136
'Sony Ericsson E15i Xperia X8 ',
137
'Sony Ericsson E15i Xperia X8 Xperia',
138
'Sony Ericsson E15i Xperia X8 Xperia Dark Blue',
139
'Sony Ericsson E15i Xperia X8 Xperia Swing Pink',
140
'Sony Ericsson LT15i ARC',
141
'Sony Ericsson LT15i ARC Midnight Blue',
142
'Sony Ericsson LT15i ARC Misty Silver',
143
'sony ericsson mobile phone',
144
'sony ericsson mobile phones',
145
'sony ericsson phones',
146
'Sony Ericsson R800i Xperia Play',
147
'Sony Ericsson R800i Xperia Play Black',
148
'Sony Ericsson W100i Spiro ',
149
'Sony Ericsson W100i Spiro  Contrast Black',
150
'Sony Ericsson W100i Spiro  Stealth Black',
151
'Sony Ericsson W100i Spiro  Sunset Pink',
152
'Sony Ericsson W150i Yendo ',
153
'Sony Ericsson W150i Yendo  Black & Red',
154
'Sony Ericsson W150i Yendo  White Blue',
155
'Sony Ericsson W20i Zylo',
156
'Sony Ericsson W20i Zylo Chacha Silver',
157
'Sony Ericsson W20i Zylo Meteorite White',
158
'Sony Ericsson W20i Zylo Swing Pink',
159
'Sony Ericsson W20i Zylo ZAZZ BLACK',
160
'Sony Ericsson X10 Mini Pro Mini Pro (U20i)',
161
'Sony Ericsson Xperia Play R800i',
162
'Spice  G6550 ',
163
'Spice  G6550  Black',
164
'Spice  M5100 ',
165
'Spice  M5100  Black',
166
'Spice  M5570 ',
167
'Spice  M5570  Black',
168
'Spice  M5570  Blue',
169
'Spice  M5570  Red',
170
'Spice  M6350 ',
171
'Spice  M6350  Black & Golden',
172
'Spice  M6460 ',
173
'Spice  M6460  Brown',
174
'Spice  M6460  Gray',
175
'Spice  M9000 ',
176
'Spice  M9000  Black',
177
'Spice  MI310 ',
178
'Spice  MI310  Brown',
179
'Spice M4250 ',
180
'Spice M4250  Black + Red',
181
'Spice M4580 DV ',
182
'Spice M4580 DV  Black & Golden',
183
'Spice M5056 ',
184
'Spice M5056  Black',
185
'Spice M5161n ',
186
'Spice M5161n  Black',
187
'Spice M5161n  Black & Golden',
188
'Spice M5170 ',
189
'Spice M5170  Grey Black',
190
'Spice M5262 ',
191
'Spice M5262  Black',
192
'Spice M5262  Black Blue',
193
'Spice M5454 ',
194
'Spice M5454  Silver Blue',
195
'Spice M5454  Silver Red',
196
'Spice M5750 ',
197
'Spice M5750  Black & Red',
198
'Spice M6363 ',
199
'Spice M6363  Black-Orange',
200
'Spice M6464 ',
201
'Spice M6464  Black',
202
'Spice QT58 Mini ',
203
'Spice QT58 Mini  Black',
204
'Spice QT58 Mini  Red',
205
'Spice QT58 Mini  White',
206
'Spice QT61 Transforme Transformer',
207
'Spice QT61 Transforme Transformer Black',
208
'Spice QT68 ',
209
'Spice QT68  Black',
210
'Spice S1200 ',
211
'Spice S1200  Black',
212
'top mobile phones',
213
'touch screen mobile phones',
1887 vikas 214
'touch screen phones',
215
'nokia mobile battery',
216
'Nokia Battery BL-4C'
1782 vikas 217
]
218
 
1746 vikas 219
# some initial setup:
220
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 6.0)'
221
# USER_AGENT = 'Mozilla/5.0'
222
FIND_DOMAIN = 'www.saholic.com'
223
LOCALE = '.co.in'
224
MAX_PAGE = 1
225
NUM_PER_PAGE = 100
226
 
227
# define class to store result:
228
class RankCheck:
  """Holds the response body that is handed over chunk by chunk."""

  def __init__(self):
    # Body received so far; nothing has arrived yet.
    self.contents = ''

  def body_callback(self, buf):
    """Append one received chunk to the stored body."""
    self.contents += buf
234
 
235
 
236
def main():
  """Run the rank lookup once for each entry of SEARCH_STRINGS, in list order."""
  for query in SEARCH_STRINGS:
    find_google_position(query)
239
 
240
def init_curl(rankRequest, rankCheck):
  """Apply the request options shared by every search to a curl handle.

  rankRequest -- handle whose setopt() is called for each option
  rankCheck   -- object whose body_callback is registered as the write function
  """
  # Options are applied in this order, one setopt() call per entry.
  options = (
    (pycurl.USERAGENT, USER_AGENT),
    (pycurl.FOLLOWLOCATION, 1),
    (pycurl.AUTOREFERER, 1),
    (pycurl.WRITEFUNCTION, rankCheck.body_callback),
    (pycurl.COOKIEFILE, ''),
    (pycurl.HTTPGET, 1),
    (pycurl.REFERER, ''),
  )
  for option, value in options:
    rankRequest.setopt(option, value)
249
 
250
def search_page(page, page_url):
  """Fetch one page of search results and find the first FIND_DOMAIN hit.

  page     -- zero-based page index; the request asks for results starting
              at page * NUM_PER_PAGE
  page_url -- search URL to which the '&start=' parameter is appended

  Returns the 1-based overall position (counted from page * NUM_PER_PAGE)
  of the first result link whose URL contains FIND_DOMAIN, or 0 when no
  link on this page does.  Errors raised by the transfer propagate to the
  caller.
  """
  # instantiate curl and result objects:
  rankRequest = pycurl.Curl()
  rankCheck = RankCheck()
  try:
    init_curl(rankRequest, rankCheck)
    rankRequest.setopt(pycurl.URL, page_url + '&start=' + str(page * NUM_PER_PAGE))
    rankRequest.perform()
  finally:
    # release the handle even when the transfer raises
    rankRequest.close()

  # collect the search results
  html = rankCheck.contents
  first_position = page * NUM_PER_PAGE

  # Group 1 is the fixed 23-character '<h3 class="r"><a href="' prefix and
  # group 2 is the link target itself, so no manual slicing is needed.
  result_link = re.compile(r'(<h3 class="r"><a href=")((https?):((//))+[\w\d:#@%/;$()~_?\+-=\\\.&]*)')

  for offset, google_result in enumerate(result_link.finditer(html), 1):
    this_url = google_result.group(2)
    # Literal substring test: the domain may appear anywhere in the URL (as
    # before), but its dots now only match real dots instead of acting as
    # regex wildcards.
    if FIND_DOMAIN in this_url:
      return first_position + offset

  return 0
279
 
280
def find_google_position(search_string):
  """Look up the rank of FIND_DOMAIN for one search string and log it.

  search_string -- the query text to search for

  Searches up to MAX_PAGE result pages and appends one line of the form
  "<YYYY-MM-DD HH:MM>, <search string>, <position>, <page>" to the rank file
  of the current day.  When the domain is not found, the position is logged
  as NUM_PER_PAGE * MAX_PAGE and the page as that value divided by 10.
  """
  # Imported locally so the block works on both Python 2 and Python 3.
  try:
    from urllib import quote_plus        # Python 2
  except ImportError:
    from urllib.parse import quote_plus  # Python 3

  # quote_plus turns spaces into '+' like the old replace() did, and also
  # escapes '&', '+', '(' etc. that would otherwise corrupt the query string.
  ENGINE_URL = 'http://www.google' + LOCALE + '/search?q=' + quote_plus(search_string) + '&num=' + str(NUM_PER_PAGE)

  # run curl:
  result = 0  # stays 0 if MAX_PAGE is 0 or no page contains the domain
  for i in range(0, MAX_PAGE):
    result = search_page(i, ENGINE_URL)
    if result != 0:
      break

  # Take the time once so the file name and the logged timestamp agree.
  now = datetime.datetime.now()

  if result == 0:
    position = NUM_PER_PAGE * MAX_PAGE
    page_number = MAX_PAGE * NUM_PER_PAGE // 10
  else:
    position = result
    # Pages are counted in groups of 10 results (as the original "/10" did):
    # positions 1-10 are page 1, 11-20 are page 2, and so on.
    page_number = (result - 1) // 10 + 1

  # show results
  line = "{0:s}, {1:s}, {2:d}, {3:d}\n".format(now.strftime("%Y-%m-%d %H:%M"), search_string, position, page_number)
  with open('/var/lib/tomcat6/webapps/db/googleranks/rank-' + now.strftime("%Y-%m-%d") + '.txt', 'a') as f:
    f.write(line)
1746 vikas 297
 
298
 
299
# Entry point: main() is called unconditionally, so it runs on execution and also on import.
300
main()