Subversion Repositories SmartDukaan

Rev

Rev 1787 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
1746 vikas 1
#!/usr/bin/python
2
 
3
"""
4
 
1789 vikas 5
 This script records the date of the run, the search string, and the position and page number where the
6
 first saholic.com result appears in Google search results, appending one line per search string to a dated rank file.
1746 vikas 7
 
8
"""
9
 
10
import sys, pycurl, re
11
import datetime
12
 
13
# Search Strings to be monitored
1782 vikas 14
SEARCH_STRINGS = [
15
'3g mobile phone',
16
'3g mobile phones',
17
'all mobile phones',
18
'android phone',
19
'android phones',
20
'best mobile phone',
21
'best smart phone',
22
'BLACKBERRY 8520 Unlocked Gemini',
23
'BLACKBERRY 9300 Curve 3G Curve 3G',
24
'BLACKBERRY 9780 Bold',
25
'BLACKBERRY 9800 Torch ',
26
'blackberry mobile phones',
27
'blackberry phones',
28
'business mobile phones',
29
'buy mobile phones',
30
'buy mobile phones online',
31
'cheap mobile phone',
32
'cheap mobile phones',
33
'cheapest mobile phone',
34
'cheapest mobile phones',
35
'compare mobile phone',
36
'compare mobile phones',
37
'Dell M01M Streak',
38
'Dell M01M Streak Black',
39
'dual sim mobile phone',
40
'HTC A7272 Desire Z',
41
'HTC A9191 Desire HD',
42
'htc mobile phone',
43
'htc mobile phones',
44
'htc phones',
45
'HTC S710E Incredible S',
46
'htc smart',
47
'latest mobile phone',
48
'latest mobile phones',
49
'LG A165 ',
50
'LG GS108 ',
51
'LG GS155 ',
52
'LG GU220 ',
53
'LG GX200 ',
54
'LG GX300 ',
55
'LG LG P520 ',
56
'lg mobile phone',
57
'lg mobile phones',
58
'LG P350 Optimus ME',
59
'LG P500 Optimus',
60
'lg phones',
61
'LG S310 ',
62
'LG T300 Cookie Joy',
63
'Micromax Q7 ',
64
'Micromax X-226+ ',
65
'Micromax X-265 ',
66
'Micromax X-410 ',
67
'mobile phone',
68
'mobile phone compare',
69
'mobile phone comparison',
70
'mobile phone models',
71
'mobile phone price',
72
'mobile phone prices',
73
'mobile phone reviews',
74
'mobile phones',
75
'mobile phones comparison',
76
'mobile phones prices',
77
'Motorola EX115 Starling',
78
'Motorola MB502 Charm ',
79
'motorola mobile phones',
80
'new mobile phones',
81
'Nokia 1280 ',
82
'Nokia 1616 ',
83
'Nokia 2700c ',
84
'Nokia 5130c ',
85
'Nokia 5233 ',
86
'Nokia C1-01 ',
87
'Nokia C1-02 ',
88
'Nokia C2-01 ',
89
'Nokia C3-00 ',
90
'Nokia C3-01 ',
91
'Nokia C5-00 ',
92
'Nokia C5-03 ',
93
'Nokia C6-00 ',
94
'Nokia C6-01 ',
95
'Nokia C7-00 ',
96
'Nokia E5-00 ',
97
'Nokia E7-00 ',
98
'Nokia N8 ',
99
'Nokia X-2 ',
100
'Nokia X2-01 ',
101
'Nokia X3-02 ',
102
'qwerty mobile phones',
103
'Samsung B7510 Galaxy Pro',
104
'Samsung B7722 Star DUOS ',
105
'Samsung C3010s ',
106
'Samsung C3200 Monte Bar ',
107
'Samsung C3222 Qwerty',
108
'Samsung C3303 Champ ',
109
'Samsung C3303i Champ Mega Cam',
110
'Samsung C3530 Metro',
111
'Samsung E1081',
112
'Samsung E1160',
113
'Samsung E1252',
114
'Samsung E2152-M with 1GB MMC',
115
'Samsung E2652 Champ Duos',
116
'Samsung i9003 Galaxy S (4GB) ',
117
'samsung mobile',
118
'samsung mobile phone',
119
'samsung mobile phones',
120
'Samsung P1000-Basic Galaxy Tablet ',
121
'Samsung P1000-Basic Galaxy Tablet  Chic White',
122
'samsung phones',
123
'Samsung S3310i Metro with HS and 2Gb',
124
'Samsung S3353 Trevi ',
125
'Samsung S3850 Corby-II',
126
'Samsung S5253 wo Card Wave Series (525)',
127
'Samsung S5263 Star-II',
128
'Samsung S5333 wo Card Wave -Side Slider',
129
'Samsung S5570 Galaxy POP ',
130
'Samsung S8530 Wave 2',
131
'samsung smart phone',
132
'smart mobile phone',
133
'smart mobile phones',
134
'smart phone',
135
'smartphones',
136
'Sony Ericsson E15i Xperia X8 ',
137
'Sony Ericsson E15i Xperia X8 Xperia',
138
'Sony Ericsson E15i Xperia X8 Xperia Dark Blue',
139
'Sony Ericsson E15i Xperia X8 Xperia Swing Pink',
140
'Sony Ericsson LT15i ARC',
141
'Sony Ericsson LT15i ARC Midnight Blue',
142
'Sony Ericsson LT15i ARC Misty Silver',
143
'sony ericsson mobile phone',
144
'sony ericsson mobile phones',
145
'sony ericsson phones',
146
'Sony Ericsson R800i Xperia Play',
147
'Sony Ericsson R800i Xperia Play Black',
148
'Sony Ericsson W100i Spiro ',
149
'Sony Ericsson W100i Spiro  Contrast Black',
150
'Sony Ericsson W100i Spiro  Stealth Black',
151
'Sony Ericsson W100i Spiro  Sunset Pink',
152
'Sony Ericsson W150i Yendo ',
153
'Sony Ericsson W150i Yendo  Black & Red',
154
'Sony Ericsson W150i Yendo  White Blue',
155
'Sony Ericsson W20i Zylo',
156
'Sony Ericsson W20i Zylo Chacha Silver',
157
'Sony Ericsson W20i Zylo Meteorite White',
158
'Sony Ericsson W20i Zylo Swing Pink',
159
'Sony Ericsson W20i Zylo ZAZZ BLACK',
160
'Sony Ericsson X10 Mini Pro Mini Pro (U20i)',
161
'Sony Ericsson Xperia Play R800i',
162
'Spice  G6550 ',
163
'Spice  G6550  Black',
164
'Spice  M5100 ',
165
'Spice  M5100  Black',
166
'Spice  M5570 ',
167
'Spice  M5570  Black',
168
'Spice  M5570  Blue',
169
'Spice  M5570  Red',
170
'Spice  M6350 ',
171
'Spice  M6350  Black & Golden',
172
'Spice  M6460 ',
173
'Spice  M6460  Brown',
174
'Spice  M6460  Gray',
175
'Spice  M9000 ',
176
'Spice  M9000  Black',
177
'Spice  MI310 ',
178
'Spice  MI310  Brown',
179
'Spice M4250 ',
180
'Spice M4250  Black + Red',
181
'Spice M4580 DV ',
182
'Spice M4580 DV  Black & Golden',
183
'Spice M5056 ',
184
'Spice M5056  Black',
185
'Spice M5161n ',
186
'Spice M5161n  Black',
187
'Spice M5161n  Black & Golden',
188
'Spice M5170 ',
189
'Spice M5170  Grey Black',
190
'Spice M5262 ',
191
'Spice M5262  Black',
192
'Spice M5262  Black Blue',
193
'Spice M5454 ',
194
'Spice M5454  Silver Blue',
195
'Spice M5454  Silver Red',
196
'Spice M5750 ',
197
'Spice M5750  Black & Red',
198
'Spice M6363 ',
199
'Spice M6363  Black-Orange',
200
'Spice M6464 ',
201
'Spice M6464  Black',
202
'Spice QT58 Mini ',
203
'Spice QT58 Mini  Black',
204
'Spice QT58 Mini  Red',
205
'Spice QT58 Mini  White',
206
'Spice QT61 Transforme Transformer',
207
'Spice QT61 Transforme Transformer Black',
208
'Spice QT68 ',
209
'Spice QT68  Black',
210
'Spice S1200 ',
211
'Spice S1200  Black',
212
'top mobile phones',
213
'touch screen mobile phones',
214
'touch screen phones'
215
]
216
 
1746 vikas 217
# Settings shared by the request and parsing functions below.
FIND_DOMAIN = "www.saholic.com"  # domain looked for in the result URLs
LOCALE = ".co.in"                # suffix appended to 'http://www.google'
NUM_PER_PAGE = 100               # sent as the 'num' query parameter
MAX_PAGE = 1                     # number of result pages requested per search string
USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0)"
# Alternative user agent kept from the original: USER_AGENT = 'Mozilla/5.0'
224
 
225
# define class to store result:
226
class RankCheck:
  """Collects the body of an HTTP response that arrives in chunks.

  `body_callback` is registered as the curl write function; once the
  transfer is done, `contents` holds every chunk concatenated in order.
  """

  def __init__(self):
    # Text received so far; starts empty.
    self.contents = ''

  def body_callback(self, buf):
    """Append one received chunk to `contents`.

    buf -- chunk handed over by the transfer; either text or bytes.

    Bytes that are not already `str` (the Python 3 case) are decoded as
    UTF-8 with undecodable sequences replaced, because concatenating bytes
    to the text accumulator would raise TypeError. On Python 2 `bytes` is
    `str`, so the chunk is appended unchanged, exactly as before.
    """
    if isinstance(buf, bytes) and not isinstance(buf, str):
      buf = buf.decode('utf-8', 'replace')
    self.contents = self.contents + buf
232
 
233
 
234
def main():
  """Run the rank lookup once for each entry of SEARCH_STRINGS, in list order."""
  for query in SEARCH_STRINGS:
    find_google_position(query)
237
 
238
def init_curl(rankRequest, rankCheck):
  """Apply the options common to every search request to a curl handle.

  rankRequest -- handle to configure; every option is applied via setopt().
  rankCheck   -- object whose body_callback receives the response body.
  """
  # (option, value) pairs, applied in this order.
  settings = (
    (pycurl.USERAGENT, USER_AGENT),
    (pycurl.FOLLOWLOCATION, 1),
    (pycurl.AUTOREFERER, 1),
    (pycurl.WRITEFUNCTION, rankCheck.body_callback),
    (pycurl.COOKIEFILE, ''),
    (pycurl.HTTPGET, 1),
    (pycurl.REFERER, ''),
  )
  for option, value in settings:
    rankRequest.setopt(option, value)
247
 
248
def search_page(page, page_url):
  """Fetch one page of search results and locate the first FIND_DOMAIN hit.

  page     -- zero-based page index; the request starts at result number
              page * NUM_PER_PAGE.
  page_url -- search URL without the '&start=' parameter.

  Returns the 1-based overall position of the first result link whose URL
  contains FIND_DOMAIN, or 0 when no link on this page matches.
  Errors raised by the transfer propagate to the caller.
  """
  first_index = page * NUM_PER_PAGE

  rankRequest = pycurl.Curl()
  rankCheck = RankCheck()
  try:
    init_curl(rankRequest, rankCheck)
    rankRequest.setopt(pycurl.URL, page_url + '&start=' + str(first_index))
    rankRequest.perform()
  finally:
    # Release the handle even when the transfer raises.
    rankRequest.close()

  return _first_matching_position(rankCheck.contents, first_index, FIND_DOMAIN)


def _first_matching_position(html, offset, domain):
  """Return offset + (1-based index) of the first result link containing domain, or 0."""
  # Character class taken unchanged from the original expressions
  # (inside it, '\+-=' is a range from '+' to '=').
  url_chars = r'[\w\d:#@%/;$()~_?\+-=\\\.&]'
  # Group 1 is the link target of one result heading.
  result_link = re.compile(r'<h3 class="r"><a href="(https?:(?://)+' + url_chars + r'*)')
  # The domain is escaped so that '.' only matches a literal dot.
  domain_url = re.compile(r'https?:(?://)+' + url_chars + '*' + re.escape(domain))

  for index, link in enumerate(result_link.finditer(html)):
    if domain_url.match(link.group(1)):
      return offset + index + 1
  return 0
277
 
278
def find_google_position(search_string):
  """Look up the rank of FIND_DOMAIN for one search string and log it.

  search_string -- the query to send to the search engine.

  Up to MAX_PAGE result pages are requested through search_page(), stopping
  at the first page that reports a hit. One line
      "<YYYY-MM-DD HH:MM>, <search string>, <position>, <page>"
  is appended to the rank file for the current day. When nothing is found,
  the position is written as NUM_PER_PAGE * MAX_PAGE and the page as that
  value divided by 10.
  """
  # Local import so the block works on both Python 2 and Python 3.
  try:
    from urllib import quote_plus
  except ImportError:
    from urllib.parse import quote_plus

  # quote_plus turns spaces into '+' (as before) and also escapes characters
  # such as '&' and '+' that would otherwise corrupt the q= parameter.
  ENGINE_URL = ('http://www.google' + LOCALE + '/search?q=' + quote_plus(search_string)
                + '&num=' + str(NUM_PER_PAGE))

  # Stays 0 when MAX_PAGE is 0 or no page contains the domain.
  result = 0
  for i in range(0, MAX_PAGE):
    result = search_page(i, ENGINE_URL)
    if result != 0:
      break

  if result == 0:
    position = NUM_PER_PAGE * MAX_PAGE
    results_page = position // 10
  else:
    position = result
    # Ten results per page: positions 1-10 are page 1, 11-20 are page 2, ...
    results_page = (result - 1) // 10 + 1

  # One timestamp for both the file name and the line, so they cannot
  # disagree when the run crosses midnight.
  now = datetime.datetime.now()
  line = "{0:s}, {1:s}, {2:d}, {3:d}\n".format(now.strftime("%Y-%m-%d %H:%M"), search_string, position, results_page)

  path = '/var/lib/tomcat6/webapps/db/googleranks/rank-' + now.strftime("%Y-%m-%d") + '.txt'
  with open(path, 'a') as f:
    f.write(line)
1746 vikas 295
 
296
 
297
# Run Main
298
# Unguarded module-level call: the whole check runs whenever this file is executed or imported.
main()