Subversion Repositories SmartDukaan

Rev

Rev 30 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
30 ashish 1
/*
2
 * Licensed to the Apache Software Foundation (ASF) under one
3
 * or more contributor license agreements. See the NOTICE file
4
 * distributed with this work for additional information
5
 * regarding copyright ownership. The ASF licenses this file
6
 * to you under the Apache License, Version 2.0 (the
7
 * "License"); you may not use this file except in compliance
8
 * with the License. You may obtain a copy of the License at
9
 *
10
 *   http://www.apache.org/licenses/LICENSE-2.0
11
 *
12
 * Unless required by applicable law or agreed to in writing,
13
 * software distributed under the License is distributed on an
14
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
 * KIND, either express or implied. See the License for the
16
 * specific language governing permissions and limitations
17
 * under the License.
18
 */
19
 
20
#include <sys/time.h>
21
 
22
#include "FacebookBase.h"
23
#include "ServiceTracker.h"
24
#include "concurrency/ThreadManager.h"
25
 
26
using namespace std;
27
using namespace facebook::fb303;
28
using namespace apache::thrift::concurrency;
29
 
30
 
31
uint64_t ServiceTracker::CHECKPOINT_MINIMUM_INTERVAL_SECONDS = 60;
32
int ServiceTracker::LOG_LEVEL = 5;
33
 
34
 
35
ServiceTracker::ServiceTracker(facebook::fb303::FacebookBase *handler,
36
                               void (*logMethod)(int, const string &),
37
                               bool featureCheckpoint,
38
                               bool featureStatusCheck,
39
                               bool featureThreadCheck,
40
                               Stopwatch::Unit stopwatchUnit)
41
  : handler_(handler), logMethod_(logMethod),
42
    featureCheckpoint_(featureCheckpoint),
43
    featureStatusCheck_(featureStatusCheck),
44
    featureThreadCheck_(featureThreadCheck),
45
    stopwatchUnit_(stopwatchUnit),
46
    checkpointServices_(0)
47
{
48
  if (featureCheckpoint_) {
49
    time_t now = time(NULL);
50
    checkpointTime_ = now;
51
  } else {
52
    checkpointTime_ = 0;
53
  }
54
}
55
 
56
/**
57
 * Registers the beginning of a "service method": basically, any of
58
 * the implementations of Thrift remote procedure calls that a
59
 * FacebookBase handler is handling.  Controls concurrent
60
 * services and reports statistics (via log and via fb303 counters).
61
 * Throws an exception if the server is not ready to handle service
62
 * methods yet.
63
 *
64
 * note: The relationship between startService() and finishService()
65
 * is currently defined so that a call to finishService() should only
66
 * be matched to this call to startService() if this method returns
67
 * without exception.  It wouldn't be a problem to implement things
68
 * the other way, so that *every* start needed a finish, but this
69
 * convention was chosen to match the way an object's constructor and
70
 * destructor work together, i.e. to work well with ServiceMethod
71
 * objects.
72
 *
73
 * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod
74
 *                                           object instantiated at the start
75
 *                                           of the service method.
76
 */
77
void
78
ServiceTracker::startService(const ServiceMethod &serviceMethod)
79
{
80
  // note: serviceMethod.timer_ automatically starts at construction.
81
 
82
  // log service start
83
  logMethod_(5, serviceMethod.signature_);
84
 
85
  // check handler ready
86
  if (featureStatusCheck_ && !serviceMethod.featureLogOnly_) {
87
    // note: Throwing exceptions before counting statistics.  See note
88
    // in method header.
89
    // note: A STOPPING server is not accepting new connections, but it
90
    // is still handling any already-connected threads -- so from the
91
    // service method's point of view, a status of STOPPING is a green
92
    // light.
93
    facebook::fb303::fb_status status = handler_->getStatus();
94
    if (status != facebook::fb303::ALIVE
95
        && status != facebook::fb303::STOPPING) {
96
      if (status == facebook::fb303::STARTING) {
97
        throw ServiceException("Server starting up; please try again later");
98
      } else {
99
        throw ServiceException("Server not alive; please try again later");
100
      }
101
    }
102
  }
103
 
104
  // check server threads
105
  if (featureThreadCheck_ && !serviceMethod.featureLogOnly_) {
106
    // note: Might want to put these messages in reportCheckpoint() if
107
    // log is getting spammed.
108
    if (threadManager_ != NULL) {
109
      size_t idle_count = threadManager_->idleWorkerCount();
110
      if (idle_count == 0) {
111
        stringstream message;
112
        message << "service " << serviceMethod.signature_
113
                << ": all threads (" << threadManager_->workerCount()
114
                << ") in use";
115
        logMethod_(3, message.str());
116
      }
117
    }
118
  }
119
}
120
 
121
/**
122
 * Logs a significant step in the middle of a "service method"; see
123
 * startService.
124
 *
125
 * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod
126
 *                                           object instantiated at the start
127
 *                                           of the service method.
128
 * @return int64_t Elapsed units (see stopwatchUnit_) since ServiceMethod
129
 *                 instantiation.
130
 */
131
int64_t
132
ServiceTracker::stepService(const ServiceMethod &serviceMethod,
133
                            const string &stepName)
134
{
135
  stringstream message;
136
  string elapsed_label;
137
  int64_t elapsed = serviceMethod.timer_.elapsedUnits(stopwatchUnit_,
138
                                                      &elapsed_label);
139
  message << serviceMethod.signature_
140
          << ' ' << stepName
141
          << " [" << elapsed_label << ']';
142
  logMethod_(5, message.str());
143
  return elapsed;
144
}
145
 
146
/**
147
 * Registers the end of a "service method"; see startService().
148
 *
149
 * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod
150
 *                                           object instantiated at the start
151
 *                                           of the service method.
152
 */
153
void
154
ServiceTracker::finishService(const ServiceMethod &serviceMethod)
155
{
156
  // log end of service
157
  stringstream message;
158
  string duration_label;
159
  int64_t duration = serviceMethod.timer_.elapsedUnits(stopwatchUnit_,
160
                                                       &duration_label);
161
  message << serviceMethod.signature_
162
          << " finish [" << duration_label << ']';
163
  logMethod_(5, message.str());
164
 
165
  // count, record, and maybe report service statistics
166
  if (!serviceMethod.featureLogOnly_) {
167
 
168
    if (!featureCheckpoint_) {
169
 
170
      // lifetime counters
171
      // (note: No need to lock statisticsMutex_ if not doing checkpoint;
172
      // FacebookService::incrementCounter() is already thread-safe.)
173
      handler_->incrementCounter("lifetime_services");
174
 
175
    } else {
176
 
177
      statisticsMutex_.lock();
178
      // note: No exceptions expected from this code block.  Wrap in a try
179
      // just to be safe.
180
      try {
181
 
182
        // lifetime counters
183
        // note: Good to synchronize this with the increment of
184
        // checkpoint services, even though incrementCounter() is
185
        // already thread-safe, for the sake of checkpoint reporting
186
        // consistency (i.e.  since the last checkpoint,
187
        // lifetime_services has incremented by checkpointServices_).
188
        handler_->incrementCounter("lifetime_services");
189
 
190
        // checkpoint counters
191
        checkpointServices_++;
192
        checkpointDuration_ += duration;
193
 
194
        // per-service timing
195
        // note kjv: According to my tests it is very slightly faster to
196
        // call insert() once (and detect not-found) than calling find()
197
        // and then maybe insert (if not-found).  However, the difference
198
        // is tiny for small maps like this one, and the code for the
199
        // faster solution is slightly less readable.  Also, I wonder if
200
        // the instantiation of the (often unused) pair to insert makes
201
        // the first algorithm slower after all.
202
        map<string, pair<uint64_t, uint64_t> >::iterator iter;
203
        iter = checkpointServiceDuration_.find(serviceMethod.name_);
204
        if (iter != checkpointServiceDuration_.end()) {
205
          iter->second.first++;
206
          iter->second.second += duration;
207
        } else {
208
          checkpointServiceDuration_.insert(make_pair(serviceMethod.name_,
209
                                                      make_pair(1, duration)));
210
        }
211
 
212
        // maybe report checkpoint
213
        // note: ...if it's been long enough since the last report.
214
        time_t now = time(NULL);
215
        uint64_t check_interval = now - checkpointTime_;
216
        if (check_interval >= CHECKPOINT_MINIMUM_INTERVAL_SECONDS) {
217
          reportCheckpoint();
218
        }
219
 
220
      } catch (...) {
221
        statisticsMutex_.unlock();
222
        throw;
223
      }
224
      statisticsMutex_.unlock();
225
 
226
    }
227
  }
228
}
229
 
230
/**
231
 * Logs some statistics gathered since the last call to this method.
232
 *
233
 * note: Thread race conditions on this method could cause
234
 * misreporting and/or undefined behavior; the caller must protect
235
 * uses of the object variables (and calls to this method) with a
236
 * mutex.
237
 *
238
 */
239
void
240
ServiceTracker::reportCheckpoint()
241
{
242
  time_t now = time(NULL);
243
 
244
  uint64_t check_count = checkpointServices_;
245
  uint64_t check_interval = now - checkpointTime_;
246
  uint64_t check_duration = checkpointDuration_;
247
 
248
  // export counters for timing of service methods (by service name)
249
  handler_->setCounter("checkpoint_time", check_interval);
250
  map<string, pair<uint64_t, uint64_t> >::iterator iter;
251
  uint64_t count;
252
  for (iter = checkpointServiceDuration_.begin();
253
       iter != checkpointServiceDuration_.end();
254
       iter++) {
255
    count = iter->second.first;
256
    handler_->setCounter(string("checkpoint_count_") + iter->first, count);
257
    if (count == 0) {
258
      handler_->setCounter(string("checkpoint_speed_") + iter->first,
259
                           0);
260
    } else {
261
      handler_->setCounter(string("checkpoint_speed_") + iter->first,
262
                           iter->second.second / count);
263
    }
264
  }
265
 
266
  // reset checkpoint variables
267
  // note: Clearing the map while other threads are using it might
268
  // cause undefined behavior.
269
  checkpointServiceDuration_.clear();
270
  checkpointTime_ = now;
271
  checkpointServices_ = 0;
272
  checkpointDuration_ = 0;
273
 
274
  // get lifetime variables
275
  uint64_t life_count = handler_->getCounter("lifetime_services");
276
  uint64_t life_interval = now - handler_->aliveSince();
277
 
278
  // log checkpoint
279
  stringstream message;
280
  message << "checkpoint_time:" << check_interval
281
          << " checkpoint_services:" << check_count
282
          << " checkpoint_speed_sum:" << check_duration
283
          << " lifetime_time:" << life_interval
284
          << " lifetime_services:" << life_count;
285
  if (featureThreadCheck_ && threadManager_ != NULL) {
286
    size_t worker_count = threadManager_->workerCount();
287
    size_t idle_count = threadManager_->idleWorkerCount();
288
    message << " total_workers:" << worker_count
289
            << " active_workers:" << (worker_count - idle_count);
290
  }
291
  logMethod_(4, message.str());
292
}
293
 
294
/**
295
 * Remembers the thread manager used in the server, for monitoring thread
296
 * activity.
297
 *
298
 * @param shared_ptr<ThreadManager> threadManager The server's thread manager.
299
 */
300
void
301
ServiceTracker::setThreadManager(boost::shared_ptr<ThreadManager>
302
                                 threadManager)
303
{
304
  threadManager_ = threadManager;
305
}
306
 
307
/**
308
 * Logs messages to stdout; the passed message will be logged if the
309
 * passed level is less than or equal to LOG_LEVEL.
310
 *
311
 * This is the default logging method used by the ServiceTracker.  An
312
 * alternate logging method (that accepts the same parameters) may be
313
 * specified to the constructor.
314
 *
315
 * @param int level A level associated with the message: higher levels
316
 *                  are used to indicate higher levels of detail.
317
 * @param string message The message to log.
318
 */
319
void
320
ServiceTracker::defaultLogMethod(int level, const string &message)
321
{
322
  if (level <= LOG_LEVEL) {
323
    string level_string;
324
    time_t now = time(NULL);
325
    char now_pretty[26];
326
    ctime_r(&now, now_pretty);
327
    now_pretty[24] = '\0';
328
    switch (level) {
329
    case 1:
330
      level_string = "CRITICAL";
331
      break;
332
    case 2:
333
      level_string = "ERROR";
334
      break;
335
    case 3:
336
      level_string = "WARNING";
337
      break;
338
    case 5:
339
      level_string = "DEBUG";
340
      break;
341
    case 4:
342
    default:
343
      level_string = "INFO";
344
      break;
345
    }
346
    cout << '[' << level_string << "] [" << now_pretty << "] "
347
         << message << endl;
348
  }
349
}
350
 
351
 
352
/**
353
 * Creates a Stopwatch, which can report the time elapsed since its
354
 * creation.
355
 *
356
 */
357
Stopwatch::Stopwatch()
358
{
359
  gettimeofday(&startTime_, NULL);
360
}
361
 
362
void
363
Stopwatch::reset()
364
{
365
  gettimeofday(&startTime_, NULL);
366
}
367
 
368
uint64_t
369
Stopwatch::elapsedUnits(Stopwatch::Unit unit, string *label) const
370
{
371
  timeval now_time;
372
  gettimeofday(&now_time, NULL);
373
  time_t duration_secs = now_time.tv_sec - startTime_.tv_sec;
374
 
375
  uint64_t duration_units;
376
  switch (unit) {
377
  case UNIT_SECONDS:
378
    duration_units = duration_secs
379
      + (now_time.tv_usec - startTime_.tv_usec + 500000) / 1000000;
380
    if (NULL != label) {
381
      stringstream ss_label;
382
      ss_label << duration_units << " secs";
383
      label->assign(ss_label.str());
384
    }
385
    break;
386
  case UNIT_MICROSECONDS:
387
    duration_units = duration_secs * 1000000
388
      + now_time.tv_usec - startTime_.tv_usec;
389
    if (NULL != label) {
390
      stringstream ss_label;
391
      ss_label << duration_units << " us";
392
      label->assign(ss_label.str());
393
    }
394
    break;
395
  case UNIT_MILLISECONDS:
396
  default:
397
    duration_units = duration_secs * 1000
398
      + (now_time.tv_usec - startTime_.tv_usec + 500) / 1000;
399
    if (NULL != label) {
400
      stringstream ss_label;
401
      ss_label << duration_units << " ms";
402
      label->assign(ss_label.str());
403
    }
404
    break;
405
  }
406
  return duration_units;
407
}
408
 
409
/**
410
 * Creates a ServiceMethod, used for tracking a single service method
411
 * invocation (via the ServiceTracker).  The passed name of the
412
 * ServiceMethod is used to group statistics (e.g. counts and durations)
413
 * for similar invocations; the passed signature is used to uniquely
414
 * identify the particular invocation in the log.
415
 *
416
 * note: A version of this constructor is provided that automatically
417
 * forms a signature the name and a passed numeric id.  Silly, sure,
418
 * but commonly used, since it often saves the caller a line or two of
419
 * code.
420
 *
421
 * @param ServiceTracker *tracker The service tracker that will track this
422
 *                                ServiceMethod.
423
 * @param const string &name The service method name (usually independent
424
 *                           of service method parameters).
425
 * @param const string &signature A signature uniquely identifying the method
426
 *                                invocation (usually name plus parameters).
427
 */
428
ServiceMethod::ServiceMethod(ServiceTracker *tracker,
429
                             const string &name,
430
                             const string &signature,
431
                             bool featureLogOnly)
432
  : tracker_(tracker), name_(name), signature_(signature),
433
    featureLogOnly_(featureLogOnly)
434
{
435
  // note: timer_ automatically starts at construction.
436
 
437
  // invoke tracker to start service
438
  // note: Might throw.  If it throws, then this object's destructor
439
  // won't be called, which is according to plan: finishService() is
440
  // only supposed to be matched to startService() if startService()
441
  // returns without error.
442
  tracker_->startService(*this);
443
}
444
 
445
ServiceMethod::ServiceMethod(ServiceTracker *tracker,
446
                             const string &name,
447
                             uint64_t id,
448
                             bool featureLogOnly)
449
  : tracker_(tracker), name_(name), featureLogOnly_(featureLogOnly)
450
{
451
  // note: timer_ automatically starts at construction.
452
  stringstream ss_signature;
453
  ss_signature << name << " (" << id << ')';
454
  signature_ = ss_signature.str();
455
 
456
  // invoke tracker to start service
457
  // note: Might throw.  If it throws, then this object's destructor
458
  // won't be called, which is according to plan: finishService() is
459
  // only supposed to be matched to startService() if startService()
460
  // returns without error.
461
  tracker_->startService(*this);
462
}
463
 
464
ServiceMethod::~ServiceMethod()
465
{
466
  // invoke tracker to finish service
467
  // note: Not expecting an exception from this code, but
468
  // finishService() might conceivably throw an out-of-memory
469
  // exception.
470
  try {
471
    tracker_->finishService(*this);
472
  } catch (...) {
473
    // don't throw
474
  }
475
}
476
 
477
uint64_t
478
ServiceMethod::step(const std::string &stepName)
479
{
480
  return tracker_->stepService(*this, stepName);
481
}