Merge "Adds first-byte latency timings for GET requests."

Jenkins 2013-02-23 01:10:58 +00:00 committed by Gerrit Code Review
commit d1381ba86b
3 changed files with 211 additions and 72 deletions

View File

@@ -462,13 +462,14 @@ servers when sending statistics to a central StatsD server. If you run a local
 StatsD server per node, you could configure a per-node metrics prefix there and
 leave `log_statsd_metric_prefix` blank.

-Note that metrics reported to StatsD are counters or timing data (which
-StatsD usually expands out to min, max, avg, count, and 90th percentile
-per timing metric). Some important "gauge" metrics will still need to
-be collected using another method. For example, the
-`object-server.async_pendings` StatsD metric counts the generation of
-async_pendings in real-time, but will not tell you the current number
-of async_pending container updates on disk at any point in time.
+Note that metrics reported to StatsD are counters or timing data (which are
+sent in units of milliseconds). StatsD usually expands timing data out to min,
+max, avg, count, and 90th percentile per timing metric, but the details of
+this behavior will depend on the configuration of your StatsD server. Some
+important "gauge" metrics may still need to be collected using another method.
+For example, the `object-server.async_pendings` StatsD metric counts the generation
+of async_pendings in real-time, but will not tell you the current number of
+async_pending container updates on disk at any point in time.

 Note also that the set of metrics collected, their names, and their semantics
 are not locked down and will change over time. StatsD logging is currently in
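
For readers unfamiliar with StatsD, here is a rough sketch of what "counters or timing data sent in units of milliseconds" means on the wire. This shows the generic plain-UDP StatsD convention only; it is not Swift's client implementation and not part of this patch:

    # Rough sketch of the generic StatsD UDP conventions assumed above
    # (illustrative only; Swift's real StatsD client differs in detail).
    import socket

    def send_statsd(payload, host='127.0.0.1', port=8125):
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock.sendto(payload, (host, port))

    # A counter increments a bucket; a timer reports one sample in milliseconds.
    send_statsd('account-reaper.errors:1|c')       # counter
    send_statsd('account-reaper.timing:12.5|ms')   # timing sample, in ms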
@@ -494,7 +495,7 @@ Metric Name                            Description
 `account-reaper.errors`                Count of devices failing the mount check.
 `account-reaper.timing`                Timing data for each reap_account() call.
 `account-reaper.return_codes.X`        Count of HTTP return codes from various operations
-                                       (eg. object listing, container deletion, etc.). The
+                                       (e.g. object listing, container deletion, etc.). The
                                        value for X is the first digit of the return code
                                        (2 for 201, 4 for 404, etc.).
 `account-reaper.containers_failures`   Count of failures to delete a container.
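
As an illustration of the X bucketing described in that row, a hypothetical helper (not code from this commit) would look like:

    # Hypothetical helper illustrating the bucketing described above:
    # X is simply the first digit of the HTTP status code returned.
    def reaper_return_code_metric(status_int):
        return 'account-reaper.return_codes.%d' % (status_int // 100)

    # reaper_return_code_metric(201) -> 'account-reaper.return_codes.2'
    # reaper_return_code_metric(404) -> 'account-reaper.return_codes.4'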
@@ -840,17 +841,23 @@ the default setting yields the above behavior.

 .. _Swift Origin Server: https://github.com/dpgoetz/sos

-============================================ ====================================================
-Metric Name                                  Description
--------------------------------------------- ----------------------------------------------------
-`proxy-server.<type>.<verb>.<status>.timing` Timing data for requests. The <status> portion is
-                                             the numeric HTTP status code for the request (eg.
-                                             "200" or "404")
-`proxy-server.<type>.<verb>.<status>.xfer`   The count of the sum of bytes transferred in (from
-                                             clients) and out (to clients) for requests. The
-                                             <type>, <verb>, and <status> portions of the metric
-                                             are just like the timing metric.
-============================================ ====================================================
+==================================================== ============================================
+Metric Name                                          Description
+---------------------------------------------------- --------------------------------------------
+`proxy-server.<type>.<verb>.<status>.timing`         Timing data for requests, start to finish.
+                                                     The <status> portion is the numeric HTTP
+                                                     status code for the request (e.g. "200" or
+                                                     "404").
+`proxy-server.<type>.GET.<status>.first-byte.timing` Timing data up to completion of sending the
+                                                     response headers (only for GET requests).
+                                                     <status> and <type> are as for the main
+                                                     timing metric.
+`proxy-server.<type>.<verb>.<status>.xfer`           This counter metric is the sum of bytes
+                                                     transferred in (from clients) and out (to
+                                                     clients) for requests. The <type>, <verb>,
+                                                     and <status> portions of the metric are just
+                                                     like the main timing metric.
+==================================================== ============================================

 Metrics for `tempauth` middleware (in the table, `<reseller_prefix>` represents
 the actual configured reseller_prefix or "`NONE`" if the reseller_prefix is the
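
To make the new table concrete, a single object GET returning 200 would emit metric names like the following. This is a sketch, not output captured from the patch:

    # Sketch: the three proxy-server metrics emitted for one object GET
    # that returns 200. The '.first-byte.timing' metric is the new one.
    stat_type, verb, status = 'object', 'GET', 200
    base = 'proxy-server.%s.%s.%d' % (stat_type, verb, status)
    print base + '.timing'             # whole-request latency, reported in ms
    print base + '.first-byte.timing'  # time until response headers were sent (GET only)
    print base + '.xfer'               # bytes read from client + bytes sent to client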

View File

@@ -99,22 +99,28 @@ class ProxyLoggingMiddleware(object):
                                             log_route='proxy-access')
         self.access_logger.set_statsd_prefix('proxy-server')

-    def log_request(self, env, status_int, bytes_received, bytes_sent,
-                    request_time, client_disconnect):
+    def method_from_req(self, req):
+        return req.environ.get('swift.orig_req_method', req.method)
+
+    def req_already_logged(self, req):
+        return req.environ.get('swift.proxy_access_log_made')
+
+    def mark_req_logged(self, req):
+        req.environ['swift.proxy_access_log_made'] = True
+
+    def log_request(self, req, status_int, bytes_received, bytes_sent,
+                    request_time):
         """
         Log a request.

-        :param env: WSGI environment
+        :param req: swob.Request object for the request
         :param status_int: integer code for the response status
         :param bytes_received: bytes successfully read from the request body
         :param bytes_sent: bytes yielded to the WSGI server
         :param request_time: time taken to satisfy the request, in seconds
         """
-        if env.get('swift.proxy_access_log_made'):
+        if self.req_already_logged(req):
             return
-        req = Request(env)
-        if client_disconnect:  # log disconnected clients as '499' status code
-            status_int = 499
         req_path = get_valid_utf8_str(req.path)
         the_request = quote(unquote(req_path))
         if req.query_string:
@@ -123,7 +129,7 @@ class ProxyLoggingMiddleware(object):
         if self.log_hdrs:
             logged_headers = '\n'.join('%s: %s' % (k, v)
                                        for k, v in req.headers.items())
-        method = req.environ.get('swift.orig_req_method', req.method)
+        method = self.method_from_req(req)
         self.access_logger.info(' '.join(
             quote(str(x) if x else '-')
             for x in (
@@ -145,8 +151,18 @@ class ProxyLoggingMiddleware(object):
                 '%.4f' % request_time,
                 req.environ.get('swift.source'),
             )))
-        env['swift.proxy_access_log_made'] = True
+        self.mark_req_logged(req)
         # Log timing and bytes-transfered data to StatsD
+        metric_name = self.statsd_metric_name(req, status_int, method)
+        # Only log data for valid controllers (or SOS) to keep the metric count
+        # down (egregious errors will get logged by the proxy server itself).
+        if metric_name:
+            self.access_logger.timing(metric_name + '.timing',
+                                      request_time * 1000)
+            self.access_logger.update_stats(metric_name + '.xfer',
+                                            bytes_received + bytes_sent)
+
+    def statsd_metric_name(self, req, status_int, method):
         if req.path.startswith('/v1/'):
             try:
                 stat_type = [None, 'account', 'container',
@@ -154,17 +170,12 @@ class ProxyLoggingMiddleware(object):
             except IndexError:
                 stat_type = 'object'
         else:
-            stat_type = env.get('swift.source')
-        # Only log data for valid controllers (or SOS) to keep the metric count
-        # down (egregious errors will get logged by the proxy server itself).
-        if stat_type:
-            stat_method = method if method in self.valid_methods \
-                else 'BAD_METHOD'
-            metric_name = '.'.join((stat_type, stat_method, str(status_int)))
-            self.access_logger.timing(metric_name + '.timing',
-                                      request_time * 1000)
-            self.access_logger.update_stats(metric_name + '.xfer',
-                                            bytes_received + bytes_sent)
+            stat_type = req.environ.get('swift.source')
+        if stat_type is None:
+            return None
+        stat_method = method if method in self.valid_methods \
+            else 'BAD_METHOD'
+        return '.'.join((stat_type, stat_method, str(status_int)))

     def __call__(self, env, start_response):
         start_response_args = [None]
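
In other words, the decision about what (if anything) to report to StatsD, which used to be inlined in log_request(), is now isolated in statsd_metric_name(), which returns a dotted prefix or None. Roughly, assuming `app` is a configured ProxyLoggingMiddleware instance as in the tests below:

    # Rough illustration of the new helper's contract; 'app' is an assumed,
    # already-configured ProxyLoggingMiddleware instance (not part of the patch).
    from swift.common.swob import Request

    req = Request.blank('/v1/a/c/o', environ={'REQUEST_METHOD': 'GET'})
    app.statsd_metric_name(req, 200, 'GET')                     # -> 'object.GET.200'
    app.statsd_metric_name(Request.blank('/bad'), 200, 'GET')   # -> None (not tracked)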
@@ -175,6 +186,14 @@ class ProxyLoggingMiddleware(object):
         def my_start_response(status, headers, exc_info=None):
             start_response_args[0] = (status, list(headers), exc_info)

+        def status_int_for_logging(client_disconnect=False, start_status=None):
+            # log disconnected clients as '499' status code
+            if client_disconnect or input_proxy.client_disconnect:
+                return 499
+            elif start_status is None:
+                return int(start_response_args[0][0].split(' ', 1)[0])
+            return start_status
+
         def iter_response(iterable):
             iterator = iter(iterable)
             try:
@@ -193,6 +212,17 @@ class ProxyLoggingMiddleware(object):
                     start_response_args[0][1].append(
                         ('content-length', str(sum(len(i) for i in iterable))))
             start_response(*start_response_args[0])
+
+            req = Request(env)
+            # Log timing information for time-to-first-byte (GET requests only)
+            method = self.method_from_req(req)
+            if method == 'GET' and not self.req_already_logged(req):
+                status_int = status_int_for_logging()
+                metric_name = self.statsd_metric_name(req, status_int, method)
+                if metric_name:
+                    self.access_logger.timing_since(
+                        metric_name + '.first-byte.timing', start_time)
+
             bytes_sent = 0
             client_disconnect = False
             try:
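
The first-byte metric leans on the access logger's timing_since() helper. The semantics assumed here are simply "elapsed time since start_time, reported in milliseconds"; a simplified sketch, not the real client code:

    # Simplified sketch of the timing_since() semantics relied on above:
    # report the time elapsed since a start timestamp, converted to ms.
    import time

    def timing_since(logger, metric, orig_time):
        logger.timing(metric, (time.time() - orig_time) * 1000)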
@@ -204,18 +234,19 @@ class ProxyLoggingMiddleware(object):
                 client_disconnect = True
                 raise
             finally:
-                status_int = int(start_response_args[0][0].split(' ', 1)[0])
+                status_int = status_int_for_logging(client_disconnect)
                 self.log_request(
-                    env, status_int, input_proxy.bytes_received, bytes_sent,
-                    time.time() - start_time,
-                    client_disconnect or input_proxy.client_disconnect)
+                    req, status_int, input_proxy.bytes_received, bytes_sent,
+                    time.time() - start_time)

         try:
             iterable = self.app(env, my_start_response)
         except Exception:
+            req = Request(env)
+            status_int = status_int_for_logging(start_status=500)
             self.log_request(
-                env, 500, input_proxy.bytes_received, 0,
-                time.time() - start_time, input_proxy.client_disconnect)
+                req, status_int, input_proxy.bytes_received, 0,
+                time.time() - start_time)
             raise
         else:
             return iter_response(iterable)

View File

@@ -14,6 +14,7 @@
 # limitations under the License.

 import unittest
+import time
 from urllib import quote, unquote
 import cStringIO as StringIO
 from logging.handlers import SysLogHandler
@@ -25,12 +26,14 @@ from swift.common.swob import Request

 class FakeApp(object):

-    def __init__(self, body=['FAKE APP']):
+    def __init__(self, body=['FAKE APP'], response_str='200 OK'):
         self.body = body
+        self.response_str = response_str

     def __call__(self, env, start_response):
-        start_response('200 OK', [('Content-Type', 'text/plain'),
-                                  ('Content-Length', str(sum(map(len, self.body))))])
+        start_response(self.response_str,
+                       [('Content-Type', 'text/plain'),
+                        ('Content-Length', str(sum(map(len, self.body))))])
         while env['wsgi.input'].read(5):
             pass
         return self.body
@@ -72,19 +75,48 @@ def start_response(*args):

 class TestProxyLogging(unittest.TestCase):

-    def _log_parts(self, app):
+    def _log_parts(self, app, should_be_empty=False):
         info_calls = app.access_logger.log_dict['info']
-        self.assertEquals(1, len(info_calls))
-        return info_calls[0][0][0].split(' ')
+        if should_be_empty:
+            self.assertEquals([], info_calls)
+        else:
+            self.assertEquals(1, len(info_calls))
+            return info_calls[0][0][0].split(' ')

     def assertTiming(self, exp_metric, app, exp_timing=None):
         timing_calls = app.access_logger.log_dict['timing']
-        self.assertEquals(1, len(timing_calls))
-        self.assertEquals({}, timing_calls[0][1])
-        self.assertEquals(2, len(timing_calls[0][0]))
-        self.assertEquals(exp_metric, timing_calls[0][0][0])
-        if exp_timing is not None:
-            self.assertEquals(exp_timing, timing_calls[0][0][1])
+        found = False
+        for timing_call in timing_calls:
+            self.assertEquals({}, timing_call[1])
+            self.assertEquals(2, len(timing_call[0]))
+            if timing_call[0][0] == exp_metric:
+                found = True
+                if exp_timing is not None:
+                    self.assertAlmostEqual(exp_timing, timing_call[0][1],
+                                           places=4)
+        if not found:
+            self.assertTrue(False, 'assertTiming: %s not found in %r' % (
+                exp_metric, timing_calls))
+
+    def assertTimingSince(self, exp_metric, app, exp_start=None):
+        timing_calls = app.access_logger.log_dict['timing_since']
+        found = False
+        for timing_call in timing_calls:
+            self.assertEquals({}, timing_call[1])
+            self.assertEquals(2, len(timing_call[0]))
+            if timing_call[0][0] == exp_metric:
+                found = True
+                if exp_start is not None:
+                    self.assertAlmostEqual(exp_start, timing_call[0][1],
+                                           places=4)
+        if not found:
+            self.assertTrue(False, 'assertTimingSince: %s not found in %r' % (
+                exp_metric, timing_calls))
+
+    def assertNotTiming(self, not_exp_metric, app):
+        timing_calls = app.access_logger.log_dict['timing']
+        for timing_call in timing_calls:
+            self.assertNotEqual(not_exp_metric, timing_call[0][0])

     def assertUpdateStats(self, exp_metric, exp_bytes, app):
         update_stats_calls = app.access_logger.log_dict['update_stats']
@@ -99,9 +131,8 @@ class TestProxyLogging(unittest.TestCase):
             req = Request.blank(url, environ={'REQUEST_METHOD': 'GET'})
             resp = app(req.environ, start_response)
             resp_body = ''.join(resp)
-            self.assertEquals(0, len(app.access_logger.log_dict['timing']))
-            self.assertEquals(0,
-                              len(app.access_logger.log_dict['update_stats']))
+            self.assertEqual([], app.access_logger.log_dict['timing'])
+            self.assertEqual([], app.access_logger.log_dict['update_stats'])

     def test_log_request_stat_type_bad(self):
         for bad_path in ['', '/', '/bad', '/baddy/mc_badderson', '/v1',
@@ -109,11 +140,21 @@ class TestProxyLogging(unittest.TestCase):
             app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {})
             app.access_logger = FakeLogger()
             req = Request.blank(bad_path, environ={'REQUEST_METHOD': 'GET'})
-            app.log_request(req.environ, 123, 7, 13, 2.71828182846, False)
+            app.log_request(req, 123, 7, 13, 2.71828182846)
             self.assertEqual([], app.access_logger.log_dict['timing'])
             self.assertEqual([], app.access_logger.log_dict['update_stats'])

     def test_log_request_stat_type_good(self):
+        """
+        log_request() should send timing and byte-count counters for GET
+        requests. Also, __call__()'s iter_response() function should
+        statsd-log time to first byte (calling the passed-in start_response
+        function), but only for GET requests.
+        """
+        stub_times = []
+
+        def stub_time():
+            return stub_times.pop(0)
+
         path_types = {
             '/v1/a': 'account',
             '/v1/a/': 'account',
@@ -126,13 +167,62 @@ class TestProxyLogging(unittest.TestCase):
             '/v1/a/c/o/p/p2': 'object',
         }
         for path, exp_type in path_types.iteritems():
-            app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {})
-            app.access_logger = FakeLogger()
-            req = Request.blank(path, environ={'REQUEST_METHOD': 'GET'})
-            app.log_request(req.environ, 321, 7, 13, 2.71828182846, False)
-            self.assertTiming('%s.GET.321.timing' % exp_type, app,
-                              exp_timing=2.71828182846 * 1000)
-            self.assertUpdateStats('%s.GET.321.xfer' % exp_type, 7 + 13, app)
+            orig_time = time.time
+            try:
+                time.time = stub_time
+                # GET
+                app = proxy_logging.ProxyLoggingMiddleware(
+                    FakeApp(body='7654321', response_str='321 Fubar'), {})
+                app.access_logger = FakeLogger()
+                req = Request.blank(path, environ={
+                    'REQUEST_METHOD': 'GET',
+                    'wsgi.input': StringIO.StringIO('4321')})
+                stub_times = [18.0, 20.71828182846]
+                iter_response = app(req.environ, lambda *_: None)
+                self.assertEqual('7654321', ''.join(iter_response))
+                self.assertTiming('%s.GET.321.timing' % exp_type, app,
+                                  exp_timing=2.71828182846 * 1000)
+                self.assertTimingSince(
+                    '%s.GET.321.first-byte.timing' % exp_type, app,
+                    exp_start=18.0)
+                self.assertUpdateStats('%s.GET.321.xfer' % exp_type,
+                                       4 + 7, app)
+
+                # GET with swift.proxy_access_log_made already set
+                app = proxy_logging.ProxyLoggingMiddleware(
+                    FakeApp(body='7654321', response_str='321 Fubar'), {})
+                app.access_logger = FakeLogger()
+                req = Request.blank(path, environ={
+                    'REQUEST_METHOD': 'GET',
+                    'swift.proxy_access_log_made': True,
+                    'wsgi.input': StringIO.StringIO('4321')})
+                stub_times = [18.0, 20.71828182846]
+                iter_response = app(req.environ, lambda *_: None)
+                self.assertEqual('7654321', ''.join(iter_response))
+                self.assertEqual([], app.access_logger.log_dict['timing'])
+                self.assertEqual([], app.access_logger.log_dict['timing_since'])
+                self.assertEqual([], app.access_logger.log_dict['update_stats'])
+
+                # PUT (no first-byte timing!)
+                app = proxy_logging.ProxyLoggingMiddleware(
+                    FakeApp(body='87654321', response_str='314 PiTown'), {})
+                app.access_logger = FakeLogger()
+                req = Request.blank(path, environ={
+                    'REQUEST_METHOD': 'PUT',
+                    'wsgi.input': StringIO.StringIO('654321')})
+                # (it's not a GET, so time() doesn't have a 2nd call)
+                stub_times = [58.2, 58.2 + 7.3321]
+                iter_response = app(req.environ, lambda *_: None)
+                self.assertEqual('87654321', ''.join(iter_response))
+                self.assertTiming('%s.PUT.314.timing' % exp_type, app,
+                                  exp_timing=7.3321 * 1000)
+                self.assertNotTiming(
+                    '%s.GET.314.first-byte.timing' % exp_type, app)
+                self.assertNotTiming(
+                    '%s.PUT.314.first-byte.timing' % exp_type, app)
+                self.assertUpdateStats('%s.PUT.314.xfer' % exp_type, 6 + 8, app)
+            finally:
+                time.time = orig_time

     def test_log_request_stat_method_filtering_default(self):
         method_map = {
@@ -152,7 +242,7 @@ class TestProxyLogging(unittest.TestCase):
             app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {})
             app.access_logger = FakeLogger()
             req = Request.blank('/v1/a/', environ={'REQUEST_METHOD': method})
-            app.log_request(req.environ, 299, 11, 3, 1.17, False)
+            app.log_request(req, 299, 11, 3, 1.17)
             self.assertTiming('account.%s.299.timing' % exp_method, app,
                               exp_timing=1.17 * 1000)
             self.assertUpdateStats('account.%s.299.xfer' % exp_method,
@@ -177,7 +267,7 @@ class TestProxyLogging(unittest.TestCase):
             })
             app.access_logger = FakeLogger()
             req = Request.blank('/v1/a/c', environ={'REQUEST_METHOD': method})
-            app.log_request(req.environ, 911, 4, 43, 1.01, False)
+            app.log_request(req, 911, 4, 43, 1.01)
             self.assertTiming('container.%s.911.timing' % exp_method, app,
                               exp_timing=1.01 * 1000)
             self.assertUpdateStats('container.%s.911.xfer' % exp_method,
@@ -197,6 +287,17 @@ class TestProxyLogging(unittest.TestCase):
         self.assertEquals(resp_body, 'FAKE APP')
         self.assertEquals(log_parts[11], str(len(resp_body)))

+    def test_basic_req_second_time(self):
+        app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {})
+        app.access_logger = FakeLogger()
+        req = Request.blank('/', environ={
+            'swift.proxy_access_log_made': True,
+            'REQUEST_METHOD': 'GET'})
+        resp = app(req.environ, start_response)
+        resp_body = ''.join(resp)
+        log_parts = self._log_parts(app, should_be_empty=True)
+        self.assertEquals(resp_body, 'FAKE APP')
+
     def test_multi_segment_resp(self):
         app = proxy_logging.ProxyLoggingMiddleware(FakeApp(
             ['some', 'chunks', 'of data']), {})
@@ -334,7 +435,7 @@ class TestProxyLogging(unittest.TestCase):
         try:
             resp = app(req.environ, start_response)
             body = ''.join(resp)
-        except Exception:
+        except IOError:
             pass
         log_parts = self._log_parts(app)
         self.assertEquals(log_parts[6], '499')
@@ -349,7 +450,7 @@ class TestProxyLogging(unittest.TestCase):
         try:
             resp = app(req.environ, start_response)
             body = ''.join(resp)
-        except Exception:
+        except IOError:
             pass
         log_parts = self._log_parts(app)
         self.assertEquals(log_parts[6], '499')