Merge "Adds first-byte latency timings for GET requests."
This commit is contained in:
commit
d1381ba86b
@ -462,13 +462,14 @@ servers when sending statistics to a central StatsD server. If you run a local
|
||||
StatsD server per node, you could configure a per-node metrics prefix there and
|
||||
leave `log_statsd_metric_prefix` blank.
|
||||
|
||||
Note that metrics reported to StatsD are counters or timing data (which
|
||||
StatsD usually expands out to min, max, avg, count, and 90th percentile
|
||||
per timing metric). Some important "gauge" metrics will still need to
|
||||
be collected using another method. For example, the
|
||||
`object-server.async_pendings` StatsD metric counts the generation of
|
||||
async_pendings in real-time, but will not tell you the current number
|
||||
of async_pending container updates on disk at any point in time.
|
||||
Note that metrics reported to StatsD are counters or timing data (which are
|
||||
sent in units of milliseconds). StatsD usually expands timing data out to min,
|
||||
max, avg, count, and 90th percentile per timing metric, but the details of
|
||||
this behavior will depend on the configuration of your StatsD server. Some
|
||||
important "gauge" metrics may still need to be collected using another method.
|
||||
For example, the `object-server.async_pendings` StatsD metric counts the generation
|
||||
of async_pendings in real-time, but will not tell you the current number of
|
||||
async_pending container updates on disk at any point in time.
|
||||
|
||||
Note also that the set of metrics collected, their names, and their semantics
|
||||
are not locked down and will change over time. StatsD logging is currently in
|
||||
@ -494,7 +495,7 @@ Metric Name Description
|
||||
`account-reaper.errors` Count of devices failing the mount check.
|
||||
`account-reaper.timing` Timing data for each reap_account() call.
|
||||
`account-reaper.return_codes.X` Count of HTTP return codes from various operations
|
||||
(eg. object listing, container deletion, etc.). The
|
||||
(e.g. object listing, container deletion, etc.). The
|
||||
value for X is the first digit of the return code
|
||||
(2 for 201, 4 for 404, etc.).
|
||||
`account-reaper.containers_failures` Count of failures to delete a container.
|
||||
@ -840,17 +841,23 @@ the default setting yields the above behavior.
|
||||
|
||||
.. _Swift Origin Server: https://github.com/dpgoetz/sos
|
||||
|
||||
============================================ ====================================================
|
||||
Metric Name Description
|
||||
-------------------------------------------- ----------------------------------------------------
|
||||
`proxy-server.<type>.<verb>.<status>.timing` Timing data for requests. The <status> portion is
|
||||
the numeric HTTP status code for the request (eg.
|
||||
"200" or "404")
|
||||
`proxy-server.<type>.<verb>.<status>.xfer` The count of the sum of bytes transferred in (from
|
||||
clients) and out (to clients) for requests. The
|
||||
<type>, <verb>, and <status> portions of the metric
|
||||
are just like the timing metric.
|
||||
============================================ ====================================================
|
||||
==================================================== ============================================
|
||||
Metric Name Description
|
||||
---------------------------------------------------- --------------------------------------------
|
||||
`proxy-server.<type>.<verb>.<status>.timing` Timing data for requests, start to finish.
|
||||
The <status> portion is the numeric HTTP
|
||||
status code for the request (e.g. "200" or
|
||||
"404").
|
||||
`proxy-server.<type>.GET.<status>.first-byte.timing` Timing data up to completion of sending the
|
||||
response headers (only for GET requests).
|
||||
<status> and <type> are as for the main
|
||||
timing metric.
|
||||
`proxy-server.<type>.<verb>.<status>.xfer` This counter metric is the sum of bytes
|
||||
transferred in (from clients) and out (to
|
||||
clients) for requests. The <type>, <verb>,
|
||||
and <status> portions of the metric are just
|
||||
like the main timing metric.
|
||||
==================================================== ============================================
|
||||
|
||||
Metrics for `tempauth` middleware (in the table, `<reseller_prefix>` represents
|
||||
the actual configured reseller_prefix or "`NONE`" if the reseller_prefix is the
|
||||
|
@ -99,22 +99,28 @@ class ProxyLoggingMiddleware(object):
|
||||
log_route='proxy-access')
|
||||
self.access_logger.set_statsd_prefix('proxy-server')
|
||||
|
||||
def log_request(self, env, status_int, bytes_received, bytes_sent,
|
||||
request_time, client_disconnect):
|
||||
def method_from_req(self, req):
|
||||
return req.environ.get('swift.orig_req_method', req.method)
|
||||
|
||||
def req_already_logged(self, req):
|
||||
return req.environ.get('swift.proxy_access_log_made')
|
||||
|
||||
def mark_req_logged(self, req):
|
||||
req.environ['swift.proxy_access_log_made'] = True
|
||||
|
||||
def log_request(self, req, status_int, bytes_received, bytes_sent,
|
||||
request_time):
|
||||
"""
|
||||
Log a request.
|
||||
|
||||
:param env: WSGI environment
|
||||
:param req: swob.Request object for the request
|
||||
:param status_int: integer code for the response status
|
||||
:param bytes_received: bytes successfully read from the request body
|
||||
:param bytes_sent: bytes yielded to the WSGI server
|
||||
:param request_time: time taken to satisfy the request, in seconds
|
||||
"""
|
||||
if env.get('swift.proxy_access_log_made'):
|
||||
if self.req_already_logged(req):
|
||||
return
|
||||
req = Request(env)
|
||||
if client_disconnect: # log disconnected clients as '499' status code
|
||||
status_int = 499
|
||||
req_path = get_valid_utf8_str(req.path)
|
||||
the_request = quote(unquote(req_path))
|
||||
if req.query_string:
|
||||
@ -123,7 +129,7 @@ class ProxyLoggingMiddleware(object):
|
||||
if self.log_hdrs:
|
||||
logged_headers = '\n'.join('%s: %s' % (k, v)
|
||||
for k, v in req.headers.items())
|
||||
method = req.environ.get('swift.orig_req_method', req.method)
|
||||
method = self.method_from_req(req)
|
||||
self.access_logger.info(' '.join(
|
||||
quote(str(x) if x else '-')
|
||||
for x in (
|
||||
@ -145,8 +151,18 @@ class ProxyLoggingMiddleware(object):
|
||||
'%.4f' % request_time,
|
||||
req.environ.get('swift.source'),
|
||||
)))
|
||||
env['swift.proxy_access_log_made'] = True
|
||||
self.mark_req_logged(req)
|
||||
# Log timing and bytes-transfered data to StatsD
|
||||
metric_name = self.statsd_metric_name(req, status_int, method)
|
||||
# Only log data for valid controllers (or SOS) to keep the metric count
|
||||
# down (egregious errors will get logged by the proxy server itself).
|
||||
if metric_name:
|
||||
self.access_logger.timing(metric_name + '.timing',
|
||||
request_time * 1000)
|
||||
self.access_logger.update_stats(metric_name + '.xfer',
|
||||
bytes_received + bytes_sent)
|
||||
|
||||
def statsd_metric_name(self, req, status_int, method):
|
||||
if req.path.startswith('/v1/'):
|
||||
try:
|
||||
stat_type = [None, 'account', 'container',
|
||||
@ -154,17 +170,12 @@ class ProxyLoggingMiddleware(object):
|
||||
except IndexError:
|
||||
stat_type = 'object'
|
||||
else:
|
||||
stat_type = env.get('swift.source')
|
||||
# Only log data for valid controllers (or SOS) to keep the metric count
|
||||
# down (egregious errors will get logged by the proxy server itself).
|
||||
if stat_type:
|
||||
stat_method = method if method in self.valid_methods \
|
||||
else 'BAD_METHOD'
|
||||
metric_name = '.'.join((stat_type, stat_method, str(status_int)))
|
||||
self.access_logger.timing(metric_name + '.timing',
|
||||
request_time * 1000)
|
||||
self.access_logger.update_stats(metric_name + '.xfer',
|
||||
bytes_received + bytes_sent)
|
||||
stat_type = req.environ.get('swift.source')
|
||||
if stat_type is None:
|
||||
return None
|
||||
stat_method = method if method in self.valid_methods \
|
||||
else 'BAD_METHOD'
|
||||
return '.'.join((stat_type, stat_method, str(status_int)))
|
||||
|
||||
def __call__(self, env, start_response):
|
||||
start_response_args = [None]
|
||||
@ -175,6 +186,14 @@ class ProxyLoggingMiddleware(object):
|
||||
def my_start_response(status, headers, exc_info=None):
|
||||
start_response_args[0] = (status, list(headers), exc_info)
|
||||
|
||||
def status_int_for_logging(client_disconnect=False, start_status=None):
|
||||
# log disconnected clients as '499' status code
|
||||
if client_disconnect or input_proxy.client_disconnect:
|
||||
return 499
|
||||
elif start_status is None:
|
||||
return int(start_response_args[0][0].split(' ', 1)[0])
|
||||
return start_status
|
||||
|
||||
def iter_response(iterable):
|
||||
iterator = iter(iterable)
|
||||
try:
|
||||
@ -193,6 +212,17 @@ class ProxyLoggingMiddleware(object):
|
||||
start_response_args[0][1].append(
|
||||
('content-length', str(sum(len(i) for i in iterable))))
|
||||
start_response(*start_response_args[0])
|
||||
req = Request(env)
|
||||
|
||||
# Log timing information for time-to-first-byte (GET requests only)
|
||||
method = self.method_from_req(req)
|
||||
if method == 'GET' and not self.req_already_logged(req):
|
||||
status_int = status_int_for_logging()
|
||||
metric_name = self.statsd_metric_name(req, status_int, method)
|
||||
if metric_name:
|
||||
self.access_logger.timing_since(
|
||||
metric_name + '.first-byte.timing', start_time)
|
||||
|
||||
bytes_sent = 0
|
||||
client_disconnect = False
|
||||
try:
|
||||
@ -204,18 +234,19 @@ class ProxyLoggingMiddleware(object):
|
||||
client_disconnect = True
|
||||
raise
|
||||
finally:
|
||||
status_int = int(start_response_args[0][0].split(' ', 1)[0])
|
||||
status_int = status_int_for_logging(client_disconnect)
|
||||
self.log_request(
|
||||
env, status_int, input_proxy.bytes_received, bytes_sent,
|
||||
time.time() - start_time,
|
||||
client_disconnect or input_proxy.client_disconnect)
|
||||
req, status_int, input_proxy.bytes_received, bytes_sent,
|
||||
time.time() - start_time)
|
||||
|
||||
try:
|
||||
iterable = self.app(env, my_start_response)
|
||||
except Exception:
|
||||
req = Request(env)
|
||||
status_int = status_int_for_logging(start_status=500)
|
||||
self.log_request(
|
||||
env, 500, input_proxy.bytes_received, 0,
|
||||
time.time() - start_time, input_proxy.client_disconnect)
|
||||
req, status_int, input_proxy.bytes_received, 0,
|
||||
time.time() - start_time)
|
||||
raise
|
||||
else:
|
||||
return iter_response(iterable)
|
||||
|
@ -14,6 +14,7 @@
|
||||
# limitations under the License.
|
||||
|
||||
import unittest
|
||||
import time
|
||||
from urllib import quote, unquote
|
||||
import cStringIO as StringIO
|
||||
from logging.handlers import SysLogHandler
|
||||
@ -25,12 +26,14 @@ from swift.common.swob import Request
|
||||
|
||||
|
||||
class FakeApp(object):
|
||||
def __init__(self, body=['FAKE APP']):
|
||||
def __init__(self, body=['FAKE APP'], response_str='200 OK'):
|
||||
self.body = body
|
||||
self.response_str = response_str
|
||||
|
||||
def __call__(self, env, start_response):
|
||||
start_response('200 OK', [('Content-Type', 'text/plain'),
|
||||
('Content-Length', str(sum(map(len, self.body))))])
|
||||
start_response(self.response_str,
|
||||
[('Content-Type', 'text/plain'),
|
||||
('Content-Length', str(sum(map(len, self.body))))])
|
||||
while env['wsgi.input'].read(5):
|
||||
pass
|
||||
return self.body
|
||||
@ -72,19 +75,48 @@ def start_response(*args):
|
||||
|
||||
class TestProxyLogging(unittest.TestCase):
|
||||
|
||||
def _log_parts(self, app):
|
||||
def _log_parts(self, app, should_be_empty=False):
|
||||
info_calls = app.access_logger.log_dict['info']
|
||||
self.assertEquals(1, len(info_calls))
|
||||
return info_calls[0][0][0].split(' ')
|
||||
if should_be_empty:
|
||||
self.assertEquals([], info_calls)
|
||||
else:
|
||||
self.assertEquals(1, len(info_calls))
|
||||
return info_calls[0][0][0].split(' ')
|
||||
|
||||
def assertTiming(self, exp_metric, app, exp_timing=None):
|
||||
timing_calls = app.access_logger.log_dict['timing']
|
||||
self.assertEquals(1, len(timing_calls))
|
||||
self.assertEquals({}, timing_calls[0][1])
|
||||
self.assertEquals(2, len(timing_calls[0][0]))
|
||||
self.assertEquals(exp_metric, timing_calls[0][0][0])
|
||||
if exp_timing is not None:
|
||||
self.assertEquals(exp_timing, timing_calls[0][0][1])
|
||||
found = False
|
||||
for timing_call in timing_calls:
|
||||
self.assertEquals({}, timing_call[1])
|
||||
self.assertEquals(2, len(timing_call[0]))
|
||||
if timing_call[0][0] == exp_metric:
|
||||
found = True
|
||||
if exp_timing is not None:
|
||||
self.assertAlmostEqual(exp_timing, timing_call[0][1],
|
||||
places=4)
|
||||
if not found:
|
||||
self.assertTrue(False, 'assertTiming: %s not found in %r' % (
|
||||
exp_metric, timing_calls))
|
||||
|
||||
def assertTimingSince(self, exp_metric, app, exp_start=None):
|
||||
timing_calls = app.access_logger.log_dict['timing_since']
|
||||
found = False
|
||||
for timing_call in timing_calls:
|
||||
self.assertEquals({}, timing_call[1])
|
||||
self.assertEquals(2, len(timing_call[0]))
|
||||
if timing_call[0][0] == exp_metric:
|
||||
found = True
|
||||
if exp_start is not None:
|
||||
self.assertAlmostEqual(exp_start, timing_call[0][1],
|
||||
places=4)
|
||||
if not found:
|
||||
self.assertTrue(False, 'assertTimingSince: %s not found in %r' % (
|
||||
exp_metric, timing_calls))
|
||||
|
||||
def assertNotTiming(self, not_exp_metric, app):
|
||||
timing_calls = app.access_logger.log_dict['timing']
|
||||
for timing_call in timing_calls:
|
||||
self.assertNotEqual(not_exp_metric, timing_call[0][0])
|
||||
|
||||
def assertUpdateStats(self, exp_metric, exp_bytes, app):
|
||||
update_stats_calls = app.access_logger.log_dict['update_stats']
|
||||
@ -99,9 +131,8 @@ class TestProxyLogging(unittest.TestCase):
|
||||
req = Request.blank(url, environ={'REQUEST_METHOD': 'GET'})
|
||||
resp = app(req.environ, start_response)
|
||||
resp_body = ''.join(resp)
|
||||
self.assertEquals(0, len(app.access_logger.log_dict['timing']))
|
||||
self.assertEquals(0,
|
||||
len(app.access_logger.log_dict['update_stats']))
|
||||
self.assertEqual([], app.access_logger.log_dict['timing'])
|
||||
self.assertEqual([], app.access_logger.log_dict['update_stats'])
|
||||
|
||||
def test_log_request_stat_type_bad(self):
|
||||
for bad_path in ['', '/', '/bad', '/baddy/mc_badderson', '/v1',
|
||||
@ -109,11 +140,21 @@ class TestProxyLogging(unittest.TestCase):
|
||||
app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {})
|
||||
app.access_logger = FakeLogger()
|
||||
req = Request.blank(bad_path, environ={'REQUEST_METHOD': 'GET'})
|
||||
app.log_request(req.environ, 123, 7, 13, 2.71828182846, False)
|
||||
app.log_request(req, 123, 7, 13, 2.71828182846)
|
||||
self.assertEqual([], app.access_logger.log_dict['timing'])
|
||||
self.assertEqual([], app.access_logger.log_dict['update_stats'])
|
||||
|
||||
def test_log_request_stat_type_good(self):
|
||||
"""
|
||||
log_request() should send timing and byte-count counters for GET
|
||||
requests. Also, __call__()'s iter_response() function should
|
||||
statsd-log time to first byte (calling the passed-in start_response
|
||||
function), but only for GET requests.
|
||||
"""
|
||||
stub_times = []
|
||||
def stub_time():
|
||||
return stub_times.pop(0)
|
||||
|
||||
path_types = {
|
||||
'/v1/a': 'account',
|
||||
'/v1/a/': 'account',
|
||||
@ -126,13 +167,62 @@ class TestProxyLogging(unittest.TestCase):
|
||||
'/v1/a/c/o/p/p2': 'object',
|
||||
}
|
||||
for path, exp_type in path_types.iteritems():
|
||||
app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {})
|
||||
app.access_logger = FakeLogger()
|
||||
req = Request.blank(path, environ={'REQUEST_METHOD': 'GET'})
|
||||
app.log_request(req.environ, 321, 7, 13, 2.71828182846, False)
|
||||
self.assertTiming('%s.GET.321.timing' % exp_type, app,
|
||||
exp_timing=2.71828182846 * 1000)
|
||||
self.assertUpdateStats('%s.GET.321.xfer' % exp_type, 7 + 13, app)
|
||||
orig_time = time.time
|
||||
try:
|
||||
time.time = stub_time
|
||||
# GET
|
||||
app = proxy_logging.ProxyLoggingMiddleware(
|
||||
FakeApp(body='7654321', response_str='321 Fubar'), {})
|
||||
app.access_logger = FakeLogger()
|
||||
req = Request.blank(path, environ={
|
||||
'REQUEST_METHOD': 'GET',
|
||||
'wsgi.input': StringIO.StringIO('4321')})
|
||||
stub_times = [18.0, 20.71828182846]
|
||||
iter_response = app(req.environ, lambda *_: None)
|
||||
self.assertEqual('7654321', ''.join(iter_response))
|
||||
self.assertTiming('%s.GET.321.timing' % exp_type, app,
|
||||
exp_timing=2.71828182846 * 1000)
|
||||
self.assertTimingSince(
|
||||
'%s.GET.321.first-byte.timing' % exp_type, app,
|
||||
exp_start=18.0)
|
||||
self.assertUpdateStats('%s.GET.321.xfer' % exp_type,
|
||||
4 + 7, app)
|
||||
|
||||
# GET with swift.proxy_access_log_made already set
|
||||
app = proxy_logging.ProxyLoggingMiddleware(
|
||||
FakeApp(body='7654321', response_str='321 Fubar'), {})
|
||||
app.access_logger = FakeLogger()
|
||||
req = Request.blank(path, environ={
|
||||
'REQUEST_METHOD': 'GET',
|
||||
'swift.proxy_access_log_made': True,
|
||||
'wsgi.input': StringIO.StringIO('4321')})
|
||||
stub_times = [18.0, 20.71828182846]
|
||||
iter_response = app(req.environ, lambda *_: None)
|
||||
self.assertEqual('7654321', ''.join(iter_response))
|
||||
self.assertEqual([], app.access_logger.log_dict['timing'])
|
||||
self.assertEqual([], app.access_logger.log_dict['timing_since'])
|
||||
self.assertEqual([], app.access_logger.log_dict['update_stats'])
|
||||
|
||||
# PUT (no first-byte timing!)
|
||||
app = proxy_logging.ProxyLoggingMiddleware(
|
||||
FakeApp(body='87654321', response_str='314 PiTown'), {})
|
||||
app.access_logger = FakeLogger()
|
||||
req = Request.blank(path, environ={
|
||||
'REQUEST_METHOD': 'PUT',
|
||||
'wsgi.input': StringIO.StringIO('654321')})
|
||||
# (it's not a GET, so time() doesn't have a 2nd call)
|
||||
stub_times = [58.2, 58.2 + 7.3321]
|
||||
iter_response = app(req.environ, lambda *_: None)
|
||||
self.assertEqual('87654321', ''.join(iter_response))
|
||||
self.assertTiming('%s.PUT.314.timing' % exp_type, app,
|
||||
exp_timing=7.3321 * 1000)
|
||||
self.assertNotTiming(
|
||||
'%s.GET.314.first-byte.timing' % exp_type, app)
|
||||
self.assertNotTiming(
|
||||
'%s.PUT.314.first-byte.timing' % exp_type, app)
|
||||
self.assertUpdateStats('%s.PUT.314.xfer' % exp_type, 6 + 8, app)
|
||||
finally:
|
||||
time.time = orig_time
|
||||
|
||||
def test_log_request_stat_method_filtering_default(self):
|
||||
method_map = {
|
||||
@ -152,7 +242,7 @@ class TestProxyLogging(unittest.TestCase):
|
||||
app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {})
|
||||
app.access_logger = FakeLogger()
|
||||
req = Request.blank('/v1/a/', environ={'REQUEST_METHOD': method})
|
||||
app.log_request(req.environ, 299, 11, 3, 1.17, False)
|
||||
app.log_request(req, 299, 11, 3, 1.17)
|
||||
self.assertTiming('account.%s.299.timing' % exp_method, app,
|
||||
exp_timing=1.17 * 1000)
|
||||
self.assertUpdateStats('account.%s.299.xfer' % exp_method,
|
||||
@ -177,7 +267,7 @@ class TestProxyLogging(unittest.TestCase):
|
||||
})
|
||||
app.access_logger = FakeLogger()
|
||||
req = Request.blank('/v1/a/c', environ={'REQUEST_METHOD': method})
|
||||
app.log_request(req.environ, 911, 4, 43, 1.01, False)
|
||||
app.log_request(req, 911, 4, 43, 1.01)
|
||||
self.assertTiming('container.%s.911.timing' % exp_method, app,
|
||||
exp_timing=1.01 * 1000)
|
||||
self.assertUpdateStats('container.%s.911.xfer' % exp_method,
|
||||
@ -197,6 +287,17 @@ class TestProxyLogging(unittest.TestCase):
|
||||
self.assertEquals(resp_body, 'FAKE APP')
|
||||
self.assertEquals(log_parts[11], str(len(resp_body)))
|
||||
|
||||
def test_basic_req_second_time(self):
|
||||
app = proxy_logging.ProxyLoggingMiddleware(FakeApp(), {})
|
||||
app.access_logger = FakeLogger()
|
||||
req = Request.blank('/', environ={
|
||||
'swift.proxy_access_log_made': True,
|
||||
'REQUEST_METHOD': 'GET'})
|
||||
resp = app(req.environ, start_response)
|
||||
resp_body = ''.join(resp)
|
||||
log_parts = self._log_parts(app, should_be_empty=True)
|
||||
self.assertEquals(resp_body, 'FAKE APP')
|
||||
|
||||
def test_multi_segment_resp(self):
|
||||
app = proxy_logging.ProxyLoggingMiddleware(FakeApp(
|
||||
['some', 'chunks', 'of data']), {})
|
||||
@ -334,7 +435,7 @@ class TestProxyLogging(unittest.TestCase):
|
||||
try:
|
||||
resp = app(req.environ, start_response)
|
||||
body = ''.join(resp)
|
||||
except Exception:
|
||||
except IOError:
|
||||
pass
|
||||
log_parts = self._log_parts(app)
|
||||
self.assertEquals(log_parts[6], '499')
|
||||
@ -349,7 +450,7 @@ class TestProxyLogging(unittest.TestCase):
|
||||
try:
|
||||
resp = app(req.environ, start_response)
|
||||
body = ''.join(resp)
|
||||
except Exception:
|
||||
except IOError:
|
||||
pass
|
||||
log_parts = self._log_parts(app)
|
||||
self.assertEquals(log_parts[6], '499')
|
||||
|
Loading…
Reference in New Issue
Block a user