Added per-disk PUT timing monitoring support.
Fixes bug 1104708. Swift can suffer a severe performance drop when one disk of a storage node is problematic, due to the poor state of async disk I/O. This patch adds PUT timing per kB transferred (ms/kB) monitoring support for each non-zero-byte request on each disk, reported to StatsD for alerting.

- adds the "object-server.PUT.<device>.timing" metric for the object-server

DocImpact
Change-Id: Ie94bddad28e8be52e71683bf6c9db988664abe47
This commit is contained in:
parent b6b5d6670d
commit 1d8a02f25c
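For context, a StatsD timer is a plain UDP datagram of the form <metric>:<value>|ms, so the new metric reaches the StatsD server roughly as sketched below. The device name sdb1, the 127.0.0.1:8125 endpoint, and the 0.19 ms/kB value are made-up examples for illustration, not values taken from this commit:

    import socket

    # Hypothetical illustration of the datagram emitted for a PUT on
    # device 'sdb1' measured at 0.19 ms per kB transferred.
    statsd_addr = ('127.0.0.1', 8125)
    payload = 'object-server.PUT.sdb1.timing:0.19|ms'

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.sendto(payload.encode('utf-8'), statsd_addr)
    sock.close()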
@@ -756,6 +756,9 @@ Metric Name Description
 `object-server.PUT.timeouts`          Count of object PUTs which exceeded max_upload_time.
 `object-server.PUT.timing`            Timing data for each PUT request not resulting in an
                                       error.
+`object-server.PUT.<device>.timing`   Timing data per kB transferred (ms/kB) for each
+                                      non-zero-byte PUT request on each device.
+                                      Monitoring problematic devices, higher is bad.
 `object-server.GET.errors.timing`     Timing data for GET request errors: bad request,
                                       not mounted, header timestamps before the epoch,
                                       precondition failed.
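To make the units concrete, the reported value is the accumulated transfer time converted to milliseconds and scaled per kB of uploaded data, so a slower disk yields a larger number. A small worked example with made-up figures (a 4 MiB PUT whose chunk loop took 0.8 seconds):

    # Illustrative numbers only; not taken from this commit.
    elapsed_time = 0.8           # seconds spent reading/writing chunks
    byte_xfer = 4 * 1024 * 1024  # bytes uploaded

    # seconds -> ms, divide by bytes, then scale bytes -> kB,
    # mirroring the transfer_rate arithmetic in the diff below.
    ms_per_kb = elapsed_time * 1000 / byte_xfer * 1000
    print(round(ms_per_kb, 3))   # ~0.191 ms/kB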
@@ -508,6 +508,12 @@ class StatsdClient(object):
         return self.timing(metric, (time.time() - orig_time) * 1000,
                            sample_rate)
 
+    def transfer_rate(self, metric, elapsed_time, byte_xfer, sample_rate=None):
+        if byte_xfer:
+            return self.timing(metric,
+                               elapsed_time * 1000 / byte_xfer * 1000,
+                               sample_rate)
+
 
 def timing_stats(**dec_kwargs):
     """
@@ -662,6 +668,7 @@ class LogAdapter(logging.LoggerAdapter, object):
     decrement = statsd_delegate('decrement')
     timing = statsd_delegate('timing')
     timing_since = statsd_delegate('timing_since')
+    transfer_rate = statsd_delegate('transfer_rate')
 
 
 class SwiftLogFormatter(logging.Formatter):
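Because transfer_rate is exposed on LogAdapter through statsd_delegate, callers can invoke it directly on self.logger, and zero-byte transfers are skipped so no meaningless rate is emitted. A self-contained sketch of that guard, using an illustrative stand-in class rather than Swift's real StatsdClient:

    class FakeStatsdClient(object):
        # Illustrative stand-in; only the rate arithmetic and the
        # zero-byte guard are modeled here.

        def timing(self, metric, timing_ms, sample_rate=None):
            print('%s:%s|ms' % (metric, timing_ms))

        def transfer_rate(self, metric, elapsed_time, byte_xfer,
                          sample_rate=None):
            if byte_xfer:
                # ms per kB, same arithmetic as the hunk above
                return self.timing(metric,
                                   elapsed_time * 1000 / byte_xfer * 1000,
                                   sample_rate)

    client = FakeStatsdClient()
    client.transfer_rate('object-server.PUT.sdb1.timing', 0.8, 4 * 1024 * 1024)
    # A zero-byte transfer emits nothing:
    client.transfer_rate('object-server.PUT.sdb1.timing', 0.8, 0)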
@@ -642,6 +642,7 @@ class ObjectController(object):
         etag = md5()
         upload_size = 0
         last_sync = 0
+        elapsed_time = 0
         with file.mkstemp() as fd:
             try:
                 fallocate(fd, int(request.headers.get('content-length', 0)))
@@ -649,6 +650,7 @@
                 return HTTPInsufficientStorage(drive=device, request=request)
             reader = request.environ['wsgi.input'].read
             for chunk in iter(lambda: reader(self.network_chunk_size), ''):
+                start_time = time.time()
                 upload_size += len(chunk)
                 if time.time() > upload_expiration:
                     self.logger.increment('PUT.timeouts')
@@ -663,6 +665,11 @@
                     drop_buffer_cache(fd, last_sync, upload_size - last_sync)
                     last_sync = upload_size
                 sleep()
+                elapsed_time += time.time() - start_time
 
+            if upload_size:
+                self.logger.transfer_rate(
+                    'PUT.' + device + '.timing', elapsed_time, upload_size)
+
             if 'content-length' in request.headers and \
                     int(request.headers['content-length']) != upload_size:
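One way to sanity-check this new code path is to stub out the logger and assert that a non-zero-byte upload records exactly one rate sample while an empty one records none. The FakeLogger class and tests below are a hypothetical sketch, not Swift's actual test helpers:

    import unittest

    class FakeLogger(object):
        # Records transfer_rate calls so a test can assert on them.
        def __init__(self):
            self.rate_calls = []

        def transfer_rate(self, metric, elapsed_time, byte_xfer,
                          sample_rate=None):
            self.rate_calls.append((metric, elapsed_time, byte_xfer))

    class TestPutTransferRate(unittest.TestCase):

        def test_non_zero_byte_put_records_rate(self):
            logger = FakeLogger()
            # Mirrors the object server's post-loop reporting step.
            elapsed_time, upload_size, device = 0.05, 3 * 65536, 'sdb1'
            if upload_size:
                logger.transfer_rate(
                    'PUT.' + device + '.timing', elapsed_time, upload_size)
            self.assertEqual(logger.rate_calls,
                             [('PUT.sdb1.timing', 0.05, 3 * 65536)])

        def test_zero_byte_put_records_nothing(self):
            logger = FakeLogger()
            upload_size = 0
            if upload_size:
                logger.transfer_rate('PUT.sdb1.timing', 0.0, upload_size)
            self.assertEqual(logger.rate_calls, [])

    if __name__ == '__main__':
        unittest.main()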