Add more informative debug logs to agent processes

This patch adds some more debug logs into agent processes, so that
we can observe their behavior more in detail.
Also, this patch removes some detailed informations like pids from
responses returned to gateway/runtime, because gateway/runtime doesn't
require the details of agent behavior.

Change-Id: I2fd9998966120c46414e607b94ce03bee884e11f
This commit is contained in:
Takashi Kajinami 2020-09-06 16:53:19 +09:00
parent f0b74e1a76
commit be9e28dc67
4 changed files with 39 additions and 22 deletions

View File

@ -145,6 +145,7 @@ class SBusServer(object):
@command_handler
def ping(self, dtg):
self.logger.debug('Received ping. Responding')
return CommandSuccess('OK')
@command_handler

View File

@ -158,6 +158,8 @@ class StorletDaemon(SBusServer):
@command_handler
def execute(self, dtg):
task_id = str(uuid.uuid4())[:8]
self.logger.debug('Execute storlet %s (task %s)' %
(self.storlet_name, task_id))
while len(self.task_id_to_pid) >= self.pool_size:
self._wait_child_process()
@ -187,10 +189,11 @@ class StorletDaemon(SBusServer):
out_fds = dtg.object_out_fds
logger_fd = dtg.logger_out_fd
self.logger.debug('Start storlet invocation')
self.logger.debug('Started storlet invocation process')
self.logger.debug(
'in_fds:%s in_md:%s out_md_fds:%s out_fds:%s logger_fd: %s'
% (in_fds, in_md, out_md_fds, out_fds, logger_fd))
'Invocation fds: '
'in_fds:%s in_md:%s out_md_fds:%s out_fds:%s logger_fd:%s' %
(in_fds, in_md, out_md_fds, out_fds, logger_fd))
in_files = [self._create_input_file(st_md, md, in_fd)
for st_md, md, in_fd in zip(storlet_md, in_md, in_fds)]
@ -199,11 +202,11 @@ class StorletDaemon(SBusServer):
for out_md_fd, out_fd in zip(out_md_fds, out_fds)]
try:
self.logger.debug('Start storlet execution')
self.logger.debug('Start storlet invocation')
with StorletLogger(self.storlet_name, logger_fd) as slogger:
handler = self.storlet_cls(slogger)
handler(in_files, out_files, params)
self.logger.debug('Completed')
self.logger.debug('Completed storlet invocation')
except Exception:
self.logger.exception('Error in storlet invocation')
finally:
@ -213,6 +216,7 @@ class StorletDaemon(SBusServer):
@command_handler
def cancel(self, dtg):
task_id = dtg.task_id
self.logger.debug('Cancel task: %s' % task_id)
if task_id not in self.task_id_to_pid:
return CommandFailure('Task id %s is not found' % task_id, False)
@ -227,6 +231,7 @@ class StorletDaemon(SBusServer):
@command_handler
def halt(self, dtg):
self.logger.debug('Terminating')
return CommandSuccess('OK', False)
def _terminate(self):

View File

@ -195,6 +195,8 @@ class StorletDaemonFactory(SBusServer):
try:
resp = client.ping()
if resp.status:
self.logger.debug('The storlet daemon {0} is started'
.format(storlet_name))
return True
except SBusClientSendError:
pass
@ -319,7 +321,8 @@ class StorletDaemonFactory(SBusServer):
.format(storlet_name, dmn_pid))
if dmn_pid is None:
raise SDaemonError('{0} is not found'.format(storlet_name))
raise SDaemonError('The storlet daemon {0} is not found'
.format(storlet_name))
try:
os.kill(dmn_pid, signal.SIGKILL)
@ -331,7 +334,7 @@ class StorletDaemonFactory(SBusServer):
return obtained_pid, obtained_code
except OSError:
self.logger.exception(
'Error when sending kill signal to the storlet daemon %s' %
'Error when killing the storlet daemon %s' %
storlet_name)
raise SDaemonError('Failed to send kill signal to the storlet '
'daemon {0}'.format(storlet_name))
@ -344,12 +347,13 @@ class StorletDaemonFactory(SBusServer):
stop some of the storlet daemons
:raises SDaemonError: when failed to kill one of the storlet daemons
"""
self.logger.debug('Kill all storlet daemons')
failed = []
for storlet_name in list(self.storlet_name_to_pid):
try:
self.process_kill(storlet_name)
except SDaemonError:
self.logger.exception('Failed to stop the storlet daemon {0}'
self.logger.exception('Failed to kill the storlet daemon {0}'
.format(storlet_name))
if try_all:
failed.append(storlet_name)
@ -357,7 +361,7 @@ class StorletDaemonFactory(SBusServer):
raise
if failed:
names = ', '.join(failed)
raise SDaemonError('Failed to stop some storlet daemons: {0}'
raise SDaemonError('Failed to kill some storlet daemons: {0}'
.format(names))
def shutdown_all_processes(self, try_all=True):
@ -369,6 +373,7 @@ class StorletDaemonFactory(SBusServer):
:returns: a list of the terminated storlet daemons
:raises SDaemonError: when failed to kill one of the storlet daemons
"""
self.logger.debug('Shutdown all storlet daemons')
terminated = []
failed = []
for storlet_name in list(self.storlet_name_to_pid):
@ -402,9 +407,11 @@ class StorletDaemonFactory(SBusServer):
'Shutdown the storlet daemon {0}'.format(storlet_name))
dmn_pid = self.storlet_name_to_pid.get(storlet_name)
self.logger.debug('Storlet Daemon PID is {0}'.format(dmn_pid))
if dmn_pid is None:
raise SDaemonError('{0} is not found'.format(storlet_name))
raise SDaemonError('PID of the storlet daemon {0} is not found'.
format(storlet_name))
self.logger.debug('PID of the storlet daemon {0} is {1}'.
format(storlet_name, dmn_pid))
storlet_pipe_name = self.storlet_name_to_pipe_name[storlet_name]
self.logger.debug('Send HALT command to {0} via {1}'.
@ -428,6 +435,8 @@ class StorletDaemonFactory(SBusServer):
try:
os.waitpid(dmn_pid, 0)
self.storlet_name_to_pid.pop(storlet_name)
self.logger.debug(
'The storlet daemon {0} is stopped'.format(storlet_name))
except OSError:
self.logger.exception(
'Error when waiting the storlet daemon {0}'.format(
@ -448,7 +457,8 @@ class StorletDaemonFactory(SBusServer):
'daemon_language_version')):
msg = 'OK'
else:
msg = '{0} is already running'.format(storlet_name)
msg = 'The storlet daemon {0} is already running'.format(
storlet_name)
return CommandSuccess(msg)
except SDaemonError as err:
self.logger.exception('Failed to start the sdaemon for {0}'
@ -461,8 +471,8 @@ class StorletDaemonFactory(SBusServer):
storlet_name = params['storlet_name']
try:
pid, code = self.process_kill(storlet_name)
msg = 'Storlet {0}, PID = {1}, ErrCode = {2}'.format(
storlet_name, pid, code)
msg = 'The storlet daemon {0} is stopped'.format(
storlet_name)
return CommandSuccess(msg)
except SDaemonError as err:
self.logger.exception('Failed to kill the storlet daemon %s' %
@ -498,11 +508,11 @@ class StorletDaemonFactory(SBusServer):
@command_handler
def halt(self, dtg):
try:
terminated = self.shutdown_all_processes()
msg = '; '.join(['%s: terminated' % x for x in terminated])
self.shutdown_all_processes()
msg = 'Stopped all storlet daemons. Terminating.'
return CommandSuccess(msg, False)
except SDaemonError as err:
self.logger.exception('Failed to halt some storlet daemons')
self.logger.exception('Failed to stop some storlet daemons')
return CommandFailure(err.args[0], False)
def _terminate(self):

View File

@ -373,7 +373,7 @@ class TestStorletDaemonFactory(unittest.TestCase):
with mock.patch(self.kill_path) as kill, \
mock.patch(self.waitpid_path) as waitpid:
kill.side_effect = OSError()
exc_pattern = '^Failed to stop some storlet daemons: .*'
exc_pattern = '^Failed to kill some storlet daemons: .*'
with self.assertRaisesRegexp(SDaemonError, exc_pattern) as e:
self.dfactory.process_kill_all()
self.assertIn('storleta', str(e.exception))
@ -536,7 +536,8 @@ class TestStorletDaemonFactory(unittest.TestCase):
waitpid.return_value = 0, 0
ret = self.dfactory.start_daemon(DummyDatagram(prms))
self.assertTrue(ret.status)
self.assertEqual('storleta is already running', ret.message)
self.assertEqual(
'The storlet daemon storleta is already running', ret.message)
self.assertTrue(ret.iterable)
# Unsupported language
@ -556,7 +557,7 @@ class TestStorletDaemonFactory(unittest.TestCase):
resp = self.dfactory.stop_daemon(
DummyDatagram({'storlet_name': 'storleta'}))
self.assertTrue(resp.status)
self.assertEqual('Storlet storleta, PID = 1000, ErrCode = 0',
self.assertEqual('The storlet daemon storleta is stopped',
resp.message)
self.assertTrue(resp.iterable)
@ -614,8 +615,8 @@ class TestStorletDaemonFactory(unittest.TestCase):
halt.return_value = SBusResponse(True, 'OK')
resp = self.dfactory.halt(DummyDatagram())
self.assertTrue(resp.status)
self.assertIn('storleta: terminated', resp.message)
self.assertIn('storletb: terminated', resp.message)
self.assertEqual(
'Stopped all storlet daemons. Terminating.', resp.message)
self.assertFalse(resp.iterable)
def test_stop_daemons(self):