From 1c888c94a3457c5bc2225484e5dd86978c3d265d Mon Sep 17 00:00:00 2001 From: Gaudenz Steinlin Date: Wed, 26 Jun 2024 09:32:59 +0200 Subject: [PATCH] Improve Process fixture service restart handling The test_l2_agent_restart test was failing due to the agents not restarting within the timeout of 30s. This is fixed by: * Use `systemctl restart` to restart the service instead of killing and creating a new transient service. * Don't block on `systemctl` calls to allow parallel service operations. Previously this was serialized in the rootwrap daemon which led to delays. * Use `KillMode=mixed` to first only kill the main process and give it 25s to cleanly shut down all other processes. After this timeout all processes are killed. Previously systemd sent a SIGTERM to all processes which caused unclean shutdowns of some neutron agents which expected to shut down their child processes themselves. Change-Id: Ic752e36e6fe6ba9b1fc9e7296204c086c465d76f Closes-Bug: #2070390 --- neutron/tests/fullstack/base.py | 2 +- neutron/tests/fullstack/resources/process.py | 33 ++++++++++++++++---- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/neutron/tests/fullstack/base.py b/neutron/tests/fullstack/base.py index 73f2ee8ac2d..c588f146eb6 100644 --- a/neutron/tests/fullstack/base.py +++ b/neutron/tests/fullstack/base.py @@ -120,7 +120,7 @@ class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin, common_utils.wait_until_true(_agent_down) def _assert_ping_during_agents_restart( - self, agents, src_namespace, ips, restart_timeout=10, + self, agents, src_namespace, ips, restart_timeout=30, ping_timeout=1, count=10): with net_helpers.async_ping( src_namespace, ips, timeout=ping_timeout, diff --git a/neutron/tests/fullstack/resources/process.py b/neutron/tests/fullstack/resources/process.py index 7502734e014..6991a4c0122 100644 --- a/neutron/tests/fullstack/resources/process.py +++ b/neutron/tests/fullstack/resources/process.py @@ -81,7 +81,10 @@ class 
ProcessFixture(fixtures.Fixture): systemd_run = [ 'systemd-run', '--service-type', 'exec', - '--property', 'TimeoutStopSec=30s', + # Timeout and KILL processes 5s before the timeout the restart + # tests use. + '--property', 'TimeoutStopSec=25s', + '--property', 'KillMode=mixed', '--unit', self.unit_name, '--setenv', f'PATH={os.environ["PATH"]}', '--same-dir', @@ -103,6 +106,7 @@ class ProcessFixture(fixtures.Fixture): # run unprivileged if run_as_root is False. run_as_root=True, ) + common_utils.wait_until_true(self.service_is_active) LOG.debug("Process started: %s", self.process_name) def stop(self, kill_signal=None): @@ -120,16 +124,26 @@ class ProcessFixture(fixtures.Fixture): msg = (f'Process killed with signal {kill_signal}: ' f'{self.process_name}') else: - stop_cmd = ['systemctl', 'stop', self.unit_name] + stop_cmd = ['systemctl', 'stop', '--no-block', self.unit_name] msg = f'Process stopped: {self.process_name}' utils.execute(stop_cmd, run_as_root=True) + common_utils.wait_until_true(self.process_is_not_running) LOG.debug(msg) def restart(self, executor=None): def _restart(): - self.stop() - self.start() + if self.process_is_running(): + restart_cmd = [ + 'systemctl', + 'restart', + '--no-block', + self.unit_name, + ] + utils.execute(restart_cmd, run_as_root=True) + common_utils.wait_until_true(self.service_is_active) + else: + self.start() LOG.debug("Restarting process: %s", self.process_name) @@ -138,14 +152,21 @@ class ProcessFixture(fixtures.Fixture): else: return executor.submit(_restart) - def process_is_running(self): + @property + def service_state(self): cmd = ['systemctl', 'is-active', self.unit_name] return utils.execute( cmd, run_as_root=True, log_fail_as_error=False, check_exit_code=False, - ) == 'active\n' + ).strip() + + def service_is_active(self): + return self.service_state == 'active' + + def process_is_running(self): + return self.service_state in ('active', 'activating', 'deactivating') def process_is_not_running(self): return not 
self.process_is_running()