Wait for conductor start before notifying systemd

Currently, the launcher first notifies systemd, then starts checking the
services (RPC and WSGI). So any failures will be reported, but only
after systemd declares the service ready.

This change adds a polling loop to make sure RpcService.start() finishes
successfully.

Change-Id: Ib460622d69a9cb1cb82e796a6ab294bbbb40c359
This commit is contained in:
Dmitry Tantsur 2022-01-26 14:59:34 +01:00
parent e841fa0545
commit 2c58ab3703
5 changed files with 69 additions and 0 deletions

View File

@ -67,6 +67,12 @@ def main():
issue_startup_warnings(CONF)
launcher = service.launch(CONF, mgr, restart_method='mutate')
# NOTE(dtantsur): handling start-up failures before launcher.wait() helps
# notify systemd about them. Otherwise the launcher will report successful
# service start-up before checking the threads.
mgr.wait_for_start()
sys.exit(launcher.wait())

View File

@ -49,4 +49,9 @@ def main():
wsgi = wsgi_service.WSGIService('ironic_api', CONF.api.enable_ssl_api)
launcher.launch_service(wsgi)
# NOTE(dtantsur): handling start-up failures before launcher.wait() helps
# notify systemd about them. Otherwise the launcher will report successful
# service start-up before checking the threads.
mgr.wait_for_start()
sys.exit(launcher.wait())

View File

@ -15,6 +15,8 @@
# under the License.
import signal
import sys
import time
from ironic_lib.json_rpc import server as json_rpc
from oslo_config import cfg
@ -42,9 +44,29 @@ class RPCService(service.Service):
self.topic = self.manager.topic
self.rpcserver = None
self.deregister = True
self._failure = None
self._started = False
def wait_for_start(self):
while not self._started and not self._failure:
time.sleep(0.1)
if self._failure:
LOG.critical(self._failure)
sys.exit(self._failure)
def start(self):
self._failure = None
self._started = False
super(RPCService, self).start()
try:
self._real_start()
except Exception as exc:
self._failure = f"{exc.__class__.__name__}: {exc}"
raise
else:
self._started = True
def _real_start(self):
admin_context = context.get_admin_context()
serializer = objects_base.IronicObjectSerializer(is_server=True)

View File

@ -46,6 +46,8 @@ class TestRPCService(base.TestCase):
mock_rpc, mock_ios, mock_target, mock_prepare_method):
mock_rpc.return_value.start = mock.MagicMock()
self.rpc_svc.handle_signal = mock.MagicMock()
self.assertFalse(self.rpc_svc._started)
self.assertFalse(self.rpc_svc._failure)
self.rpc_svc.start()
mock_ctx.assert_called_once_with()
mock_target.assert_called_once_with(topic=self.rpc_svc.topic,
@ -55,6 +57,9 @@ class TestRPCService(base.TestCase):
mock_init_method.assert_called_once_with(self.rpc_svc.manager,
mock_ctx.return_value)
self.assertIs(rpc.GLOBAL_MANAGER, self.rpc_svc.manager)
self.assertTrue(self.rpc_svc._started)
self.assertFalse(self.rpc_svc._failure)
self.rpc_svc.wait_for_start() # should be no-op
@mock.patch.object(manager.ConductorManager, 'prepare_host', autospec=True)
@mock.patch.object(oslo_messaging, 'Target', autospec=True)
@ -77,3 +82,29 @@ class TestRPCService(base.TestCase):
mock_init_method.assert_called_once_with(self.rpc_svc.manager,
mock_ctx.return_value)
self.assertIs(rpc.GLOBAL_MANAGER, self.rpc_svc.manager)
@mock.patch.object(manager.ConductorManager, 'prepare_host', autospec=True)
@mock.patch.object(oslo_messaging, 'Target', autospec=True)
@mock.patch.object(objects_base, 'IronicObjectSerializer', autospec=True)
@mock.patch.object(rpc, 'get_server', autospec=True)
@mock.patch.object(manager.ConductorManager, 'init_host', autospec=True)
@mock.patch.object(context, 'get_admin_context', autospec=True)
def test_start_failure(self, mock_ctx, mock_init_method, mock_rpc,
mock_ios, mock_target, mock_prepare_method):
mock_rpc.return_value.start = mock.MagicMock()
self.rpc_svc.handle_signal = mock.MagicMock()
mock_init_method.side_effect = RuntimeError("boom")
self.assertFalse(self.rpc_svc._started)
self.assertFalse(self.rpc_svc._failure)
self.assertRaises(RuntimeError, self.rpc_svc.start)
mock_ctx.assert_called_once_with()
mock_target.assert_called_once_with(topic=self.rpc_svc.topic,
server="fake_host")
mock_ios.assert_called_once_with(is_server=True)
mock_prepare_method.assert_called_once_with(self.rpc_svc.manager)
mock_init_method.assert_called_once_with(self.rpc_svc.manager,
mock_ctx.return_value)
self.assertIsNone(rpc.GLOBAL_MANAGER)
self.assertFalse(self.rpc_svc._started)
self.assertIn("boom", self.rpc_svc._failure)
self.assertRaises(SystemExit, self.rpc_svc.wait_for_start)

View File

@ -0,0 +1,5 @@
---
fixes:
- |
The ``ironic`` and ``ironic-conductor`` services now wait for the conductor
manager to start before notifying systemd about the successful start-up.