From 51eef0d6d7ac6f511e23a8ceb59dc0f0dd7cfbdf Mon Sep 17 00:00:00 2001 From: Gregory Thiemonge Date: Wed, 25 May 2022 10:08:11 +0200 Subject: [PATCH] Fix driver-agent cleanup The cleanup function (that handles graceful shutdown) of the Octavia driver-agent didn't work properly, the threads that were spawned for stopping the Unix listeners were stuck (deadlock), so the threads leaked and the join functions in the caller didn't return. Now the code uses a thread for handling the connections, and the shutdown function is called from the process. Story 2010052 Task 45468 Change-Id: Ida33a1a24eab2706902b1ce3cca704d51fa6a599 --- .../drivers/driver_agent/driver_listener.py | 60 +++++++++---------- .../driver_agent/test_driver_listener.py | 15 ++--- ...nt-graceful-shutdown-daff9ffaccb09a9e.yaml | 7 +++ 3 files changed, 40 insertions(+), 42 deletions(-) create mode 100644 releasenotes/notes/fix-driver-agent-graceful-shutdown-daff9ffaccb09a9e.yaml diff --git a/octavia/api/drivers/driver_agent/driver_listener.py b/octavia/api/drivers/driver_agent/driver_listener.py index 51912db70a..041817262c 100644 --- a/octavia/api/drivers/driver_agent/driver_listener.py +++ b/octavia/api/drivers/driver_agent/driver_listener.py @@ -113,59 +113,53 @@ def _cleanup_socket_file(filename): def status_listener(exit_event): _cleanup_socket_file(CONF.driver_agent.status_socket_path) - server = ForkingUDSServer(CONF.driver_agent.status_socket_path, - StatusRequestHandler) + with ForkingUDSServer(CONF.driver_agent.status_socket_path, + StatusRequestHandler) as server: + server.timeout = CONF.driver_agent.status_request_timeout + server.max_children = CONF.driver_agent.status_max_processes - server.timeout = CONF.driver_agent.status_request_timeout - server.max_children = CONF.driver_agent.status_max_processes + threading.Thread(target=server.serve_forever).start() - while not exit_event.is_set(): - server.handle_request() + exit_event.wait() - LOG.info('Waiting for driver status listener to shutdown...') - # Can't shut ourselves down as we would deadlock, spawn a thread - threading.Thread(target=server.shutdown).start() - LOG.info('Driver status listener shutdown finished.') - server.server_close() + LOG.info('Waiting for driver status listener to shutdown...') + server.shutdown() + LOG.info('Driver status listener shutdown finished.') _cleanup_socket_file(CONF.driver_agent.status_socket_path) def stats_listener(exit_event): _cleanup_socket_file(CONF.driver_agent.stats_socket_path) - server = ForkingUDSServer(CONF.driver_agent.stats_socket_path, - StatsRequestHandler) + with ForkingUDSServer(CONF.driver_agent.stats_socket_path, + StatsRequestHandler) as server: + server.timeout = CONF.driver_agent.stats_request_timeout + server.max_children = CONF.driver_agent.stats_max_processes - server.timeout = CONF.driver_agent.stats_request_timeout - server.max_children = CONF.driver_agent.stats_max_processes + threading.Thread(target=server.serve_forever).start() - while not exit_event.is_set(): - server.handle_request() + exit_event.wait() - LOG.info('Waiting for driver statistics listener to shutdown...') - # Can't shut ourselves down as we would deadlock, spawn a thread - threading.Thread(target=server.shutdown).start() - LOG.info('Driver statistics listener shutdown finished.') - server.server_close() + LOG.info('Waiting for driver statistics listener to shutdown...') + server.shutdown() + LOG.info('Driver statistics listener shutdown finished.') _cleanup_socket_file(CONF.driver_agent.stats_socket_path) def get_listener(exit_event): _cleanup_socket_file(CONF.driver_agent.get_socket_path) - server = ForkingUDSServer(CONF.driver_agent.get_socket_path, - GetRequestHandler) + with ForkingUDSServer(CONF.driver_agent.get_socket_path, + GetRequestHandler) as server: + server.timeout = CONF.driver_agent.get_request_timeout + server.max_children = CONF.driver_agent.get_max_processes - server.timeout = CONF.driver_agent.get_request_timeout - server.max_children = CONF.driver_agent.get_max_processes + threading.Thread(target=server.serve_forever).start() - while not exit_event.is_set(): - server.handle_request() + exit_event.wait() - LOG.info('Waiting for driver get listener to shutdown...') - # Can't shut ourselves down as we would deadlock, spawn a thread - threading.Thread(target=server.shutdown).start() - LOG.info('Driver get listener shutdown finished.') - server.server_close() + LOG.info('Waiting for driver get listener to shutdown...') + server.shutdown() + LOG.info('Driver get listener shutdown finished.') _cleanup_socket_file(CONF.driver_agent.get_socket_path) LOG.info("UDS server was closed and socket was cleaned up.") diff --git a/octavia/tests/unit/api/drivers/driver_agent/test_driver_listener.py b/octavia/tests/unit/api/drivers/driver_agent/test_driver_listener.py index 33c08fe644..332081bf3d 100644 --- a/octavia/tests/unit/api/drivers/driver_agent/test_driver_listener.py +++ b/octavia/tests/unit/api/drivers/driver_agent/test_driver_listener.py @@ -156,13 +156,12 @@ class TestDriverListener(base.TestCase): 'a' * CONF.driver_agent.status_max_processes, 'a', 'a' * 1000, '']) type(mock_server).active_children = mock_active_children - mock_forking_server.return_value = mock_server + mock_forking_server.return_value.__enter__.return_value = mock_server mock_exit_event = mock.MagicMock() mock_exit_event.is_set.side_effect = [False, False, False, False, True] driver_listener.status_listener(mock_exit_event) - mock_server.handle_request.assert_called() - mock_server.server_close.assert_called_once() + mock_server.serve_forever.assert_called() self.assertEqual(2, mock_cleanup.call_count) @mock.patch('octavia.api.drivers.driver_agent.driver_listener.' @@ -176,13 +175,12 @@ class TestDriverListener(base.TestCase): 'a' * CONF.driver_agent.status_max_processes, 'a', 'a' * 1000, '']) type(mock_server).active_children = mock_active_children - mock_forking_server.return_value = mock_server + mock_forking_server.return_value.__enter__.return_value = mock_server mock_exit_event = mock.MagicMock() mock_exit_event.is_set.side_effect = [False, False, False, False, True] driver_listener.stats_listener(mock_exit_event) - mock_server.handle_request.assert_called() - mock_server.server_close.assert_called_once() + mock_server.serve_forever.assert_called() @mock.patch('octavia.api.drivers.driver_agent.driver_listener.' '_cleanup_socket_file') @@ -195,10 +193,9 @@ class TestDriverListener(base.TestCase): 'a' * CONF.driver_agent.status_max_processes, 'a', 'a' * 1000, '']) type(mock_server).active_children = mock_active_children - mock_forking_server.return_value = mock_server + mock_forking_server.return_value.__enter__.return_value = mock_server mock_exit_event = mock.MagicMock() mock_exit_event.is_set.side_effect = [False, False, False, False, True] driver_listener.get_listener(mock_exit_event) - mock_server.handle_request.assert_called() - mock_server.server_close.assert_called_once() + mock_server.serve_forever.assert_called() diff --git a/releasenotes/notes/fix-driver-agent-graceful-shutdown-daff9ffaccb09a9e.yaml b/releasenotes/notes/fix-driver-agent-graceful-shutdown-daff9ffaccb09a9e.yaml new file mode 100644 index 0000000000..512cf269b8 --- /dev/null +++ b/releasenotes/notes/fix-driver-agent-graceful-shutdown-daff9ffaccb09a9e.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + Fix the shutdown of the driver-agent, the process might have been stuck + while waiting for threads to finish. Systemd would have killed the process + after a timeout, but some children processes might have leaked on the + controllers.