Add a command to lock down the agent

To support a safer take-over from the provisioning to the tenant network
for hardware that cannot be powered off, this change introduces a new
command, system.lockdown. When invoked, it stops the API and the
heartbeater, and disables all network interfaces (if possible).

Partial-Bug: #2077432
Change-Id: I211fc64a46226127b0d82ab458029b3c702b3f74
This commit is contained in:
Dmitry Tantsur 2024-11-06 13:17:37 +01:00
parent 5746ac1222
commit aa98250066
No known key found for this signature in database
GPG Key ID: 315B2AF9FD216C60
5 changed files with 118 additions and 0 deletions

View File

@ -36,6 +36,7 @@ from ironic_python_agent.extensions import base
from ironic_python_agent import hardware
from ironic_python_agent import inspector
from ironic_python_agent import ironic_api_client
from ironic_python_agent import netutils
from ironic_python_agent import utils
LOG = log.getLogger(__name__)
@ -248,6 +249,9 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
self.hardware_initialization_delay = hardware_initialization_delay
# IPA will stop serving requests and exit after this is set to False
self.serve_api = True
# Together with serve_api, this option allows locking down the system
# before IPA stops.
self.lockdown = False
self.agent_token = agent_token
# Allows this to be turned on by the conductor while running,
# in the event of long running ramdisks where the conductor
@ -568,3 +572,21 @@ class IronicPythonAgent(base.ExecuteCommandMixin):
if not self.standalone and self.api_urls:
self.heartbeater.stop()
if self.lockdown:
self._lockdown_system()
LOG.info('System locked down, looping forever to avoid a service '
'restart')
while True:
time.sleep(100)
def _lockdown_system(self):
    """Bring down every network interface as part of the lockdown.

    Each interface reported by ``netutils.list_interfaces()`` is disabled
    with ``ip link set <iface> down``. A failure on one interface is only
    logged as a warning; the remaining interfaces are still processed.
    """
    LOG.info('Locking down system after the API stopped')
    # NOTE(dtantsur): not going through hardware managers here to minimize
    # the amount of operations.
    for nic in netutils.list_interfaces():
        command = ('ip', 'link', 'set', nic, 'down')
        try:
            utils.execute(*command)
        except Exception as error:
            # Best effort: keep going so that the other interfaces are
            # still brought down.
            LOG.warning('Could not bring down interface %s: %s',
                        nic, error)

View File

@ -0,0 +1,29 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_log import log
from ironic_python_agent.extensions import base
LOG = log.getLogger(__name__)
class SystemExtension(base.BaseAgentExtension):
    """Agent extension that hosts system-wide commands."""

    # TODO(dtantsur): migrate (with deprecation) other system-wide commands
    # from standby (power_off, run_image renamed into reboot, sync).

    @base.sync_command('lockdown')
    def lockdown(self):
        """Request a lockdown of the agent.

        Marks the agent for lockdown and clears its ``serve_api`` flag.
        """
        agent = self.agent
        # The lockdown flag is raised before serve_api is cleared; keep
        # the two assignments in this order.
        agent.lockdown = True
        agent.serve_api = False

View File

@ -741,6 +741,65 @@ class TestBaseAgent(ironic_agent_base.IronicAgentTest):
self.assertTrue(mock_wait.called)
self.assertFalse(mock_dispatch.called)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)
@mock.patch.object(netutils, 'list_interfaces', autospec=True)
@mock.patch(
    'ironic_python_agent.hardware_managers.cna._detect_cna_card',
    mock.Mock())
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)
@mock.patch.object(agent.IronicPythonAgent,
                   '_wait_for_interface', autospec=True)
@mock.patch('oslo_service.wsgi.Server', autospec=True)
@mock.patch.object(hardware, 'get_managers', autospec=True)
def test_run_then_lockdown(self, mock_get_managers, mock_wsgi,
                           mock_wait, mock_dispatch, mock_interfaces,
                           mock_exec, mock_sleep):
    """Check that run() locks the system down when lockdown is requested.

    Starting the mocked WSGI server sets both ``lockdown`` and
    ``serve_api`` on the agent. The test then expects the heartbeater to
    be stopped and every listed interface to be brought down before the
    mocked ``time.sleep`` raises to break out of the final loop.
    """

    class StopTesting(Exception):
        """Exception to exit the infinite loop."""

    def request_lockdown():
        # Mimic the lockdown command arriving while the API is running.
        self.agent.lockdown = True
        self.agent.serve_api = False

    nics = ('em1', 'em2')

    CONF.set_override('inspection_callback_url', '')
    server = mock_wsgi.return_value
    server.start.side_effect = request_lockdown
    mock_interfaces.return_value = list(nics)
    # The first sleep() call raises, which ends the otherwise endless loop.
    mock_sleep.side_effect = StopTesting

    self.agent.heartbeater = mock.Mock()
    self.agent.api_client.lookup_node = mock.Mock(return_value={
        'node': {
            'uuid': 'deadbeef-dabb-ad00-b105-f00d00bab10c'
        },
        'config': {
            'heartbeat_timeout': 300,
            'agent_md5_checksum_enable': False
        }
    })

    self.assertRaises(StopTesting, self.agent.run)

    mock_wsgi.assert_called_once_with(CONF, 'ironic-python-agent',
                                      app=self.agent.api,
                                      host=mock.ANY, port=9999,
                                      use_ssl=False)
    server.start.assert_called_once_with()
    mock_wait.assert_called_once_with(mock.ANY)

    expected_dispatch = [mock.call('list_hardware_info'),
                         mock.call('wait_for_disks')]
    self.assertEqual(expected_dispatch, mock_dispatch.call_args_list)

    heartbeater = self.agent.heartbeater
    heartbeater.start.assert_called_once_with()
    heartbeater.stop.assert_called_once_with()

    expected_exec = [mock.call('ip', 'link', 'set', nic, 'down')
                     for nic in nics]
    mock_exec.assert_has_calls(expected_exec)
@mock.patch.object(time, 'time', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True)

View File

@ -0,0 +1,7 @@
---
features:
- |
Adds a new API command ``system.lockdown``. When invoked, it stops the API
and the heartbeater, and tries to disable all local network interfaces. More
actions may be added in the future to make the agent and the ramdisk
unusable after this command.

View File

@ -45,6 +45,7 @@ ironic_python_agent.extensions =
rescue = ironic_python_agent.extensions.rescue:RescueExtension
poll = ironic_python_agent.extensions.poll:PollExtension
service = ironic_python_agent.extensions.service:ServiceExtension
system = ironic_python_agent.extensions.system:SystemExtension
ironic_python_agent.hardware_managers =
generic = ironic_python_agent.hardware:GenericHardwareManager