From e1a08646352a8ab3405e7fde2db16bddbfe368e9 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Fri, 11 Aug 2023 12:44:40 -0700 Subject: [PATCH] Add service steps call to agent logic While the prior sevice steps patch had a huge portion of the needed code already due to copy-pasta, this change finishes wiring in the ability for the agent to be launched for service steps and heartbeat to occur, combined with support to retrieve service steps from the running agent, ultimately to enable operators to take a deployed node, and ask Ironic to make changes, or my more favorite use case, go benchmark it for a while. Also edits the service steps release note to remove the outstanding issue, and makes some minor corrections in the code which was copied but didn't quite have testing wired up yet. Change-Id: Ibfe42037b520a76539234cf1a5e19afd335ce8a8 --- ironic/drivers/modules/agent_base.py | 26 ++++-- ironic/drivers/modules/agent_client.py | 83 +++++++++++++++++++ ironic/drivers/modules/deploy_utils.py | 15 ++-- .../tests/unit/drivers/modules/test_agent.py | 30 +++++++ .../unit/drivers/modules/test_agent_client.py | 47 +++++++++++ .../unit/drivers/modules/test_deploy_utils.py | 69 +++++++++++++++ .../add-service-steps-deb45c9a0e77a647.yaml | 5 -- 7 files changed, 255 insertions(+), 20 deletions(-) diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index d5a66ce39a..03bc57cf65 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -809,17 +809,16 @@ class AgentBaseMixin(object): @METRICS.timer('AgentBaseMixin.prepare_cleaning') def prepare_service(self, task): - """Boot into the agent to prepare for cleaning. + """Boot into the agent to prepare for service. :param task: a TaskManager object containing the node - :raises: NodeCleaningFailure, NetworkError if the previous cleaning + :raises: NodeCleaningFailure, NetworkError if: the previous cleaning ports cannot be removed or if new cleaning ports cannot be created. :raises: InvalidParameterValue if cleaning network UUID config option has an invalid value. :returns: states.CLEANWAIT to signify an asynchronous prepare """ - result = deploy_utils.prepare_inband_service( - task, manage_boot=self.should_manage_boot(task)) + result = deploy_utils.prepare_inband_service(task) if result is None: # Fast-track, ensure the steps are available. self.refresh_steps(task, 'service') @@ -834,7 +833,7 @@ class AgentBaseMixin(object): be removed """ deploy_utils.tear_down_inband_service( - task, manage_boot=self.should_manage_boot(task)) + task) @METRICS.timer('AgentBaseMixin.get_clean_steps') def get_clean_steps(self, task): @@ -855,6 +854,23 @@ class AgentBaseMixin(object): task, 'clean', interface='deploy', override_priorities=new_priorities) + @METRICS.timer('AgentBaseMixin.get_service_steps') + def get_service_steps(self, task): + """Get the list of clean steps from the agent. + + :param task: a TaskManager object containing the node + :returns: A list of service step dictionaries, if an error + occurs, then an emtpy list is returned. + """ + new_priorities = { + 'erase_devices': CONF.deploy.erase_devices_priority, + 'erase_devices_metadata': + CONF.deploy.erase_devices_metadata_priority, + } + return get_steps( + task, 'service', + override_priorities=new_priorities) + @METRICS.timer('AgentBaseMixin.refresh_steps') def refresh_steps(self, task, step_type): """Refresh the node's cached clean/deploy steps from the booted agent. diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index e03153833e..386e4c6063 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -625,6 +625,89 @@ class AgentClient(object): method='deploy.execute_deploy_step', params=params) + @METRICS.timer('AgentClient.get_service_steps') + def get_service_steps(self, node, ports): + """Get service steps from agent. + + :param node: A node object. + :param ports: Ports associated with the node. + :raises: IronicException when failed to issue the request or there was + a malformed response from the agent. + :raises: AgentAPIError when agent failed to execute specified command. + :raises: AgentInProgress when the command fails to execute as the agent + is presently executing the prior command. + :returns: A dict containing command response from agent. + See :func:`get_commands_status` for a command result sample. + The value of key command_result is in the form of: + + :: + + { + 'service_steps': , + 'hardware_manager_version': + } + + """ + params = { + 'node': node.as_dict(secure=True), + 'ports': [port.as_dict() for port in ports] + } + try: + response = self._command(node=node, + method='service.get_service_steps', + params=params, + wait=True) + except exception.AgentAPIError: + # NOTE(TheJulia): This seems logical to do to handle an + # older ironic-python-agent, since the net-effect will be + # "there is nothing we can issue to the agent". + # TODO(TheJulia): Once we know the version where this *is* + # supported, we should actually update this log message. + # We won't know that until after the initial merge. + LOG.warning('Unable to retrieve service steps for node %s.' + 'Please upgrade your ironic-python-agent.', + node.uuid) + response = { + 'service_steps': [], + 'hardware_manager_version': 0, + } + return response + + @METRICS.timer('AgentClient.execute_service_step') + def execute_service_step(self, step, node, ports): + """Execute specified service step. + + :param step: A service step dictionary to execute. + :param node: A Node object. + :param ports: Ports associated with the node. + :raises: IronicException when failed to issue the request or there was + a malformed response from the agent. + :raises: AgentAPIError when agent failed to execute specified command. + :raises: AgentInProgress when the command fails to execute as the agent + is presently executing the prior command. + :returns: A dict containing command response from agent. + See :func:`get_commands_status` for a command result sample. + The value of key command_result is in the form of: + + :: + + { + 'service_result': , + 'service_step': + } + + """ + params = { + 'step': step, + 'node': node.as_dict(secure=True), + 'ports': [port.as_dict() for port in ports], + 'service_version': node.driver_internal_info.get( + 'hardware_manager_version') + } + return self._command(node=node, + method='service.execute_service_step', + params=params) + @METRICS.timer('AgentClient.get_partition_uuids') def get_partition_uuids(self, node): """Get deploy steps from agent. diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index 82086efcdf..010e384d1c 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -771,7 +771,7 @@ def tear_down_inband_cleaning(task, manage_boot=True): task, power_state_to_restore) -def prepare_inband_service(self, task): +def prepare_inband_service(task): """Boot a service ramdisk on the node. :param task: a TaskManager instance. @@ -796,7 +796,7 @@ def prepare_inband_service(self, task): task.driver.boot.clean_up_instance(task) with manager_utils.power_state_for_network_configuration(task): task.driver.network.unconfigure_tenant_networks(task) - task.driver.network.add_service_network(task) + task.driver.network.add_servicing_network(task) if CONF.agent.manage_agent_boot: # prepare_ramdisk will set the boot device prepare_agent_boot(task) @@ -805,7 +805,7 @@ def prepare_inband_service(self, task): return states.SERVICEWAIT -def tear_down_inband_service(task, manage_boot=True): +def tear_down_inband_service(task): """Tears down the environment setup for in-band service. This method does the following: @@ -818,9 +818,6 @@ def tear_down_inband_service(task, manage_boot=True): of cleaning. :param task: a TaskManager object containing the node - :param manage_boot: If this is set to True, this method calls the - 'clean_up_ramdisk' method of boot interface to boot the agent - ramdisk. If False, it skips this step. :raises: NetworkError, NodeServiceFailure if the cleaning ports cannot be removed. """ @@ -830,18 +827,16 @@ def tear_down_inband_service(task, manage_boot=True): if not service_failure: manager_utils.node_power_action(task, states.POWER_OFF) - if manage_boot: - task.driver.boot.clean_up_ramdisk(task) + task.driver.boot.clean_up_ramdisk(task) power_state_to_restore = manager_utils.power_on_node_if_needed(task) - task.driver.network.remove_service_network(task) if not service_failure: manager_utils.restore_power_state_if_needed( task, power_state_to_restore) with manager_utils.power_state_for_network_configuration(task): - task.driver.network.remove_service_network(task) + task.driver.network.remove_servicing_network(task) task.driver.network.configure_tenant_networks(task) task.driver.boot.prepare_instance(task) manager_utils.restore_power_state_if_needed( diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py index 653359f33d..059e84e5c6 100644 --- a/ironic/tests/unit/drivers/modules/test_agent.py +++ b/ironic/tests/unit/drivers/modules/test_agent.py @@ -1102,6 +1102,20 @@ class TestAgentDeploy(CommonTestsMixin, db_base.DbTestCase): 'erase_devices_metadata': None}) self.assertEqual(mock_steps, steps) + @mock.patch.object(agent_base, 'get_steps', autospec=True) + def test_get_service_steps(self, mock_get_steps): + # Test getting service steps + mock_steps = [{'priority': 10, 'interface': 'deploy', + 'step': 'erase_devices'}] + mock_get_steps.return_value = mock_steps + with task_manager.acquire(self.context, self.node.uuid) as task: + steps = self.driver.get_service_steps(task) + mock_get_steps.assert_called_once_with( + task, 'service', + override_priorities={'erase_devices': None, + 'erase_devices_metadata': None}) + self.assertEqual(mock_steps, steps) + @mock.patch.object(agent_base, 'get_steps', autospec=True) def test_get_clean_steps_config_priority(self, mock_get_steps): # Test that we can override the priority of get clean steps @@ -1128,6 +1142,14 @@ class TestAgentDeploy(CommonTestsMixin, db_base.DbTestCase): prepare_inband_cleaning_mock.assert_called_once_with( task, manage_boot=True) + @mock.patch.object(deploy_utils, 'prepare_inband_service', autospec=True) + def test_prepare_service(self, prepare_inband_service_mock): + prepare_inband_service_mock.return_value = states.SERVICEWAIT + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertEqual( + states.SERVICEWAIT, self.driver.prepare_service(task)) + prepare_inband_service_mock.assert_called_once_with(task) + @mock.patch.object(deploy_utils, 'prepare_inband_cleaning', autospec=True) def test_prepare_cleaning_manage_agent_boot_false( self, prepare_inband_cleaning_mock): @@ -1159,6 +1181,14 @@ class TestAgentDeploy(CommonTestsMixin, db_base.DbTestCase): tear_down_cleaning_mock.assert_called_once_with( task, manage_boot=True) + @mock.patch.object(deploy_utils, 'tear_down_inband_service', + autospec=True) + def test_tear_down_service(self, tear_down_service_mock): + with task_manager.acquire(self.context, self.node.uuid) as task: + self.driver.tear_down_service(task) + tear_down_service_mock.assert_called_once_with( + task) + @mock.patch.object(deploy_utils, 'tear_down_inband_cleaning', autospec=True) def test_tear_down_cleaning_manage_agent_boot_false( diff --git a/ironic/tests/unit/drivers/modules/test_agent_client.py b/ironic/tests/unit/drivers/modules/test_agent_client.py index 5bf93c71b7..89a95a66ab 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_client.py +++ b/ironic/tests/unit/drivers/modules/test_agent_client.py @@ -667,6 +667,53 @@ class TestAgentClient(base.TestCase): node=self.node, method='clean.execute_clean_step', params=expected_params) + def test_get_service_steps(self): + self.client._command = mock.MagicMock(spec_set=[]) + ports = [] + expected_params = { + 'node': self.node.as_dict(secure=True), + 'ports': [] + } + + self.client.get_service_steps(self.node, + ports) + self.client._command.assert_called_once_with( + node=self.node, method='service.get_service_steps', + params=expected_params, wait=True) + + def test_get_service_steps_older_client(self): + self.client._command = mock.MagicMock(spec_set=[]) + self.client._command.side_effect = exception.AgentAPIError('meow') + ports = [] + expected_params = { + 'node': self.node.as_dict(secure=True), + 'ports': [] + } + + self.client.get_service_steps(self.node, + ports) + self.client._command.assert_called_once_with( + node=self.node, method='service.get_service_steps', + params=expected_params, wait=True) + + def test_execute_service_step(self): + self.client._command = mock.MagicMock(spec_set=[]) + ports = [] + step = {'priority': 10, 'step': 'erase_devices', 'interface': 'deploy'} + expected_params = { + 'step': step, + 'node': self.node.as_dict(secure=True), + 'ports': [], + 'service_version': + self.node.driver_internal_info['hardware_manager_version'] + } + self.client.execute_service_step(step, + self.node, + ports) + self.client._command.assert_called_once_with( + node=self.node, method='service.execute_service_step', + params=expected_params) + def test_power_off(self): self.client._command = mock.MagicMock(spec_set=[]) self.client.power_off(self.node) diff --git a/ironic/tests/unit/drivers/modules/test_deploy_utils.py b/ironic/tests/unit/drivers/modules/test_deploy_utils.py index 7220697cbb..f9026055c8 100644 --- a/ironic/tests/unit/drivers/modules/test_deploy_utils.py +++ b/ironic/tests/unit/drivers/modules/test_deploy_utils.py @@ -987,6 +987,35 @@ class AgentMethodsTestCase(db_base.DbTestCase): build_options_mock.assert_called_once_with(task.node) self.assertFalse(power_on_if_needed_mock.called) + @mock.patch.object(pxe.PXEBoot, 'clean_up_instance', autospec=True) + @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', autospec=True) + @mock.patch('ironic.conductor.utils.node_power_action', autospec=True) + @mock.patch.object(utils, 'build_agent_options', autospec=True) + @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.' + 'add_servicing_network', autospec=True) + def _test_prepare_inband_service( + self, add_service_network_mock, + build_options_mock, power_mock, prepare_ramdisk_mock, + clean_up_instance_mock, + manage_boot=True): + build_options_mock.return_value = {'a': 'b'} + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + result = utils.prepare_inband_service(task) + add_service_network_mock.assert_called_once_with( + task.driver.network, task) + self.assertEqual(states.SERVICEWAIT, result) + power_mock.assert_has_calls([ + mock.call(task, states.POWER_OFF), + mock.call(task, states.POWER_ON)]) + prepare_ramdisk_mock.assert_called_once_with( + mock.ANY, mock.ANY, {'a': 'b'}) + build_options_mock.assert_called_once_with(task.node) + clean_up_instance_mock.assert_called_once_with(mock.ANY, task) + + def test_prepare_inband_service(self): + self._test_prepare_inband_service() + @mock.patch('ironic.conductor.utils.is_fast_track', autospec=True) @mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True) @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.' @@ -1026,6 +1055,46 @@ class AgentMethodsTestCase(db_base.DbTestCase): def test_tear_down_inband_cleaning_cleaning_error(self): self._test_tear_down_inband_cleaning(cleaning_error=True) + @mock.patch.object(pxe.PXEBoot, 'prepare_instance', autospec=True) + @mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True) + @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.' + 'remove_servicing_network', autospec=True) + @mock.patch('ironic.conductor.utils.node_power_action', autospec=True) + def test_tear_down_inband_service( + self, power_mock, remove_service_network_mock, + clean_up_ramdisk_mock, prepare_instance_mock): + # NOTE(TheJulia): This should be back to servicing upon a heartbeat + # operation, before we go back to WAIT. We wouldn't know to teardown + # in a wait state anyway. + self.node.provision_state = states.SERVICING + self.node.save() + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + utils.tear_down_inband_service(task) + power_mock.assert_called_once_with(task, states.POWER_OFF) + remove_service_network_mock.assert_called_once_with( + task.driver.network, task) + clean_up_ramdisk_mock.assert_called_once_with( + task.driver.boot, task) + prepare_instance_mock.assert_called_once_with(task.driver.boot, + task) + + @mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True) + @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.' + 'remove_servicing_network', autospec=True) + @mock.patch('ironic.conductor.utils.node_power_action', autospec=True) + def test_tear_down_inband_service_service_error( + self, power_mock, remove_service_network_mock, + clean_up_ramdisk_mock): + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + task.node.fault = faults.SERVICE_FAILURE + utils.tear_down_inband_service(task) + self.assertFalse(power_mock.called) + remove_service_network_mock.assert_not_called() + clean_up_ramdisk_mock.assert_called_once_with( + task.driver.boot, task) + def test_build_agent_options_conf(self): self.config(endpoint_override='https://api-url', group='service_catalog') diff --git a/releasenotes/notes/add-service-steps-deb45c9a0e77a647.yaml b/releasenotes/notes/add-service-steps-deb45c9a0e77a647.yaml index 74e2bb7981..3e69ef4988 100644 --- a/releasenotes/notes/add-service-steps-deb45c9a0e77a647.yaml +++ b/releasenotes/notes/add-service-steps-deb45c9a0e77a647.yaml @@ -12,8 +12,3 @@ features: exactly like the existing ``base.clean_step`` and ``base.deploy_step`` decorators. Driver methods which are decorated *can* be invoked utilizing the service steps. -issues: - - | - The ``service_steps`` functionality does not understand how to poll and - communicate with the ``ironic-python-agent``. This is anticipated to be - addressed in a future release.