Add service steps call to agent logic

While the prior sevice steps patch had a huge portion of the
needed code already due to copy-pasta, this change finishes
wiring in the ability for the agent to be launched for service
steps and heartbeat to occur, combined with support to retrieve
service steps from the running agent, ultimately to enable
operators to take a deployed node, and ask Ironic to make changes,
or my more favorite use case, go benchmark it for a while.

Also edits the service steps release note to remove the outstanding
issue, and makes some minor corrections in the code which was copied
but didn't quite have testing wired up yet.

Change-Id: Ibfe42037b520a76539234cf1a5e19afd335ce8a8
This commit is contained in:
Julia Kreger 2023-08-11 12:44:40 -07:00 committed by Jay Faulkner
parent 2366a4b86e
commit e1a0864635
7 changed files with 255 additions and 20 deletions

View File

@ -809,17 +809,16 @@ class AgentBaseMixin(object):
@METRICS.timer('AgentBaseMixin.prepare_cleaning') @METRICS.timer('AgentBaseMixin.prepare_cleaning')
def prepare_service(self, task): def prepare_service(self, task):
"""Boot into the agent to prepare for cleaning. """Boot into the agent to prepare for service.
:param task: a TaskManager object containing the node :param task: a TaskManager object containing the node
:raises: NodeCleaningFailure, NetworkError if the previous cleaning :raises: NodeCleaningFailure, NetworkError if: the previous cleaning
ports cannot be removed or if new cleaning ports cannot be created. ports cannot be removed or if new cleaning ports cannot be created.
:raises: InvalidParameterValue if cleaning network UUID config option :raises: InvalidParameterValue if cleaning network UUID config option
has an invalid value. has an invalid value.
:returns: states.CLEANWAIT to signify an asynchronous prepare :returns: states.CLEANWAIT to signify an asynchronous prepare
""" """
result = deploy_utils.prepare_inband_service( result = deploy_utils.prepare_inband_service(task)
task, manage_boot=self.should_manage_boot(task))
if result is None: if result is None:
# Fast-track, ensure the steps are available. # Fast-track, ensure the steps are available.
self.refresh_steps(task, 'service') self.refresh_steps(task, 'service')
@ -834,7 +833,7 @@ class AgentBaseMixin(object):
be removed be removed
""" """
deploy_utils.tear_down_inband_service( deploy_utils.tear_down_inband_service(
task, manage_boot=self.should_manage_boot(task)) task)
@METRICS.timer('AgentBaseMixin.get_clean_steps') @METRICS.timer('AgentBaseMixin.get_clean_steps')
def get_clean_steps(self, task): def get_clean_steps(self, task):
@ -855,6 +854,23 @@ class AgentBaseMixin(object):
task, 'clean', interface='deploy', task, 'clean', interface='deploy',
override_priorities=new_priorities) override_priorities=new_priorities)
@METRICS.timer('AgentBaseMixin.get_service_steps')
def get_service_steps(self, task):
"""Get the list of clean steps from the agent.
:param task: a TaskManager object containing the node
:returns: A list of service step dictionaries, if an error
occurs, then an emtpy list is returned.
"""
new_priorities = {
'erase_devices': CONF.deploy.erase_devices_priority,
'erase_devices_metadata':
CONF.deploy.erase_devices_metadata_priority,
}
return get_steps(
task, 'service',
override_priorities=new_priorities)
@METRICS.timer('AgentBaseMixin.refresh_steps') @METRICS.timer('AgentBaseMixin.refresh_steps')
def refresh_steps(self, task, step_type): def refresh_steps(self, task, step_type):
"""Refresh the node's cached clean/deploy steps from the booted agent. """Refresh the node's cached clean/deploy steps from the booted agent.

View File

@ -625,6 +625,89 @@ class AgentClient(object):
method='deploy.execute_deploy_step', method='deploy.execute_deploy_step',
params=params) params=params)
@METRICS.timer('AgentClient.get_service_steps')
def get_service_steps(self, node, ports):
"""Get service steps from agent.
:param node: A node object.
:param ports: Ports associated with the node.
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
The value of key command_result is in the form of:
::
{
'service_steps': <a list of service steps>,
'hardware_manager_version': <manager version>
}
"""
params = {
'node': node.as_dict(secure=True),
'ports': [port.as_dict() for port in ports]
}
try:
response = self._command(node=node,
method='service.get_service_steps',
params=params,
wait=True)
except exception.AgentAPIError:
# NOTE(TheJulia): This seems logical to do to handle an
# older ironic-python-agent, since the net-effect will be
# "there is nothing we can issue to the agent".
# TODO(TheJulia): Once we know the version where this *is*
# supported, we should actually update this log message.
# We won't know that until after the initial merge.
LOG.warning('Unable to retrieve service steps for node %s.'
'Please upgrade your ironic-python-agent.',
node.uuid)
response = {
'service_steps': [],
'hardware_manager_version': 0,
}
return response
@METRICS.timer('AgentClient.execute_service_step')
def execute_service_step(self, step, node, ports):
"""Execute specified service step.
:param step: A service step dictionary to execute.
:param node: A Node object.
:param ports: Ports associated with the node.
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
:returns: A dict containing command response from agent.
See :func:`get_commands_status` for a command result sample.
The value of key command_result is in the form of:
::
{
'service_result': <the result of execution, step specific>,
'service_step': <the service step issued to agent>
}
"""
params = {
'step': step,
'node': node.as_dict(secure=True),
'ports': [port.as_dict() for port in ports],
'service_version': node.driver_internal_info.get(
'hardware_manager_version')
}
return self._command(node=node,
method='service.execute_service_step',
params=params)
@METRICS.timer('AgentClient.get_partition_uuids') @METRICS.timer('AgentClient.get_partition_uuids')
def get_partition_uuids(self, node): def get_partition_uuids(self, node):
"""Get deploy steps from agent. """Get deploy steps from agent.

View File

@ -771,7 +771,7 @@ def tear_down_inband_cleaning(task, manage_boot=True):
task, power_state_to_restore) task, power_state_to_restore)
def prepare_inband_service(self, task): def prepare_inband_service(task):
"""Boot a service ramdisk on the node. """Boot a service ramdisk on the node.
:param task: a TaskManager instance. :param task: a TaskManager instance.
@ -796,7 +796,7 @@ def prepare_inband_service(self, task):
task.driver.boot.clean_up_instance(task) task.driver.boot.clean_up_instance(task)
with manager_utils.power_state_for_network_configuration(task): with manager_utils.power_state_for_network_configuration(task):
task.driver.network.unconfigure_tenant_networks(task) task.driver.network.unconfigure_tenant_networks(task)
task.driver.network.add_service_network(task) task.driver.network.add_servicing_network(task)
if CONF.agent.manage_agent_boot: if CONF.agent.manage_agent_boot:
# prepare_ramdisk will set the boot device # prepare_ramdisk will set the boot device
prepare_agent_boot(task) prepare_agent_boot(task)
@ -805,7 +805,7 @@ def prepare_inband_service(self, task):
return states.SERVICEWAIT return states.SERVICEWAIT
def tear_down_inband_service(task, manage_boot=True): def tear_down_inband_service(task):
"""Tears down the environment setup for in-band service. """Tears down the environment setup for in-band service.
This method does the following: This method does the following:
@ -818,9 +818,6 @@ def tear_down_inband_service(task, manage_boot=True):
of cleaning. of cleaning.
:param task: a TaskManager object containing the node :param task: a TaskManager object containing the node
:param manage_boot: If this is set to True, this method calls the
'clean_up_ramdisk' method of boot interface to boot the agent
ramdisk. If False, it skips this step.
:raises: NetworkError, NodeServiceFailure if the cleaning ports cannot be :raises: NetworkError, NodeServiceFailure if the cleaning ports cannot be
removed. removed.
""" """
@ -830,18 +827,16 @@ def tear_down_inband_service(task, manage_boot=True):
if not service_failure: if not service_failure:
manager_utils.node_power_action(task, states.POWER_OFF) manager_utils.node_power_action(task, states.POWER_OFF)
if manage_boot: task.driver.boot.clean_up_ramdisk(task)
task.driver.boot.clean_up_ramdisk(task)
power_state_to_restore = manager_utils.power_on_node_if_needed(task) power_state_to_restore = manager_utils.power_on_node_if_needed(task)
task.driver.network.remove_service_network(task)
if not service_failure: if not service_failure:
manager_utils.restore_power_state_if_needed( manager_utils.restore_power_state_if_needed(
task, power_state_to_restore) task, power_state_to_restore)
with manager_utils.power_state_for_network_configuration(task): with manager_utils.power_state_for_network_configuration(task):
task.driver.network.remove_service_network(task) task.driver.network.remove_servicing_network(task)
task.driver.network.configure_tenant_networks(task) task.driver.network.configure_tenant_networks(task)
task.driver.boot.prepare_instance(task) task.driver.boot.prepare_instance(task)
manager_utils.restore_power_state_if_needed( manager_utils.restore_power_state_if_needed(

View File

@ -1102,6 +1102,20 @@ class TestAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
'erase_devices_metadata': None}) 'erase_devices_metadata': None})
self.assertEqual(mock_steps, steps) self.assertEqual(mock_steps, steps)
@mock.patch.object(agent_base, 'get_steps', autospec=True)
def test_get_service_steps(self, mock_get_steps):
# Test getting service steps
mock_steps = [{'priority': 10, 'interface': 'deploy',
'step': 'erase_devices'}]
mock_get_steps.return_value = mock_steps
with task_manager.acquire(self.context, self.node.uuid) as task:
steps = self.driver.get_service_steps(task)
mock_get_steps.assert_called_once_with(
task, 'service',
override_priorities={'erase_devices': None,
'erase_devices_metadata': None})
self.assertEqual(mock_steps, steps)
@mock.patch.object(agent_base, 'get_steps', autospec=True) @mock.patch.object(agent_base, 'get_steps', autospec=True)
def test_get_clean_steps_config_priority(self, mock_get_steps): def test_get_clean_steps_config_priority(self, mock_get_steps):
# Test that we can override the priority of get clean steps # Test that we can override the priority of get clean steps
@ -1128,6 +1142,14 @@ class TestAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
prepare_inband_cleaning_mock.assert_called_once_with( prepare_inband_cleaning_mock.assert_called_once_with(
task, manage_boot=True) task, manage_boot=True)
@mock.patch.object(deploy_utils, 'prepare_inband_service', autospec=True)
def test_prepare_service(self, prepare_inband_service_mock):
prepare_inband_service_mock.return_value = states.SERVICEWAIT
with task_manager.acquire(self.context, self.node.uuid) as task:
self.assertEqual(
states.SERVICEWAIT, self.driver.prepare_service(task))
prepare_inband_service_mock.assert_called_once_with(task)
@mock.patch.object(deploy_utils, 'prepare_inband_cleaning', autospec=True) @mock.patch.object(deploy_utils, 'prepare_inband_cleaning', autospec=True)
def test_prepare_cleaning_manage_agent_boot_false( def test_prepare_cleaning_manage_agent_boot_false(
self, prepare_inband_cleaning_mock): self, prepare_inband_cleaning_mock):
@ -1159,6 +1181,14 @@ class TestAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
tear_down_cleaning_mock.assert_called_once_with( tear_down_cleaning_mock.assert_called_once_with(
task, manage_boot=True) task, manage_boot=True)
@mock.patch.object(deploy_utils, 'tear_down_inband_service',
autospec=True)
def test_tear_down_service(self, tear_down_service_mock):
with task_manager.acquire(self.context, self.node.uuid) as task:
self.driver.tear_down_service(task)
tear_down_service_mock.assert_called_once_with(
task)
@mock.patch.object(deploy_utils, 'tear_down_inband_cleaning', @mock.patch.object(deploy_utils, 'tear_down_inband_cleaning',
autospec=True) autospec=True)
def test_tear_down_cleaning_manage_agent_boot_false( def test_tear_down_cleaning_manage_agent_boot_false(

View File

@ -667,6 +667,53 @@ class TestAgentClient(base.TestCase):
node=self.node, method='clean.execute_clean_step', node=self.node, method='clean.execute_clean_step',
params=expected_params) params=expected_params)
def test_get_service_steps(self):
self.client._command = mock.MagicMock(spec_set=[])
ports = []
expected_params = {
'node': self.node.as_dict(secure=True),
'ports': []
}
self.client.get_service_steps(self.node,
ports)
self.client._command.assert_called_once_with(
node=self.node, method='service.get_service_steps',
params=expected_params, wait=True)
def test_get_service_steps_older_client(self):
self.client._command = mock.MagicMock(spec_set=[])
self.client._command.side_effect = exception.AgentAPIError('meow')
ports = []
expected_params = {
'node': self.node.as_dict(secure=True),
'ports': []
}
self.client.get_service_steps(self.node,
ports)
self.client._command.assert_called_once_with(
node=self.node, method='service.get_service_steps',
params=expected_params, wait=True)
def test_execute_service_step(self):
self.client._command = mock.MagicMock(spec_set=[])
ports = []
step = {'priority': 10, 'step': 'erase_devices', 'interface': 'deploy'}
expected_params = {
'step': step,
'node': self.node.as_dict(secure=True),
'ports': [],
'service_version':
self.node.driver_internal_info['hardware_manager_version']
}
self.client.execute_service_step(step,
self.node,
ports)
self.client._command.assert_called_once_with(
node=self.node, method='service.execute_service_step',
params=expected_params)
def test_power_off(self): def test_power_off(self):
self.client._command = mock.MagicMock(spec_set=[]) self.client._command = mock.MagicMock(spec_set=[])
self.client.power_off(self.node) self.client.power_off(self.node)

View File

@ -987,6 +987,35 @@ class AgentMethodsTestCase(db_base.DbTestCase):
build_options_mock.assert_called_once_with(task.node) build_options_mock.assert_called_once_with(task.node)
self.assertFalse(power_on_if_needed_mock.called) self.assertFalse(power_on_if_needed_mock.called)
@mock.patch.object(pxe.PXEBoot, 'clean_up_instance', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', autospec=True)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
@mock.patch.object(utils, 'build_agent_options', autospec=True)
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
'add_servicing_network', autospec=True)
def _test_prepare_inband_service(
self, add_service_network_mock,
build_options_mock, power_mock, prepare_ramdisk_mock,
clean_up_instance_mock,
manage_boot=True):
build_options_mock.return_value = {'a': 'b'}
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
result = utils.prepare_inband_service(task)
add_service_network_mock.assert_called_once_with(
task.driver.network, task)
self.assertEqual(states.SERVICEWAIT, result)
power_mock.assert_has_calls([
mock.call(task, states.POWER_OFF),
mock.call(task, states.POWER_ON)])
prepare_ramdisk_mock.assert_called_once_with(
mock.ANY, mock.ANY, {'a': 'b'})
build_options_mock.assert_called_once_with(task.node)
clean_up_instance_mock.assert_called_once_with(mock.ANY, task)
def test_prepare_inband_service(self):
self._test_prepare_inband_service()
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True) @mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True) @mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True)
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.' @mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
@ -1026,6 +1055,46 @@ class AgentMethodsTestCase(db_base.DbTestCase):
def test_tear_down_inband_cleaning_cleaning_error(self): def test_tear_down_inband_cleaning_cleaning_error(self):
self._test_tear_down_inband_cleaning(cleaning_error=True) self._test_tear_down_inband_cleaning(cleaning_error=True)
@mock.patch.object(pxe.PXEBoot, 'prepare_instance', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True)
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
'remove_servicing_network', autospec=True)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
def test_tear_down_inband_service(
self, power_mock, remove_service_network_mock,
clean_up_ramdisk_mock, prepare_instance_mock):
# NOTE(TheJulia): This should be back to servicing upon a heartbeat
# operation, before we go back to WAIT. We wouldn't know to teardown
# in a wait state anyway.
self.node.provision_state = states.SERVICING
self.node.save()
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
utils.tear_down_inband_service(task)
power_mock.assert_called_once_with(task, states.POWER_OFF)
remove_service_network_mock.assert_called_once_with(
task.driver.network, task)
clean_up_ramdisk_mock.assert_called_once_with(
task.driver.boot, task)
prepare_instance_mock.assert_called_once_with(task.driver.boot,
task)
@mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True)
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
'remove_servicing_network', autospec=True)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
def test_tear_down_inband_service_service_error(
self, power_mock, remove_service_network_mock,
clean_up_ramdisk_mock):
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
task.node.fault = faults.SERVICE_FAILURE
utils.tear_down_inband_service(task)
self.assertFalse(power_mock.called)
remove_service_network_mock.assert_not_called()
clean_up_ramdisk_mock.assert_called_once_with(
task.driver.boot, task)
def test_build_agent_options_conf(self): def test_build_agent_options_conf(self):
self.config(endpoint_override='https://api-url', self.config(endpoint_override='https://api-url',
group='service_catalog') group='service_catalog')

View File

@ -12,8 +12,3 @@ features:
exactly like the existing ``base.clean_step`` and ``base.deploy_step`` exactly like the existing ``base.clean_step`` and ``base.deploy_step``
decorators. Driver methods which are decorated *can* be invoked utilizing decorators. Driver methods which are decorated *can* be invoked utilizing
the service steps. the service steps.
issues:
- |
The ``service_steps`` functionality does not understand how to poll and
communicate with the ``ironic-python-agent``. This is anticipated to be
addressed in a future release.