Add support for verify steps

This change adds support for verify steps in Ironic. Verify steps
allow executing actions on transition from "verifying" to "managable"
state and can perform actions such as cleaning BMC job queue or
resetting the BMC on supported platforms. Verify steps are similar
to deploy and clean steps, just simpler.

Story: 2009025
Task: 42751
Change-Id: Iee27199a0315b8609e629bac272998c28274802b
This commit is contained in:
Jacob Anders 2021-07-08 19:49:49 +10:00
parent 62439d5633
commit b385d9ae5b
8 changed files with 389 additions and 2 deletions

View File

@ -830,3 +830,7 @@ class NodeHistoryNotFound(NotFound):
class IncorrectConfiguration(IronicException):
_msg_fmt = _("Supplied configuration is incorrect and must be fixed. "
"Error: %(error)s")
class NodeVerifyFailure(IronicException):
_msg_fmt = _("Failed to verify node %(node)s: %(reason)s")

View File

@ -50,6 +50,25 @@ DEPLOYING_INTERFACE_PRIORITY = {
'raid': 1,
}
VERIFYING_INTERFACE_PRIORITY = {
# When two verify steps have the same priority, their order is determined
# by which interface is implementing the verify step. The verifying step of
# the interface with the highest value here, will be executed first in
# that case.
'power': 12,
'management': 11,
'boot': 8,
'inspect': 10,
'deploy': 9,
'bios': 7,
'raid': 6,
'vendor': 5,
'network': 4,
'storage': 3,
'console': 2,
'rescue': 1,
}
def _clean_step_key(step):
"""Sort by priority, then interface priority in event of tie.
@ -69,6 +88,15 @@ def _deploy_step_key(step):
DEPLOYING_INTERFACE_PRIORITY[step.get('interface')])
def _verify_step_key(step):
"""Sort by priority, then interface priority in event of tie.
:param step: verify step dict to get priority for.
"""
return (step.get('priority'),
VERIFYING_INTERFACE_PRIORITY[step.get('interface')])
def _sorted_steps(steps, sort_step_key):
"""Return a sorted list of steps.
@ -194,6 +222,37 @@ def _get_deployment_steps(task, enabled=False, sort=True):
enabled=enabled, sort_step_key=sort_key)
def _get_verify_steps(task, enabled=False, sort=True):
"""Get verify steps for task.node.
:param task: A TaskManager object
:param enabled: If True, returns only enabled (priority > 0) steps. If
False, returns all verify steps.
:param sort: If True, the steps are sorted from highest priority to lowest
priority. For steps having the same priority, they are sorted from
highest interface priority to lowest.
:raises: NodeVerifyFailure if there was a problem getting the
verify steps.
:returns: A list of verify step dictionaries
"""
sort_key = _verify_step_key if sort else None
if CONF.conductor.verify_step_priority_override:
vsp_override = {}
for element in CONF.conductor.verify_step_priority_override:
vsp_override.update(element)
verify_steps = _get_steps(task, VERIFYING_INTERFACE_PRIORITY,
'get_verify_steps', enabled=enabled,
sort_step_key=sort_key,
prio_overrides=vsp_override)
else:
verify_steps = _get_steps(task, VERIFYING_INTERFACE_PRIORITY,
'get_verify_steps', enabled=enabled,
sort_step_key=sort_key)
return verify_steps
def set_node_cleaning_steps(task, disable_ramdisk=False):
"""Set up the node with clean step information for cleaning.

View File

@ -631,6 +631,32 @@ def fail_on_error(error_callback, msg, *error_args, **error_kwargs):
return wrapper
def verifying_error_handler(task, logmsg, errmsg=None, traceback=False):
"""Handle errors during verification steps
:param task: the task
:param logmsg: message to be logged
:param errmsg: message for the user
:param traceback: Boolean; True to log a traceback
"""
errmsg = errmsg or logmsg
node = task.node
LOG.error(logmsg, exc_info=traceback)
node_history_record(node, event=errmsg, event_type=states.VERIFYING,
error=True)
node.save()
node.refresh()
if node.provision_state in (
states.VERIFYING):
# Clear verifying step; we leave the list of verify steps
# in node.driver_internal_info for debugging purposes.
node.verify_step = {}
node.save()
@task_manager.require_exclusive_lock
def abort_on_conductor_take_over(task):
"""Set node's state when a task was aborted due to conductor take over.

View File

@ -18,6 +18,7 @@ from ironic.common import exception
from ironic.common.i18n import _
from ironic.common import states
from ironic.conductor import notification_utils as notify_utils
from ironic.conductor import steps as conductor_steps
from ironic.conductor import task_manager
from ironic.conductor import utils
@ -49,6 +50,37 @@ def do_node_verify(task):
{'node': node.uuid, 'msg': e})
log_traceback = not isinstance(e, exception.IronicException)
LOG.error(error, exc_info=log_traceback)
verify_steps = conductor_steps._get_verify_steps(task,
enabled=True,
sort=True)
for step in verify_steps:
interface = getattr(task.driver, step.get('interface'))
LOG.info('Executing %(step)s on node %(node)s',
{'step': step, 'node': node.uuid})
try:
result = interface.execute_verify_step(task, step)
except Exception as e:
error = ('Node %(node)s failed verify step %(step)s '
'with unexpected error: %(err)s' %
{'node': node.uuid, 'step': node.verify_step,
'err': e})
utils.verifying_error_handler(
task, error,
_("Failed to verify. Exception: %s") % e,
traceback=True)
if result is not None:
# NOTE(rloo): This is an internal/dev error; shouldn't
# happen.
error = (_('While executing verify step %(step)s on '
'node %(node)s, step returned unexpected '
'state: %(val)s')
% {'step': step, 'node': node.uuid,
'val': result})
utils.verifying_error_handler(
task, error,
_("Failed to verify: %s") % node.deploy_step)
if error is None:
# NOTE(janders) this can eventually move to driver-specific
# verify steps, will leave this for a follow-up change.
@ -67,6 +99,10 @@ def do_node_verify(task):
else:
task.process_event('done')
LOG.info('Successfully verified node %(node)s',
{'node': node.uuid})
else:
utils.node_history_record(task.node, event=error,
event_type=states.VERIFY,

View File

@ -334,6 +334,16 @@ opts = [
'node_history_max_entries setting as users of '
'this setting are anticipated to need to retain '
'history by policy.')),
cfg.MultiOpt('verify_step_priority_override',
item_type=types.Dict(),
default={},
mutable=True,
help=_('Priority to run automated verify steps '
'provided in interface.step_name:priority format,'
'e.g. management.clear_job_queue:123. The option can '
'be specified multiple times to define priorities '
'for multiple steps. If set to 0, this specific step '
'will not run during cleaning. ')),
]

View File

@ -226,8 +226,9 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
"""
def __new__(cls, *args, **kwargs):
# Get the list of clean steps and deploy steps, when the interface is
# initialized by the conductor. We use __new__ instead of __init___
# Get the list of clean steps, deploy steps and verify steps when the
# interface is initialized by the conductor.
# We use __new__ instead of __init___
# to avoid breaking backwards compatibility with all the drivers.
# We want to return all steps, regardless of priority.
@ -238,6 +239,7 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
instance = super_new(cls, *args, **kwargs)
instance.clean_steps = []
instance.deploy_steps = []
instance.verify_steps = []
for n, method in inspect.getmembers(instance, inspect.ismethod):
if getattr(method, '_is_clean_step', False):
# Create a CleanStep to represent this method
@ -256,6 +258,13 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
'argsinfo': method._deploy_step_argsinfo,
'interface': instance.interface_type}
instance.deploy_steps.append(step)
if getattr(method, '_is_verify_step', False):
# Create a VerifyStep to represent this method
step = {'step': method.__name__,
'priority': method._verify_step_priority,
'interface': instance.interface_type}
instance.verify_steps.append(step)
if instance.clean_steps:
LOG.debug('Found clean steps %(steps)s for interface '
'%(interface)s',
@ -266,6 +275,12 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
'%(interface)s',
{'steps': instance.deploy_steps,
'interface': instance.interface_type})
if instance.verify_steps:
LOG.debug('Found verify steps %(steps)s for interface '
'%(interface)s',
{'steps': instance.deploy_steps,
'interface': instance.interface_type})
return instance
def _execute_step(self, task, step):
@ -360,6 +375,35 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
"""
return self._execute_step(task, step)
def get_verify_steps(self, task):
"""Get a list of (enabled and disabled) verify steps for the interface.
This function will return all verify steps (both enabled and disabled)
for the interface, in an unordered list.
:param task: A TaskManager object, useful for interfaces overriding
this function
:raises NodeVerifyFailure: if there is a problem getting the steps
from the driver. For example, when a node (using an agent driver)
has just been enrolled and the agent isn't alive yet to be queried
for the available verify steps.
:returns: A list of deploy step dictionaries
"""
return self.verify_steps
def execute_verify_step(self, task, step):
"""Execute the verify step on task.node.
A verify step must take a single positional argument: a TaskManager
object. It does not take keyword variable arguments.
:param task: A TaskManager object
:param step: The deploy step dictionary representing the step to
execute
:returns: None if this method has completed synchronously
"""
return self._execute_step(task, step)
class DeployInterface(BaseInterface):
"""Interface for deploy-related actions."""
@ -1868,3 +1912,44 @@ def deploy_step(priority, argsinfo=None):
func._deploy_step_argsinfo = argsinfo
return func
return decorator
def verify_step(priority):
"""Decorator for verify steps.
Only steps with priorities greater than 0 are used. These steps are
ordered by priority from highest value to lowest value.
For steps with the same priority, they are ordered by driver
interface priority (see conductor.steps.VERIFY_INTERFACE_PRIORITY).
execute_verify_step() will be called on each step.
Decorated verify steps must take as the only positional argument, a
TaskManager object.
Verify steps are synchronous and should return `None` when finished,
and the conductor will continue on to the next step. While the verify step
is executing, the node will be in `states.VERIFYING` provision state.
Examples::
class MyInterface(base.BaseInterface):
@base.verify_step(priority=100)
def example_verifying(self, task):
# do some verifying
:param priority: an integer (>=0) priority; used for determining the order
in which the step is run in the verification process.
:raises InvalidParameterValue: if any of the arguments are invalid
"""
def decorator(func):
func._is_verify_step = True
if isinstance(priority, int) and priority >= 0:
func._verify_step_priority = priority
else:
raise exception.InvalidParameterValue(
_('"priority" must be an integer value >= 0, instead of "%s"')
% priority)
return func
return decorator

View File

@ -19,6 +19,7 @@ from ironic.common import exception
from ironic.common import states
from ironic.conductor import steps as conductor_steps
from ironic.conductor import task_manager
from ironic.conductor import verify as verify_steps
from ironic import objects
from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.db import utils as db_utils
@ -1138,3 +1139,163 @@ class ValidateUserDeployStepsTestCase(db_base.DbTestCase):
result = conductor_steps._get_validated_user_deploy_steps(task)
self.assertEqual([], result)
mock_validated.assert_not_called()
class NodeVerifyStepsTestCase(db_base.DbTestCase):
def setUp(self):
super(NodeVerifyStepsTestCase, self).setUp()
self.management_reset_idrac = {
'step': 'reset_idrac', 'priority': 0,
'interface': 'management'}
self.management_clear_job_queue = {
'step': 'clear_job_queue', 'priority': 10,
'interface': 'management'}
self.verify_steps = [self.management_clear_job_queue,
self.management_reset_idrac]
self.verify_steps_only_enabled = [self.management_clear_job_queue]
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps_priority_override_ok(self,
mock_verify_steps):
# Test verify_step_priority_override
cfg.CONF.set_override('verify_step_priority_override',
["management.clear_job_queue:9"],
'conductor')
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue]
with task_manager.acquire(
self.context, node.uuid, shared=True) as task:
steps = conductor_steps._get_verify_steps(task, enabled=True)
expected_step_priorities = [{'interface': 'management', 'priority': 9,
'step': 'clear_job_queue'}]
self.assertEqual(expected_step_priorities, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps_priority_override_fail(self,
mock_verify_steps):
# Test verify_step_priority_override for failure
cfg.CONF.set_override('verify_step_priority_override',
["management.clear_job_queue:9"],
'conductor')
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue]
with task_manager.acquire(
self.context, node.uuid, shared=True) as task:
steps = conductor_steps._get_verify_steps(task, enabled=True)
expected_step_priorities = [{'interface': 'management', 'priority': 10,
'step': 'clear_job_queue'}]
self.assertNotEqual(expected_step_priorities, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps_priority_override_off(self,
mock_verify_steps):
# Test disabling steps with verify_step_priority_override
cfg.CONF.set_override('verify_step_priority_override',
["management.clear_job_queue:0"],
'conductor')
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue]
with task_manager.acquire(
self.context, node.uuid, shared=True) as task:
steps = conductor_steps._get_verify_steps(task, enabled=True)
expected_step_priorities = []
self.assertEqual(expected_step_priorities, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps(self, mock_verify_steps):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue,
self.management_reset_idrac]
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
steps = conductor_steps._get_verify_steps(task, enabled=False)
self.assertEqual(self.verify_steps, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps_unsorted(self, mock_verify_steps):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue,
self.management_reset_idrac]
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
steps = conductor_steps._get_verify_steps(task, enabled=False,
sort=False)
self.assertEqual(self.verify_steps, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_verify_steps',
autospec=True)
def test__get_verify_steps_only_enabled(self, mock_verify_steps):
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.ENROLL,
target_provision_state=states.MANAGEABLE)
mock_verify_steps.return_value = [self.management_clear_job_queue]
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
steps = conductor_steps._get_verify_steps(task, enabled=True)
self.assertEqual(self.verify_steps_only_enabled, steps)
@mock.patch('ironic.drivers.modules.fake.FakeManagement.'
'execute_verify_step',
autospec=True)
@mock.patch.object(conductor_steps, '_get_verify_steps', autospec=True)
def test_execute_verify_step(self, mock_steps, mock_execute):
mock_steps.return_value = self.verify_steps
node = obj_utils.create_test_node(
self.context, driver='fake-hardware',
provision_state=states.VERIFYING,
target_provision_state=states.MANAGEABLE,
last_error=None,
clean_step=None)
with task_manager.acquire(
self.context, node.uuid, shared=False) as task:
verify_steps.do_node_verify(task)
node.refresh()
self.assertTrue(mock_execute.called)

View File

@ -0,0 +1,6 @@
---
features:
- |
Adds support for verify steps - a mechanism for running optional,
actions pre-defined in the driver while the node is in transition from
enroll to managable state, prior to inspection.