Adds service steps
A huge list of initial work for service steps * Adds service_step verb * Adds service_step db/object/API field on the node object for the status. * Increments the API version to 1.87 for both changes. * Increments the RPC API version to 1.57. * Adds initial testing to facilitate ensurance that supplied steps are passed through and executed upon. Does not: * Have tests for starting the agent ramdisk, although this is relatively boiler plate. * Have a collection of pre-decorated steps available for immediate consumption. Change-Id: I5b9dd928f24dff7877a4ab8dc7b743058cace994
This commit is contained in:
parent
8e2aab8291
commit
2366a4b86e
@ -2,6 +2,19 @@
|
||||
REST API Version History
|
||||
========================
|
||||
|
||||
1.87 (Bobcat)
|
||||
-------------
|
||||
|
||||
Adds the ``service`` provision state verb to allow modifications
|
||||
via the "steps" interface to occur with a baremetal node. With this
|
||||
functionality comes a ``service_step`` field on the ``/v1/nodes``
|
||||
based resources, which indicates the current step.
|
||||
|
||||
1.86 (Bobcat)
|
||||
-------------
|
||||
|
||||
Adds a ``firmware_interface`` field to the ``/v1/nodes`` resources.
|
||||
|
||||
1.85 (Bobcat)
|
||||
-------------
|
||||
|
||||
|
@ -56,9 +56,12 @@ from ironic import objects
|
||||
CONF = ironic.conf.CONF
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
_CLEAN_STEPS_SCHEMA = {
|
||||
|
||||
|
||||
# TODO(TheJulia): We *really* need to just have *one* schema.
|
||||
_STEPS_SCHEMA = {
|
||||
"$schema": "http://json-schema.org/schema#",
|
||||
"title": "Clean steps schema",
|
||||
"title": "Steps schema",
|
||||
"type": "array",
|
||||
# list of clean steps
|
||||
"items": {
|
||||
@ -124,7 +127,8 @@ PROVISION_ACTION_STATES = (ir_states.VERBS['manage'],
|
||||
ir_states.VERBS['provide'],
|
||||
ir_states.VERBS['abort'],
|
||||
ir_states.VERBS['adopt'],
|
||||
ir_states.VERBS['unhold'])
|
||||
ir_states.VERBS['unhold'],
|
||||
ir_states.VERBS['service'])
|
||||
|
||||
_NODES_CONTROLLER_RESERVED_WORDS = None
|
||||
|
||||
@ -950,7 +954,8 @@ class NodeStatesController(rest.RestController):
|
||||
|
||||
def _do_provision_action(self, rpc_node, target, configdrive=None,
|
||||
clean_steps=None, deploy_steps=None,
|
||||
rescue_password=None, disable_ramdisk=None):
|
||||
rescue_password=None, disable_ramdisk=None,
|
||||
service_steps=None):
|
||||
topic = api.request.rpcapi.get_topic_for(rpc_node)
|
||||
# Note that there is a race condition. The node state(s) could change
|
||||
# by the time the RPC call is made and the TaskManager manager gets a
|
||||
@ -993,6 +998,17 @@ class NodeStatesController(rest.RestController):
|
||||
api.request.rpcapi.do_node_clean(
|
||||
api.request.context, rpc_node.uuid, clean_steps,
|
||||
disable_ramdisk, topic=topic)
|
||||
elif target == ir_states.VERBS['service']:
|
||||
if not service_steps:
|
||||
msg = (_('"service_steps" is required when setting '
|
||||
'target provision state to '
|
||||
'%s') % ir_states.VERBS['service'])
|
||||
raise exception.ClientSideError(
|
||||
msg, status_code=http_client.BAD_REQUEST)
|
||||
_check_service_steps(service_steps)
|
||||
api.request.rpcapi.do_node_service(
|
||||
api.request.context, rpc_node.uuid, service_steps,
|
||||
disable_ramdisk, topic=topic)
|
||||
elif target in PROVISION_ACTION_STATES:
|
||||
api.request.rpcapi.do_provisioning_action(
|
||||
api.request.context, rpc_node.uuid, target, topic)
|
||||
@ -1008,10 +1024,12 @@ class NodeStatesController(rest.RestController):
|
||||
clean_steps=args.types(type(None), list),
|
||||
deploy_steps=args.types(type(None), list),
|
||||
rescue_password=args.string,
|
||||
disable_ramdisk=args.boolean)
|
||||
disable_ramdisk=args.boolean,
|
||||
service_steps=args.types(type(None), list))
|
||||
def provision(self, node_ident, target, configdrive=None,
|
||||
clean_steps=None, deploy_steps=None,
|
||||
rescue_password=None, disable_ramdisk=None):
|
||||
rescue_password=None, disable_ramdisk=None,
|
||||
service_steps=None):
|
||||
"""Asynchronous trigger the provisioning of the node.
|
||||
|
||||
This will set the target provision state of the node, and a
|
||||
@ -1069,11 +1087,31 @@ class NodeStatesController(rest.RestController):
|
||||
inside the rescue environment. This is required (and only valid),
|
||||
when target is "rescue".
|
||||
:param disable_ramdisk: Whether to skip booting ramdisk for cleaning.
|
||||
:param service_steps: A list of service steps that will be performed on
|
||||
the node. A service step is a dictionary with required keys
|
||||
'interface', 'step', 'priority' and 'args'. If specified, the value
|
||||
for 'args' is a keyword variable argument dictionary that is passed
|
||||
to the service step method.::
|
||||
|
||||
{ 'interface': <driver_interface>,
|
||||
'step': <name_of_service_step>,
|
||||
'args': {<arg1>: <value1>, ..., <argn>: <valuen>}
|
||||
'priority': <integer>}
|
||||
|
||||
For example (this isn't a real example, this service step doesn't
|
||||
exist)::
|
||||
|
||||
{ 'interface': 'deploy',
|
||||
'step': 'upgrade_firmware',
|
||||
'args': {'force': True},
|
||||
'priority': 90 }
|
||||
|
||||
:raises: NodeLocked (HTTP 409) if the node is currently locked.
|
||||
:raises: ClientSideError (HTTP 409) if the node is already being
|
||||
provisioned.
|
||||
:raises: InvalidParameterValue (HTTP 400), if validation of
|
||||
clean_steps, deploy_steps or power driver interface fails.
|
||||
clean_steps, deploy_steps, service_steps or power driver
|
||||
interface fails.
|
||||
:raises: InvalidStateRequested (HTTP 400) if the requested transition
|
||||
is not possible from the current state.
|
||||
:raises: NodeInMaintenance (HTTP 400), if operation cannot be
|
||||
@ -1140,9 +1178,13 @@ class NodeStatesController(rest.RestController):
|
||||
if not api_utils.allow_unhold_verb():
|
||||
raise exception.NotAcceptable()
|
||||
|
||||
if target == ir_states.VERBS['service']:
|
||||
if not api_utils.allow_service_verb():
|
||||
raise exception.NotAcceptable()
|
||||
|
||||
self._do_provision_action(rpc_node, target, configdrive, clean_steps,
|
||||
deploy_steps, rescue_password,
|
||||
disable_ramdisk)
|
||||
disable_ramdisk, service_steps)
|
||||
|
||||
# Set the HTTP Location Header
|
||||
url_args = '/'.join([node_ident, 'states'])
|
||||
@ -1156,7 +1198,7 @@ def _check_clean_steps(clean_steps):
|
||||
clean_steps parameter of :func:`NodeStatesController.provision`.
|
||||
:raises: InvalidParameterValue if validation of steps fails.
|
||||
"""
|
||||
_check_steps(clean_steps, 'clean', _CLEAN_STEPS_SCHEMA)
|
||||
_check_steps(clean_steps, 'clean', _STEPS_SCHEMA)
|
||||
|
||||
|
||||
def _check_deploy_steps(deploy_steps):
|
||||
@ -1169,6 +1211,16 @@ def _check_deploy_steps(deploy_steps):
|
||||
_check_steps(deploy_steps, 'deploy', _DEPLOY_STEPS_SCHEMA)
|
||||
|
||||
|
||||
def _check_service_steps(service_steps):
|
||||
"""Ensure all necessary keys are present and correct in steps for service
|
||||
|
||||
:param service_steps: a list of steps. For more details, see the
|
||||
service_steps parameter of :func:`NodeStatesController.provision`.
|
||||
:raises: InvalidParameterValue if validation of steps fails.
|
||||
"""
|
||||
_check_steps(service_steps, 'service', _STEPS_SCHEMA)
|
||||
|
||||
|
||||
def _check_steps(steps, step_type, schema):
|
||||
"""Ensure all necessary keys are present and correct in steps.
|
||||
|
||||
@ -1429,6 +1481,7 @@ def _get_fields_for_node_query(fields=None):
|
||||
'retired',
|
||||
'retired_reason',
|
||||
'secure_boot',
|
||||
'service_step',
|
||||
'shard',
|
||||
'storage_interface',
|
||||
'target_power_state',
|
||||
@ -2105,7 +2158,7 @@ class NodesController(rest.RestController):
|
||||
'instance_info', 'driver_internal_info',
|
||||
'clean_step', 'deploy_step',
|
||||
'raid_config', 'target_raid_config',
|
||||
'traits', 'network_data']
|
||||
'traits', 'network_data', 'service_step']
|
||||
|
||||
_subcontroller_map = {
|
||||
'ports': port.PortsController,
|
||||
|
@ -808,7 +808,8 @@ VERSIONED_FIELDS = {
|
||||
'secure_boot': versions.MINOR_75_NODE_BOOT_MODE,
|
||||
'shard': versions.MINOR_82_NODE_SHARD,
|
||||
'parent_node': versions.MINOR_83_PARENT_CHILD_NODES,
|
||||
'firmware_interface': versions.MINOR_86_FIRMWARE_INTERFACE
|
||||
'firmware_interface': versions.MINOR_86_FIRMWARE_INTERFACE,
|
||||
'service_step': versions.MINOR_87_SERVICE
|
||||
}
|
||||
|
||||
for field in V31_FIELDS:
|
||||
@ -1957,6 +1958,11 @@ def allow_unhold_verb():
|
||||
return api.request.version.minor >= versions.MINOR_85_UNHOLD_VERB
|
||||
|
||||
|
||||
def allow_service_verb():
|
||||
"""Check if the service verb may be passed to the API."""
|
||||
return api.request.version.minor >= versions.MINOR_87_SERVICE
|
||||
|
||||
|
||||
def check_allow_deploy_steps(target, deploy_steps):
|
||||
"""Check if deploy steps are allowed"""
|
||||
|
||||
|
@ -123,6 +123,8 @@ BASE_VERSION = 1
|
||||
# v1.83: Add child node modeling
|
||||
# v1.84: Add ramdisk callback to continue inspection.
|
||||
# v1.85: Add unhold verb
|
||||
# v1.86: Add firmware interface
|
||||
# v1.87: Add service verb
|
||||
MINOR_0_JUNO = 0
|
||||
MINOR_1_INITIAL_VERSION = 1
|
||||
MINOR_2_AVAILABLE_STATE = 2
|
||||
@ -210,6 +212,8 @@ MINOR_83_PARENT_CHILD_NODES = 83
|
||||
MINOR_84_CONTINUE_INSPECTION = 84
|
||||
MINOR_85_UNHOLD_VERB = 85
|
||||
MINOR_86_FIRMWARE_INTERFACE = 86
|
||||
MINOR_87_SERVICE = 87
|
||||
|
||||
|
||||
# When adding another version, update:
|
||||
# - MINOR_MAX_VERSION
|
||||
@ -217,7 +221,7 @@ MINOR_86_FIRMWARE_INTERFACE = 86
|
||||
# explanation of what changed in the new version
|
||||
# - common/release_mappings.py, RELEASE_MAPPING['master']['api']
|
||||
|
||||
MINOR_MAX_VERSION = MINOR_86_FIRMWARE_INTERFACE
|
||||
MINOR_MAX_VERSION = MINOR_87_SERVICE
|
||||
|
||||
# String representations of the minor and maximum versions
|
||||
_MIN_VERSION_STRING = '{}.{}'.format(BASE_VERSION, MINOR_1_INITIAL_VERSION)
|
||||
|
@ -24,4 +24,8 @@ RESCUE_ABORT_FAILURE = 'rescue abort failure'
|
||||
""" Node is moved to maintenance due to failure of cleaning up during
|
||||
rescue abort. """
|
||||
|
||||
VALID_FAULTS = (POWER_FAILURE, CLEAN_FAILURE, RESCUE_ABORT_FAILURE)
|
||||
SERVICE_FAILURE = 'service failure'
|
||||
""" Node is moved to maintenance due to failure of a service operation. """
|
||||
|
||||
VALID_FAULTS = (POWER_FAILURE, CLEAN_FAILURE, RESCUE_ABORT_FAILURE,
|
||||
SERVICE_FAILURE)
|
||||
|
@ -989,3 +989,12 @@ class NeutronNetworkInterfaceMixin(object):
|
||||
# Fall back to non-managed in-band inspection
|
||||
raise exception.UnsupportedDriverExtension(
|
||||
driver=task.node.driver, extension='inspection')
|
||||
|
||||
def get_servicing_network_uuid(self, task):
|
||||
servicing_network = (
|
||||
task.node.driver_info.get('servicing_network')
|
||||
or CONF.neutron.servicing_network
|
||||
)
|
||||
return validate_network(
|
||||
servicing_network, _('servicing network'),
|
||||
context=task.context)
|
||||
|
@ -574,12 +574,12 @@ RELEASE_MAPPING = {
|
||||
}
|
||||
},
|
||||
'master': {
|
||||
'api': '1.86',
|
||||
'rpc': '1.56',
|
||||
'api': '1.87',
|
||||
'rpc': '1.57',
|
||||
'objects': {
|
||||
'Allocation': ['1.1'],
|
||||
'BIOSSetting': ['1.1'],
|
||||
'Node': ['1.39', '1.38', '1.37'],
|
||||
'Node': ['1.40', '1.39', '1.38', '1.37'],
|
||||
'NodeHistory': ['1.0'],
|
||||
'NodeInventory': ['1.0'],
|
||||
'Conductor': ['1.3'],
|
||||
|
@ -53,6 +53,7 @@ VERBS = {
|
||||
'rescue': 'rescue',
|
||||
'unrescue': 'unrescue',
|
||||
'unhold': 'unhold',
|
||||
'service': 'service',
|
||||
}
|
||||
""" Mapping of state-changing events that are PUT to the REST API
|
||||
|
||||
@ -235,11 +236,27 @@ UNRESCUEFAIL = 'unrescue failed'
|
||||
UNRESCUING = 'unrescuing'
|
||||
""" Node is being restored from rescue mode (to active state). """
|
||||
|
||||
SERVICE = 'service'
|
||||
""" Node is being requested to be modified through a service step. """
|
||||
|
||||
SERVICING = 'servicing'
|
||||
""" Node is actively being changed by a service step. """
|
||||
|
||||
SERVICEWAIT = 'service wait'
|
||||
""" Node is waiting for an operation to complete. """
|
||||
|
||||
SERVICEFAIL = 'service failed'
|
||||
""" Node has failed in a service step execution. """
|
||||
|
||||
SERVICEHOLD = 'service hold'
|
||||
""" Node is being held for direct intervention from a service step. """
|
||||
|
||||
|
||||
# NOTE(kaifeng): INSPECTING is allowed to keep backwards compatibility,
|
||||
# starting from API 1.39 node update is disallowed in this state.
|
||||
UPDATE_ALLOWED_STATES = (DEPLOYFAIL, INSPECTING, INSPECTFAIL, INSPECTWAIT,
|
||||
CLEANFAIL, ERROR, VERIFYING, ADOPTFAIL, RESCUEFAIL,
|
||||
UNRESCUEFAIL)
|
||||
UNRESCUEFAIL, SERVICE, SERVICEHOLD, SERVICEFAIL)
|
||||
"""Transitional states in which we allow updating a node."""
|
||||
|
||||
DELETE_ALLOWED_STATES = (MANAGEABLE, ENROLL, ADOPTFAIL)
|
||||
@ -250,7 +267,7 @@ STABLE_STATES = (ENROLL, MANAGEABLE, AVAILABLE, ACTIVE, ERROR, RESCUE)
|
||||
|
||||
UNSTABLE_STATES = (DEPLOYING, DEPLOYWAIT, CLEANING, CLEANWAIT, VERIFYING,
|
||||
DELETING, INSPECTING, INSPECTWAIT, ADOPTING, RESCUING,
|
||||
RESCUEWAIT, UNRESCUING)
|
||||
RESCUEWAIT, UNRESCUING, SERVICING, SERVICEWAIT)
|
||||
"""States that can be changed without external request."""
|
||||
|
||||
STUCK_STATES_TREATED_AS_FAIL = (DEPLOYING, CLEANING, VERIFYING, INSPECTING,
|
||||
@ -272,12 +289,15 @@ _FASTTRACK_LOOKUP_ALLOWED_STATES = (ENROLL, MANAGEABLE, AVAILABLE,
|
||||
DEPLOYING, DEPLOYWAIT,
|
||||
CLEANING, CLEANWAIT,
|
||||
INSPECTING, INSPECTWAIT,
|
||||
RESCUING, RESCUEWAIT)
|
||||
RESCUING, RESCUEWAIT,
|
||||
SERVICING, SERVICEWAIT,
|
||||
SERVICEHOLD)
|
||||
FASTTRACK_LOOKUP_ALLOWED_STATES = frozenset(_FASTTRACK_LOOKUP_ALLOWED_STATES)
|
||||
"""States where API lookups are permitted with fast track enabled."""
|
||||
|
||||
FAILURE_STATES = frozenset((DEPLOYFAIL, CLEANFAIL, INSPECTFAIL,
|
||||
RESCUEFAIL, UNRESCUEFAIL, ADOPTFAIL))
|
||||
RESCUEFAIL, UNRESCUEFAIL, ADOPTFAIL,
|
||||
SERVICEFAIL))
|
||||
|
||||
|
||||
##############
|
||||
@ -594,3 +614,49 @@ machine.add_transition(ADOPTFAIL, ADOPTING, 'adopt')
|
||||
|
||||
# A node that failed adoption can be moved back to manageable
|
||||
machine.add_transition(ADOPTFAIL, MANAGEABLE, 'manage')
|
||||
|
||||
# Add service* states
|
||||
machine.add_state(SERVICING, target=ACTIVE, **watchers)
|
||||
machine.add_state(SERVICEWAIT, target=ACTIVE, **watchers)
|
||||
machine.add_state(SERVICEFAIL, target=ACTIVE, **watchers)
|
||||
machine.add_state(SERVICEHOLD, target=ACTIVE, **watchers)
|
||||
|
||||
# A node in service an be returned to active
|
||||
machine.add_transition(SERVICING, ACTIVE, 'done')
|
||||
|
||||
# A node in active can be serviced
|
||||
machine.add_transition(ACTIVE, SERVICING, 'service')
|
||||
|
||||
# A node in servicing can be failed
|
||||
machine.add_transition(SERVICING, SERVICEFAIL, 'fail')
|
||||
|
||||
# A node in service can enter a wait state
|
||||
machine.add_transition(SERVICING, SERVICEWAIT, 'wait')
|
||||
|
||||
# A node in service can be held
|
||||
machine.add_transition(SERVICING, SERVICEHOLD, 'hold')
|
||||
machine.add_transition(SERVICEWAIT, SERVICEHOLD, 'hold')
|
||||
|
||||
# A held node in service can get more service steps to start over
|
||||
machine.add_transition(SERVICEHOLD, SERVICING, 'service')
|
||||
|
||||
# A held node in service can be removed from service
|
||||
machine.add_transition(SERVICEHOLD, SERVICEWAIT, 'unhold')
|
||||
|
||||
# A node in service wait can resume
|
||||
machine.add_transition(SERVICEWAIT, SERVICING, 'resume')
|
||||
|
||||
# A node in service wait can failed
|
||||
machine.add_transition(SERVICEWAIT, SERVICEFAIL, 'fail')
|
||||
|
||||
# A node in service wait can be aborted
|
||||
machine.add_transition(SERVICEWAIT, SERVICEFAIL, 'abort')
|
||||
|
||||
# A node in service hold can be aborted
|
||||
machine.add_transition(SERVICEHOLD, SERVICEFAIL, 'abort')
|
||||
|
||||
# A node in service fail can re-enter service
|
||||
machine.add_transition(SERVICEFAIL, SERVICING, 'service')
|
||||
|
||||
# A node in service fail can be rescued
|
||||
machine.add_transition(SERVICEFAIL, RESCUING, 'rescue')
|
||||
|
@ -66,6 +66,7 @@ from ironic.conductor import deployments
|
||||
from ironic.conductor import inspection
|
||||
from ironic.conductor import notification_utils as notify_utils
|
||||
from ironic.conductor import periodics
|
||||
from ironic.conductor import servicing
|
||||
from ironic.conductor import steps as conductor_steps
|
||||
from ironic.conductor import task_manager
|
||||
from ironic.conductor import utils
|
||||
@ -93,7 +94,7 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
# NOTE(rloo): This must be in sync with rpcapi.ConductorAPI's.
|
||||
# NOTE(pas-ha): This also must be in sync with
|
||||
# ironic.common.release_mappings.RELEASE_MAPPING['master']
|
||||
RPC_API_VERSION = '1.56'
|
||||
RPC_API_VERSION = '1.57'
|
||||
|
||||
target = messaging.Target(version=RPC_API_VERSION)
|
||||
|
||||
@ -3710,6 +3711,81 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
task, inventory, plugin_data),
|
||||
err_handler=utils.provisioning_error_handler)
|
||||
|
||||
@METRICS.timer('ConductorManager.do_node_service')
|
||||
@messaging.expected_exceptions(exception.InvalidParameterValue,
|
||||
exception.InvalidStateRequested,
|
||||
exception.NodeInMaintenance,
|
||||
exception.NodeLocked,
|
||||
exception.NoFreeConductorWorker,
|
||||
exception.ConcurrentActionLimit)
|
||||
def do_node_service(self, context, node_id, service_steps,
|
||||
disable_ramdisk=False):
|
||||
"""RPC method to initiate node service.
|
||||
|
||||
:param context: an admin context.
|
||||
:param node_id: the ID or UUID of a node.
|
||||
:param service_steps: an ordered list of steps that will be
|
||||
performed on the node. A step is a dictionary with required
|
||||
keys 'interface' and 'step', and optional key 'args'. If
|
||||
specified, the 'args' arguments are passed to the clean step
|
||||
method.::
|
||||
|
||||
{ 'interface': <driver_interface>,
|
||||
'step': <name_of__step>,
|
||||
'args': {<arg1>: <value1>, ..., <argn>: <valuen>} }
|
||||
|
||||
For example (this isn't a real example, this service step
|
||||
doesn't exist)::
|
||||
|
||||
{ 'interface': deploy',
|
||||
'step': 'upgrade_firmware',
|
||||
'args': {'force': True} }
|
||||
:param disable_ramdisk: Optional. Whether to disable the ramdisk boot.
|
||||
:raises: InvalidParameterValue if power validation fails.
|
||||
:raises: InvalidStateRequested if the node is not in manageable state.
|
||||
:raises: NodeLocked if node is locked by another conductor.
|
||||
:raises: NoFreeConductorWorker when there is no free worker to start
|
||||
async task.
|
||||
:raises: ConcurrentActionLimit If this action would exceed the
|
||||
configured limits of the deployment.
|
||||
"""
|
||||
self._concurrent_action_limit(action='service')
|
||||
with task_manager.acquire(context, node_id, shared=False,
|
||||
purpose='node service') as task:
|
||||
node = task.node
|
||||
if node.maintenance:
|
||||
raise exception.NodeInMaintenance(op=_('service'),
|
||||
node=node.uuid)
|
||||
# NOTE(TheJulia): service.do_node_service() will also make similar
|
||||
# calls to validate power & network, but we are doing it again
|
||||
# here so that the user gets immediate feedback of any issues.
|
||||
# This behaviour (of validating) is consistent with other methods
|
||||
# like self.do_node_deploy().
|
||||
try:
|
||||
task.driver.power.validate(task)
|
||||
task.driver.network.validate(task)
|
||||
except exception.InvalidParameterValue as e:
|
||||
msg = (_('Validation of node %(node)s for servicing '
|
||||
'failed: %(msg)s') %
|
||||
{'node': node.uuid, 'msg': e})
|
||||
raise exception.InvalidParameterValue(msg)
|
||||
try:
|
||||
task.process_event(
|
||||
'service',
|
||||
callback=self._spawn_worker,
|
||||
call_args=(servicing.do_node_service, task, service_steps,
|
||||
disable_ramdisk),
|
||||
err_handler=utils.provisioning_error_handler,
|
||||
target_state=states.ACTIVE)
|
||||
except exception.InvalidState:
|
||||
raise exception.InvalidStateRequested(
|
||||
action='service', node=node.uuid,
|
||||
state=node.provision_state)
|
||||
|
||||
|
||||
# NOTE(TheJulia): This is the end of the class definition for the
|
||||
# conductor manager. Methods for RPC and stuffs should go above this
|
||||
# point in the File. Everything below is a helper or periodic.
|
||||
|
||||
@METRICS.timer('get_vendor_passthru_metadata')
|
||||
def get_vendor_passthru_metadata(route_dict):
|
||||
|
@ -153,12 +153,13 @@ class ConductorAPI(object):
|
||||
heartbeat
|
||||
| 1.55 - Added change_node_boot_mode
|
||||
| 1.56 - Added continue_inspection
|
||||
| 1.57 - Added do_node_service
|
||||
"""
|
||||
|
||||
# NOTE(rloo): This must be in sync with manager.ConductorManager's.
|
||||
# NOTE(pas-ha): This also must be in sync with
|
||||
# ironic.common.release_mappings.RELEASE_MAPPING['master']
|
||||
RPC_API_VERSION = '1.56'
|
||||
RPC_API_VERSION = '1.57'
|
||||
|
||||
def __init__(self, topic=None):
|
||||
super(ConductorAPI, self).__init__()
|
||||
@ -1409,3 +1410,27 @@ class ConductorAPI(object):
|
||||
cctxt = self._prepare_call(topic=topic, version='1.56')
|
||||
return cctxt.call(context, 'continue_inspection', node_id=node_id,
|
||||
inventory=inventory, plugin_data=plugin_data)
|
||||
|
||||
def do_node_service(self, context, node_id, service_steps,
|
||||
disable_ramdisk=None, topic=None):
|
||||
"""Signal to conductor service to perform manual cleaning on a node.
|
||||
|
||||
:param context: request context.
|
||||
:param node_id: node ID or UUID.
|
||||
:param service_steps: a list of service step dictionaries.
|
||||
:param disable_ramdisk: Whether to skip booting ramdisk for service.
|
||||
:param topic: RPC topic. Defaults to self.topic.
|
||||
:raises: InvalidParameterValue if validation of power driver interface
|
||||
failed.
|
||||
:raises: InvalidStateRequested if cleaning can not be performed.
|
||||
:raises: NodeInMaintenance if node is in maintenance mode.
|
||||
:raises: NodeLocked if node is locked by another conductor.
|
||||
:raises: NoFreeConductorWorker when there is no free worker to start
|
||||
async task.
|
||||
"""
|
||||
cctxt = self._prepare_call(topic=topic, version='1.57')
|
||||
return cctxt.call(
|
||||
context, 'do_node_service',
|
||||
node_id=node_id,
|
||||
service_steps=service_steps,
|
||||
disable_ramdisk=disable_ramdisk)
|
||||
|
377
ironic/conductor/servicing.py
Normal file
377
ironic/conductor/servicing.py
Normal file
@ -0,0 +1,377 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""Functionality related to servicing."""
|
||||
|
||||
from oslo_log import log
|
||||
|
||||
from ironic.common import exception
|
||||
from ironic.common.i18n import _
|
||||
from ironic.common import states
|
||||
from ironic.conductor import steps as conductor_steps
|
||||
from ironic.conductor import task_manager
|
||||
from ironic.conductor import utils
|
||||
from ironic.conf import CONF
|
||||
from ironic.drivers import utils as driver_utils
|
||||
from ironic import objects
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
@task_manager.require_exclusive_lock
|
||||
def do_node_service(task, service_steps=None, disable_ramdisk=False):
|
||||
"""Internal RPC method to perform servicing of a node.
|
||||
|
||||
:param task: a TaskManager instance with an exclusive lock on its node
|
||||
:param service_steps: The list of service steps to perform. If none, step
|
||||
validation will fail.
|
||||
:param disable_ramdisk: Whether to skip booting ramdisk for servicing.
|
||||
"""
|
||||
node = task.node
|
||||
try:
|
||||
# NOTE(ghe): Valid power and network values are needed to perform
|
||||
# a service operation.
|
||||
task.driver.power.validate(task)
|
||||
if not disable_ramdisk:
|
||||
task.driver.network.validate(task)
|
||||
except (exception.InvalidParameterValue, exception.NetworkError) as e:
|
||||
msg = (_('Validation of node %(node)s for service failed: %(msg)s') %
|
||||
{'node': node.uuid, 'msg': e})
|
||||
return utils.servicing_error_handler(task, msg)
|
||||
|
||||
utils.wipe_service_internal_info(task)
|
||||
node.set_driver_internal_info('service_steps', service_steps)
|
||||
node.set_driver_internal_info('service_disable_ramdisk',
|
||||
disable_ramdisk)
|
||||
task.node.save()
|
||||
|
||||
# Allow the deploy driver to set up the ramdisk again (necessary for IPA)
|
||||
try:
|
||||
if not disable_ramdisk:
|
||||
prepare_result = task.driver.deploy.prepare_service(task)
|
||||
else:
|
||||
LOG.info('Skipping preparing for service in-band service since '
|
||||
'out-of-band only service has been requested for node '
|
||||
'%s', node.uuid)
|
||||
prepare_result = None
|
||||
except Exception as e:
|
||||
msg = (_('Failed to prepare node %(node)s for service: %(e)s')
|
||||
% {'node': node.uuid, 'e': e})
|
||||
return utils.servicing_error_handler(task, msg, traceback=True)
|
||||
|
||||
if prepare_result == states.SERVICEWAIT:
|
||||
# Prepare is asynchronous, the deploy driver will need to
|
||||
# set node.driver_internal_info['service_steps'] and
|
||||
# node.service_step and then make an RPC call to
|
||||
# continue_node_service to start service operations.
|
||||
task.process_event('wait')
|
||||
return
|
||||
try:
|
||||
conductor_steps.set_node_service_steps(
|
||||
task, disable_ramdisk=disable_ramdisk)
|
||||
except Exception as e:
|
||||
# Catch all exceptions and follow the error handling
|
||||
# path so things are cleaned up properly.
|
||||
msg = (_('Cannot service node %(node)s: %(msg)s')
|
||||
% {'node': node.uuid, 'msg': e})
|
||||
return utils.servicing_error_handler(task, msg)
|
||||
|
||||
steps = node.driver_internal_info.get('service_steps', [])
|
||||
if not steps:
|
||||
_tear_down_node_service(task, disable_ramdisk=disable_ramdisk)
|
||||
step_index = 0 if steps else None
|
||||
do_next_service_step(task, step_index, disable_ramdisk=disable_ramdisk)
|
||||
|
||||
|
||||
@utils.fail_on_error(utils.servicing_error_handler,
|
||||
_("Unexpected error when processing next service step"),
|
||||
traceback=True)
|
||||
@task_manager.require_exclusive_lock
|
||||
def do_next_service_step(task, step_index, disable_ramdisk=None):
|
||||
"""Do service, starting from the specified service step.
|
||||
|
||||
:param task: a TaskManager instance with an exclusive lock
|
||||
:param step_index: The first service step in the list to execute. This
|
||||
is the index (from 0) into the list of service steps in the node's
|
||||
driver_internal_info['service_steps']. Is None if there are no steps
|
||||
to execute.
|
||||
:param disable_ramdisk: Whether to skip booting ramdisk for service.
|
||||
"""
|
||||
node = task.node
|
||||
# For manual cleaning, the target provision state is MANAGEABLE,
|
||||
# whereas for automated cleaning, it is AVAILABLE.
|
||||
if step_index is None:
|
||||
steps = []
|
||||
else:
|
||||
assert node.driver_internal_info.get('service_steps') is not None, \
|
||||
f"BUG: No steps for {node.uuid}, step index is {step_index}"
|
||||
steps = node.driver_internal_info['service_steps'][step_index:]
|
||||
|
||||
if disable_ramdisk is None:
|
||||
disable_ramdisk = node.driver_internal_info.get(
|
||||
'service_disable_ramdisk', False)
|
||||
|
||||
LOG.info('Executing service on node %(node)s, remaining steps: '
|
||||
'%(steps)s', {'node': node.uuid, 'steps': steps})
|
||||
# Execute each step until we hit an async step or run out of steps
|
||||
for ind, step in enumerate(steps):
|
||||
# Save which step we're about to start so we can restart
|
||||
# if necessary
|
||||
node.service_step = step
|
||||
node.set_driver_internal_info('service_step_index', step_index + ind)
|
||||
node.save()
|
||||
eocn = step.get('execute_on_child_nodes', False)
|
||||
result = None
|
||||
try:
|
||||
if not eocn:
|
||||
LOG.info('Executing %(step)s on node %(node)s',
|
||||
{'step': step, 'node': node.uuid})
|
||||
use_step_handler = conductor_steps.use_reserved_step_handler(
|
||||
task, step)
|
||||
if use_step_handler:
|
||||
if use_step_handler == conductor_steps.EXIT_STEPS:
|
||||
# Exit the step, i.e. hold step
|
||||
return
|
||||
# if use_step_handler == conductor_steps.USED_HANDLER
|
||||
# Then we have completed the needful in the handler,
|
||||
# but since there is no other value to check now,
|
||||
# we know we just need to skip execute_deploy_step
|
||||
else:
|
||||
interface = getattr(task.driver, step.get('interface'))
|
||||
result = interface.execute_service_step(task, step)
|
||||
else:
|
||||
LOG.info('Executing %(step)s on child nodes for node '
|
||||
'%(node)s.',
|
||||
{'step': step, 'node': node.uuid})
|
||||
result = execute_step_on_child_nodes(task, step)
|
||||
|
||||
except Exception as e:
|
||||
if isinstance(e, exception.AgentConnectionFailed):
|
||||
if task.node.driver_internal_info.get('service_reboot'):
|
||||
LOG.info('Agent is not yet running on node %(node)s '
|
||||
'after service reboot, waiting for agent to '
|
||||
'come up to run next service step %(step)s.',
|
||||
{'node': node.uuid, 'step': step})
|
||||
node.set_driver_internal_info('skip_current_service_step',
|
||||
False)
|
||||
task.process_event('wait')
|
||||
return
|
||||
if isinstance(e, exception.AgentInProgress):
|
||||
LOG.info('Conductor attempted to process service step for '
|
||||
'node %(node)s. Agent indicated it is presently '
|
||||
'executing a command. Error: %(error)s',
|
||||
{'node': task.node.uuid,
|
||||
'error': e})
|
||||
node.set_driver_internal_info(
|
||||
'skip_current_service_step', False)
|
||||
task.process_event('wait')
|
||||
return
|
||||
|
||||
msg = (_('Node %(node)s failed step %(step)s: '
|
||||
'%(exc)s') %
|
||||
{'node': node.uuid, 'exc': e,
|
||||
'step': node.service_step})
|
||||
if not disable_ramdisk:
|
||||
driver_utils.collect_ramdisk_logs(task.node, label='service')
|
||||
utils.servicing_error_handler(task, msg, traceback=True)
|
||||
return
|
||||
|
||||
# Check if the step is done or not. The step should return
|
||||
# states.SERVICEWAIT if the step is still being executed, or
|
||||
# None if the step is done.
|
||||
if result == states.SERVICEWAIT:
|
||||
# Kill this worker, the async step will make an RPC call to
|
||||
# continue_node_service to continue service
|
||||
LOG.info('Service step %(step)s on node %(node)s being '
|
||||
'executed asynchronously, waiting for driver.',
|
||||
{'node': node.uuid, 'step': step})
|
||||
task.process_event('wait')
|
||||
return
|
||||
elif result is not None:
|
||||
msg = (_('While executing step %(step)s on node '
|
||||
'%(node)s, step returned invalid value: %(val)s')
|
||||
% {'step': step, 'node': node.uuid, 'val': result})
|
||||
return utils.servicing_error_handler(task, msg)
|
||||
LOG.info('Node %(node)s finished service step %(step)s',
|
||||
{'node': node.uuid, 'step': step})
|
||||
utils.wipe_service_internal_info(task)
|
||||
if CONF.agent.deploy_logs_collect == 'always' and not disable_ramdisk:
|
||||
driver_utils.collect_ramdisk_logs(task.node, label='service')
|
||||
_tear_down_node_service(task, disable_ramdisk)
|
||||
|
||||
|
||||
def _tear_down_node_service(task, disable_ramdisk):
|
||||
"""Clean up a node from service.
|
||||
|
||||
:param task: A Taskmanager object.
|
||||
:returns: None
|
||||
"""
|
||||
task.node.service_step = None
|
||||
utils.wipe_service_internal_info(task)
|
||||
task.node.save()
|
||||
if not disable_ramdisk:
|
||||
try:
|
||||
task.driver.deploy.tear_down_service(task)
|
||||
except Exception as e:
|
||||
msg = (_('Failed to tear down from service for node %(node)s, '
|
||||
'reason: %(err)s')
|
||||
% {'node': task.node.uuid, 'err': e})
|
||||
return utils.servicing_error_handler(task, msg,
|
||||
traceback=True,
|
||||
tear_down_service=False)
|
||||
LOG.info('Node %s service complete.', task.node.uuid)
|
||||
task.process_event('done')
|
||||
|
||||
|
||||
def execute_step_on_child_nodes(task, step):
|
||||
"""Execute a requested step against a child node.
|
||||
|
||||
:param task: The TaskManager object for the parent node.
|
||||
:param step: The requested step to be executed.
|
||||
:returns: None on Success, the resulting error message if a
|
||||
failure has occured.
|
||||
"""
|
||||
# NOTE(TheJulia): We could just use nodeinfo list calls against
|
||||
# dbapi.
|
||||
# NOTE(TheJulia): We validate the data in advance in the API
|
||||
# with the original request context.
|
||||
eocn = step.get('execute_on_child_nodes')
|
||||
child_nodes = step.get('limit_child_node_execution', [])
|
||||
filters = {'parent_node': task.node.uuid}
|
||||
if eocn and len(child_nodes) >= 1:
|
||||
filters['uuid_in'] = child_nodes
|
||||
child_nodes = objects.Node.list(
|
||||
task.context,
|
||||
filters=filters,
|
||||
fields=['uuid']
|
||||
)
|
||||
for child_node in child_nodes:
|
||||
result = None
|
||||
LOG.info('Executing step %(step)s on child node %(node)s for parent '
|
||||
'node %(parent_node)s',
|
||||
{'step': step,
|
||||
'node': child_node.uuid,
|
||||
'parent_node': task.node.uuid})
|
||||
with task_manager.acquire(task.context,
|
||||
child_node.uuid,
|
||||
purpose='execute step') as child_task:
|
||||
interface = getattr(child_task.driver, step.get('interface'))
|
||||
LOG.info('Executing %(step)s on node %(node)s',
|
||||
{'step': step, 'node': child_task.node.uuid})
|
||||
if not conductor_steps.use_reserved_step_handler(child_task, step):
|
||||
result = interface.execute_service_step(child_task, step)
|
||||
if result is not None:
|
||||
if (result == states.SERVICEWAIT
|
||||
and CONF.conductor.permit_child_node_step_async_result):
|
||||
# Operator has chosen to permit this due to some reason
|
||||
# NOTE(TheJulia): This is where we would likely wire agent
|
||||
# error handling if we ever implicitly allowed child node
|
||||
# deploys to take place with the agent from a parent node
|
||||
# being deployed.
|
||||
continue
|
||||
msg = (_('While executing step %(step)s on child node '
|
||||
'%(node)s, step returned invalid value: %(val)s')
|
||||
% {'step': step, 'node': child_task.node.uuid,
|
||||
'val': result})
|
||||
LOG.error(msg)
|
||||
# Only None or states.SERVICEWAIT are possible paths forward
|
||||
# in the parent step execution code, so returning the message
|
||||
# means it will be logged.
|
||||
return msg
|
||||
|
||||
|
||||
def get_last_error(node):
|
||||
last_error = _('By request, the service operation was aborted')
|
||||
if node.service_step:
|
||||
last_error += (
|
||||
_(' during or after the completion of step "%s"')
|
||||
% conductor_steps.step_id(node.service_step)
|
||||
)
|
||||
return last_error
|
||||
|
||||
|
||||
@task_manager.require_exclusive_lock
|
||||
def do_node_service_abort(task):
|
||||
"""Internal method to abort an ongoing operation.
|
||||
|
||||
:param task: a TaskManager instance with an exclusive lock
|
||||
"""
|
||||
node = task.node
|
||||
try:
|
||||
task.driver.deploy.tear_down_service(task)
|
||||
except Exception as e:
|
||||
log_msg = (_('Failed to tear down service for node %(node)s '
|
||||
'after aborting the operation. Error: %(err)s') %
|
||||
{'node': node.uuid, 'err': e})
|
||||
error_msg = _('Failed to tear down service after aborting '
|
||||
'the operation')
|
||||
utils.servicing_error_handler(task, log_msg,
|
||||
errmsg=error_msg,
|
||||
traceback=True,
|
||||
tear_down_service=False,
|
||||
set_fail_state=False)
|
||||
return
|
||||
|
||||
last_error = get_last_error(node)
|
||||
info_message = _('Clean operation aborted for node %s') % node.uuid
|
||||
if node.service_step:
|
||||
info_message += (
|
||||
_(' during or after the completion of step "%s"')
|
||||
% node.service_step
|
||||
)
|
||||
|
||||
node.last_error = last_error
|
||||
node.service_step = None
|
||||
utils.wipe_service_internal_info(task)
|
||||
node.save()
|
||||
LOG.info(info_message)
|
||||
|
||||
|
||||
@utils.fail_on_error(utils.servicing_error_handler,
|
||||
_("Unexpected error when processing next service step"),
|
||||
traceback=True)
|
||||
@task_manager.require_exclusive_lock
|
||||
def continue_node_service(task):
|
||||
"""Continue servicing after finishing an async service step.
|
||||
|
||||
This function calculates which step has to run next and passes control
|
||||
into do_next_service_step.
|
||||
|
||||
:param task: a TaskManager instance with an exclusive lock
|
||||
"""
|
||||
node = task.node
|
||||
|
||||
next_step_index = utils.update_next_step_index(task, 'service')
|
||||
|
||||
# If this isn't the final service step in the service operation
|
||||
# and it is flagged to abort after the service step that just
|
||||
# finished, we abort the operation.
|
||||
if node.service_step.get('abort_after'):
|
||||
step_name = node.service_step['step']
|
||||
if next_step_index is not None:
|
||||
LOG.debug('The service operation for node %(node)s was '
|
||||
'marked to be aborted after step "%(step)s '
|
||||
'completed. Aborting now that it has completed.',
|
||||
{'node': task.node.uuid, 'step': step_name})
|
||||
|
||||
task.process_event('fail')
|
||||
do_node_service_abort(task)
|
||||
return
|
||||
|
||||
LOG.debug('The service operation for node %(node)s was '
|
||||
'marked to be aborted after step "%(step)s" '
|
||||
'completed. However, since there are no more '
|
||||
'service steps after this, the abort is not going '
|
||||
'to be done.', {'node': node.uuid,
|
||||
'step': step_name})
|
||||
|
||||
do_next_service_step(task, next_step_index)
|
@ -54,6 +54,8 @@ DEPLOYING_INTERFACE_PRIORITY = {
|
||||
'raid': 1,
|
||||
}
|
||||
|
||||
SERVICING_INTERFACE_PRIORITY = DEPLOYING_INTERFACE_PRIORITY.copy()
|
||||
|
||||
VERIFYING_INTERFACE_PRIORITY = {
|
||||
# When two verify steps have the same priority, their order is determined
|
||||
# by which interface is implementing the verify step. The verifying step of
|
||||
@ -127,6 +129,15 @@ def _sorted_steps(steps, sort_step_key):
|
||||
return sorted(steps, key=sort_step_key, reverse=True)
|
||||
|
||||
|
||||
def _service_step_key(step):
|
||||
"""Sort by priority, then interface priority in event of tie.
|
||||
|
||||
:param step: deploy step dict to get priority for.
|
||||
"""
|
||||
return (step.get('priority'),
|
||||
SERVICING_INTERFACE_PRIORITY[step.get('interface')])
|
||||
|
||||
|
||||
def is_equivalent(step1, step2):
|
||||
"""Compare steps, ignoring their priority."""
|
||||
return (step1.get('interface') == step2.get('interface')
|
||||
@ -240,6 +251,26 @@ def _get_deployment_steps(task, enabled=False, sort=True):
|
||||
enabled=enabled, sort_step_key=sort_key)
|
||||
|
||||
|
||||
def _get_service_steps(task, enabled=False, sort=True):
|
||||
"""Get service steps for task.node.
|
||||
|
||||
:param task: A TaskManager object
|
||||
:param enabled: If True, returns only enabled (priority > 0) steps. If
|
||||
False, returns all clean steps.
|
||||
:param sort: If True, the steps are sorted from highest priority to lowest
|
||||
priority. For steps having the same priority, they are sorted from
|
||||
highest interface priority to lowest.
|
||||
:raises: NodeServicingFailure if there was a problem getting the
|
||||
clean steps.
|
||||
:returns: A list of clean step dictionaries
|
||||
"""
|
||||
sort_key = _service_step_key if sort else None
|
||||
service_steps = _get_steps(task, SERVICING_INTERFACE_PRIORITY,
|
||||
'get_service_steps', enabled=enabled,
|
||||
sort_step_key=sort_key)
|
||||
return service_steps
|
||||
|
||||
|
||||
def _get_verify_steps(task, enabled=False, sort=True):
|
||||
"""Get verify steps for task.node.
|
||||
|
||||
@ -455,6 +486,34 @@ def set_node_deployment_steps(task, reset_current=True, skip_missing=False):
|
||||
node.save()
|
||||
|
||||
|
||||
def set_node_service_steps(task, disable_ramdisk=False):
|
||||
"""Set up the node with clean step information for cleaning.
|
||||
|
||||
For automated cleaning, get the clean steps from the driver.
|
||||
For manual cleaning, the user's clean steps are known but need to be
|
||||
validated against the driver's clean steps.
|
||||
|
||||
:param disable_ramdisk: If `True`, only steps with requires_ramdisk=False
|
||||
are accepted.
|
||||
:raises: InvalidParameterValue if there is a problem with the user's
|
||||
clean steps.
|
||||
:raises: NodeCleaningFailure if there was a problem getting the
|
||||
clean steps.
|
||||
"""
|
||||
node = task.node
|
||||
steps = _validate_user_service_steps(
|
||||
task, node.driver_internal_info.get('service_steps', []),
|
||||
disable_ramdisk=disable_ramdisk)
|
||||
LOG.debug('List of the steps for service of node %(node)s: '
|
||||
'%(steps)s', {'node': node.uuid,
|
||||
'steps': steps})
|
||||
|
||||
node.service_step = {}
|
||||
node.set_driver_internal_info('service_steps', steps)
|
||||
node.set_driver_internal_info('service_step_index', None)
|
||||
node.save()
|
||||
|
||||
|
||||
def step_id(step):
|
||||
"""Return the 'ID' of a deploy step.
|
||||
|
||||
@ -705,7 +764,6 @@ def _validate_user_steps(task, user_steps, driver_steps, step_type,
|
||||
err = error_prefix or ''
|
||||
err += '; '.join(errors)
|
||||
raise exception.InvalidParameterValue(err=err)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@ -769,6 +827,36 @@ def _validate_user_deploy_steps(task, user_steps, error_prefix=None,
|
||||
skip_missing=skip_missing)
|
||||
|
||||
|
||||
def _validate_user_service_steps(task, user_steps, disable_ramdisk=False):
|
||||
"""Validate the user-specified service steps.
|
||||
|
||||
:param task: A TaskManager object
|
||||
:param user_steps: a list of clean steps. A clean step is a dictionary
|
||||
with required keys 'interface' and 'step', and optional key 'args'::
|
||||
|
||||
{ 'interface': <driver_interface>,
|
||||
'step': <name_of_clean_step>,
|
||||
'args': {<arg1>: <value1>, ..., <argn>: <valuen>} }
|
||||
|
||||
For example::
|
||||
|
||||
{ 'interface': 'deploy',
|
||||
'step': 'upgrade_firmware',
|
||||
'args': {'force': True} }
|
||||
:param disable_ramdisk: If `True`, only steps with requires_ramdisk=False
|
||||
are accepted.
|
||||
:raises: InvalidParameterValue if validation of clean steps fails.
|
||||
:raises: NodeCleaningFailure if there was a problem getting the
|
||||
clean steps from the driver.
|
||||
:return: validated clean steps update with information from the driver
|
||||
"""
|
||||
# We call with enabled = False below so we pickup auto-disabled
|
||||
# steps, since service steps are not automagic like cleaning can be.
|
||||
driver_steps = _get_service_steps(task, enabled=False, sort=False)
|
||||
return _validate_user_steps(task, user_steps, driver_steps, 'service',
|
||||
disable_ramdisk=disable_ramdisk)
|
||||
|
||||
|
||||
def _get_validated_user_deploy_steps(task, deploy_steps=None,
|
||||
skip_missing=False):
|
||||
"""Validate the deploy steps for a node.
|
||||
|
@ -574,6 +574,20 @@ def wipe_cleaning_internal_info(task):
|
||||
node.del_driver_internal_info('steps_validated')
|
||||
|
||||
|
||||
def wipe_service_internal_info(task):
|
||||
"""Remove temporary servicing fields from driver_internal_info."""
|
||||
wipe_token_and_url(task)
|
||||
node = task.node
|
||||
node.set_driver_internal_info('service_steps', None)
|
||||
node.del_driver_internal_info('agent_cached_service_steps')
|
||||
node.del_driver_internal_info('service_step_index')
|
||||
node.del_driver_internal_info('service_reboot')
|
||||
node.del_driver_internal_info('service_polling')
|
||||
node.del_driver_internal_info('service_disable_ramdisk')
|
||||
node.del_driver_internal_info('skip_current_service_step')
|
||||
node.del_driver_internal_info('steps_validated')
|
||||
|
||||
|
||||
def deploying_error_handler(task, logmsg, errmsg=None, traceback=False,
|
||||
clean_up=True):
|
||||
"""Put a failed node in DEPLOYFAIL.
|
||||
@ -1209,7 +1223,7 @@ def _get_node_next_steps(task, step_type, skip_current_step=True):
|
||||
:returns: index of the next step; None if there are none to execute.
|
||||
|
||||
"""
|
||||
valid_types = set(['clean', 'deploy'])
|
||||
valid_types = set(['clean', 'deploy', 'service'])
|
||||
if step_type not in valid_types:
|
||||
# NOTE(rloo): No need to i18n this, since this would be a
|
||||
# developer error; it isn't user-facing.
|
||||
@ -1745,3 +1759,72 @@ def get_token_project_from_request(ctx):
|
||||
except AttributeError:
|
||||
LOG.warning('Attempted to identify requestor project ID value, '
|
||||
'however we were unable to do so. Possible older API?')
|
||||
|
||||
|
||||
def servicing_error_handler(task, logmsg, errmsg=None, traceback=False,
|
||||
tear_down_service=True, set_fail_state=True,
|
||||
set_maintenance=None):
|
||||
"""Put a failed node in SERVICEFAIL and maintenance (if needed).
|
||||
|
||||
:param task: a TaskManager instance.
|
||||
:param logmsg: Message to be logged.
|
||||
:param errmsg: Message for the user. Optional, if not provided `logmsg` is
|
||||
used.
|
||||
:param traceback: Whether to log a traceback. Defaults to False.
|
||||
:param tear_down_service: Whether to clean up the PXE and DHCP files after
|
||||
servie. Default to True.
|
||||
:param set_fail_state: Whether to set node to failed state. Default to
|
||||
True.
|
||||
:param set_maintenance: Whether to set maintenance mode. If None,
|
||||
maintenance mode will be set if and only if a clean step is being
|
||||
executed on a node.
|
||||
"""
|
||||
if set_maintenance is None:
|
||||
set_maintenance = bool(task.node.service_step)
|
||||
|
||||
errmsg = errmsg or logmsg
|
||||
LOG.error(logmsg, exc_info=traceback)
|
||||
node = task.node
|
||||
if set_maintenance:
|
||||
node.fault = faults.SERVICE_FAILURE
|
||||
node.maintenance = True
|
||||
|
||||
if tear_down_service:
|
||||
try:
|
||||
task.driver.deploy.tear_down_service(task)
|
||||
except Exception as e:
|
||||
msg2 = ('Failed to tear down servicing on node %(uuid)s, '
|
||||
'reason: %(err)s' % {'err': e, 'uuid': node.uuid})
|
||||
LOG.exception(msg2)
|
||||
errmsg = _('%s. Also failed to tear down servicing.') % errmsg
|
||||
|
||||
if node.provision_state in (
|
||||
states.SERVICING,
|
||||
states.SERVICEWAIT,
|
||||
states.SERVICEFAIL):
|
||||
# Clear clean step, msg should already include current step
|
||||
node.service_step = {}
|
||||
# Clear any leftover metadata about cleaning
|
||||
node.del_driver_internal_info('service_step_index')
|
||||
node.del_driver_internal_info('servicing_reboot')
|
||||
node.del_driver_internal_info('servicing_polling')
|
||||
node.del_driver_internal_info('skip_current_service_step')
|
||||
# We don't need to keep the old agent URL, or token
|
||||
# as it should change upon the next cleaning attempt.
|
||||
wipe_token_and_url(task)
|
||||
# For manual cleaning, the target provision state is MANAGEABLE, whereas
|
||||
# for automated cleaning, it is AVAILABLE.
|
||||
node_history_record(node, event=errmsg, event_type=states.SERVICING,
|
||||
error=True)
|
||||
# NOTE(dtantsur): avoid overwriting existing maintenance_reason
|
||||
if not node.maintenance_reason and set_maintenance:
|
||||
node.maintenance_reason = errmsg
|
||||
|
||||
if CONF.conductor.poweroff_in_servicefail:
|
||||
# NOTE(NobodyCam): Power off node in service fail
|
||||
node_power_action(task, states.POWER_OFF)
|
||||
|
||||
node.save()
|
||||
|
||||
if set_fail_state and node.provision_state != states.SERVICEFAIL:
|
||||
task.process_event('fail')
|
||||
|
@ -356,6 +356,13 @@ opts = [
|
||||
'when using Cleaning to perform '
|
||||
'hardware-transformative actions such as '
|
||||
'firmware upgrade.')),
|
||||
cfg.BoolOpt('poweroff_in_servicefail',
|
||||
default=False,
|
||||
help=_('If True power off nodes in the ``service failed`` '
|
||||
'state. Default False. Option may be unsafe '
|
||||
'when using service to perform '
|
||||
'hardware-transformative actions such as '
|
||||
'firmware upgrade.')),
|
||||
cfg.BoolOpt('permit_child_node_step_async_result',
|
||||
default=False,
|
||||
mutable=True,
|
||||
|
@ -128,7 +128,24 @@ opts = [
|
||||
'different CLID/IAID. Due to non-identical identifiers '
|
||||
'multiple addresses must be reserved for the host to '
|
||||
'ensure each step of the boot process can successfully '
|
||||
'lease addresses.'))
|
||||
'lease addresses.')),
|
||||
cfg.StrOpt('servicing_network',
|
||||
mutable=True,
|
||||
help=_('Neutron network UUID or name for booting the ramdisk '
|
||||
'for service mode. Required for "neutron" '
|
||||
'network interface, if service mode will be used. It '
|
||||
'is not used for the "flat" or "noop" network '
|
||||
'interfaces. If a name is provided, it must be unique '
|
||||
'among all networks or service will fail.')),
|
||||
cfg.ListOpt('servicing_network_security_groups',
|
||||
default=[],
|
||||
mutable=True,
|
||||
help=_('List of Neutron Security Group UUIDs to be applied '
|
||||
'during the node service process. Optional for the '
|
||||
'"neutron" network interface and not used for the '
|
||||
'"flat" or "noop" network interfaces. If not '
|
||||
'specified, the default security group is used.')),
|
||||
|
||||
]
|
||||
|
||||
|
||||
|
@ -0,0 +1,35 @@
|
||||
# All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""Add service_steps
|
||||
|
||||
Revision ID: aa2384fee727
|
||||
Revises: d163df1bab88
|
||||
Create Date: 2023-05-25 11:50:05.285602
|
||||
|
||||
"""
|
||||
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = 'aa2384fee727'
|
||||
down_revision = 'd163df1bab88'
|
||||
|
||||
|
||||
def upgrade():
|
||||
op.add_column('nodes', sa.Column('service_step', sa.Text(),
|
||||
nullable=True))
|
@ -217,6 +217,7 @@ class NodeBase(Base):
|
||||
secure_boot = Column(Boolean, nullable=True)
|
||||
shard = Column(String(255), nullable=True)
|
||||
parent_node = Column(String(36), nullable=True)
|
||||
service_step = Column(db_types.JsonEncodedDict)
|
||||
|
||||
|
||||
class Node(NodeBase):
|
||||
|
@ -247,6 +247,7 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
|
||||
instance.clean_steps = []
|
||||
instance.deploy_steps = []
|
||||
instance.verify_steps = []
|
||||
instance.service_steps = []
|
||||
for n, method in inspect.getmembers(instance, inspect.ismethod):
|
||||
if getattr(method, '_is_clean_step', False):
|
||||
# Create a CleanStep to represent this method
|
||||
@ -271,6 +272,15 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
|
||||
'priority': method._verify_step_priority,
|
||||
'interface': instance.interface_type}
|
||||
instance.verify_steps.append(step)
|
||||
if getattr(method, '_is_service_step', False):
|
||||
step = {'step': method.__name__,
|
||||
'priority': method._service_step_priority,
|
||||
'abortable': method._service_step_abortable,
|
||||
'argsinfo': method._service_step_argsinfo,
|
||||
'interface': instance.interface_type,
|
||||
'requires_ramdisk':
|
||||
method._service_step_requires_ramdisk}
|
||||
instance.service_steps.append(step)
|
||||
|
||||
if instance.clean_steps:
|
||||
LOG.debug('Found clean steps %(steps)s for interface '
|
||||
@ -287,6 +297,11 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
|
||||
'%(interface)s',
|
||||
{'steps': instance.deploy_steps,
|
||||
'interface': instance.interface_type})
|
||||
if instance.service_steps:
|
||||
LOG.debug('Found service steps %(steps)s for interface '
|
||||
'%(interface)s',
|
||||
{'steps': instance.service_steps,
|
||||
'interface': instance.interface_type})
|
||||
|
||||
return instance
|
||||
|
||||
@ -411,6 +426,35 @@ class BaseInterface(object, metaclass=abc.ABCMeta):
|
||||
"""
|
||||
return self._execute_step(task, step)
|
||||
|
||||
def get_service_steps(self, task):
|
||||
"""Get a list of service steps for the interface.
|
||||
|
||||
This function will return all service steps (both enabled and disabled)
|
||||
for the interface, in an unordered list.
|
||||
|
||||
:param task: A TaskManager object, useful for interfaces overriding
|
||||
this function
|
||||
:raises NodeServiceFailure: if there is a problem getting the steps
|
||||
from the driver. For example, when a node (using an agent driver)
|
||||
has just been enrolled and the agent isn't alive yet to be queried
|
||||
for the available clean steps.
|
||||
:returns: A list of clean step dictionaries
|
||||
"""
|
||||
return self.service_steps
|
||||
|
||||
def execute_service_step(self, task, step):
|
||||
"""Execute the service step on task.node.
|
||||
|
||||
A verify step must take a single positional argument: a TaskManager
|
||||
object. It does not take keyword variable arguments.
|
||||
|
||||
:param task: A TaskManager object
|
||||
:param step: The deploy step dictionary representing the step to
|
||||
execute
|
||||
:returns: None if this method has completed synchronously
|
||||
"""
|
||||
return self._execute_step(task, step)
|
||||
|
||||
|
||||
class DeployInterface(BaseInterface):
|
||||
"""Interface for deploy-related actions."""
|
||||
@ -545,6 +589,38 @@ class DeployInterface(BaseInterface):
|
||||
'the driver %(driver)s does not support heartbeating',
|
||||
{'node': task.node.uuid, 'driver': task.node.driver})
|
||||
|
||||
def tear_down_service(self, task):
|
||||
"""Tear down after servicing is completed.
|
||||
|
||||
Given that servicing is complete, do all cleanup and tear
|
||||
down necessary to allow the node to be returned to an active
|
||||
state.
|
||||
|
||||
:param task: A TaskManager instance containing the node to act on.
|
||||
"""
|
||||
pass
|
||||
|
||||
def prepare_service(self, task):
|
||||
"""Prepare the node for servicing tasks.
|
||||
|
||||
For example, nodes that use the Ironic Python Agent will need to
|
||||
boot the ramdisk in order to do in-band service tasks.
|
||||
|
||||
If the function is asynchronous, the driver will need to handle
|
||||
settings node.driver_internal_info['service_steps'] and
|
||||
node.service_step, as they would be set in
|
||||
ironic.conductor.manager._do_node_service, but cannot be set when
|
||||
this is asynchronous. After, the interface should make an RPC call
|
||||
to continue_node_servicing to start cleaning.
|
||||
|
||||
:param task: A TaskManager instance containing the node to act on.
|
||||
:returns: If this function is going to be asynchronous, should return
|
||||
`states.SERVICEWAIT`. Otherwise, should return `None`.
|
||||
The interface will need to call _get_cleaning_steps and then RPC
|
||||
to continue_node_service.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class BootInterface(BaseInterface):
|
||||
"""Interface for boot-related actions."""
|
||||
@ -1710,6 +1786,28 @@ class NetworkInterface(BaseInterface):
|
||||
"""
|
||||
return task.node.network_data or {}
|
||||
|
||||
def add_servicing_network(self, task):
|
||||
"""Add the servicing network to the node.
|
||||
|
||||
:param task: A TaskManager instance.
|
||||
:returns: a dictionary in the form {port.uuid: neutron_port['id']}
|
||||
:raises: NetworkError
|
||||
:raises: InvalidParameterValue, if the network interface configuration
|
||||
is invalid.
|
||||
"""
|
||||
return {}
|
||||
|
||||
def remove_servicing_network(self, task):
|
||||
"""Removes the servicing network from a node.
|
||||
|
||||
:param task: A TaskManager instance.
|
||||
:raises: NetworkError
|
||||
:raises: InvalidParameterValue, if the network interface configuration
|
||||
is invalid.
|
||||
:raises: MissingParameterValue, if some parameters are missing.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class StorageInterface(BaseInterface, metaclass=abc.ABCMeta):
|
||||
"""Base class for storage interfaces."""
|
||||
@ -2023,3 +2121,82 @@ def verify_step(priority):
|
||||
|
||||
return func
|
||||
return decorator
|
||||
|
||||
|
||||
def service_step(priority=None, abortable=False, argsinfo=None,
|
||||
requires_ramdisk=True):
|
||||
"""Decorator for service steps.
|
||||
|
||||
Service steps may be used in performing service upon a node.
|
||||
|
||||
For service, the steps will be executed in a similar fashion
|
||||
to cleaning, but the steps and order of execution must be
|
||||
explicitly specified by the user when invoking the servicing API.
|
||||
|
||||
Decorated service steps must take as the only a single positional
|
||||
argument, a TaskManager object, in addition to a keyword arguments
|
||||
variable (as described in argsinfo).
|
||||
|
||||
Service steps can be either synchronous or asynchronous. If the step is
|
||||
synchronous, it should return `None` when finished, and the conductor
|
||||
will continue on to the next step. While the clean step is executing, the
|
||||
node will be in `states.SERVICING` provision state. If the step is
|
||||
asynchronous, the step should return `states.SERVICEWAIT` to the
|
||||
conductor before it starts the asynchronous work. When the step is
|
||||
complete, the step should make an RPC call to `continue_node_service` to
|
||||
move to the next step in servicing. The node will be in
|
||||
`states.SERVICEWAIT` provision state during the asynchronous work.
|
||||
|
||||
Examples::
|
||||
|
||||
class MyInterface(base.BaseInterface):
|
||||
@base.service_step()
|
||||
def example_service(self, task):
|
||||
# do some service actions
|
||||
|
||||
@base.service_step(priority=0, abortable=True, argsinfo=
|
||||
{'size': {'description': 'size of widget (MB)',
|
||||
'required': True}})
|
||||
def advanced_service(self, task, **kwargs):
|
||||
# do some advanced magical service
|
||||
|
||||
:param priority: an integer priority, defaults to None which maps to 0.
|
||||
Priorities are not considered, by default but exists
|
||||
should this functionality be adopted later on to align
|
||||
with the steps framework.
|
||||
:param abortable: Boolean value. Whether the clean step is abortable
|
||||
or not; defaults to False.
|
||||
:param argsinfo: a dictionary of keyword arguments where key is the name of
|
||||
the argument and value is a dictionary as follows::
|
||||
|
||||
'description': <description>. Required. This should include
|
||||
possible values.
|
||||
'required': Boolean. Optional; default is False. True if this
|
||||
argument is required. If so, it must be specified in
|
||||
the service request; false if it is optional.
|
||||
:param requires_ramdisk: Whether this step requires the ramdisk
|
||||
to be running. Should be set to False for purely out-of-band steps.
|
||||
:raises InvalidParameterValue: if any of the arguments are invalid
|
||||
"""
|
||||
def decorator(func):
|
||||
func._is_service_step = True
|
||||
if isinstance(priority, int):
|
||||
func._service_step_priority = priority
|
||||
else:
|
||||
# Service steps are only invoked by operators in a model
|
||||
# like manual cleaning, so there is no need to explicitly
|
||||
# require it on the decorator.
|
||||
func._service_step_priority = 0
|
||||
|
||||
if isinstance(abortable, bool):
|
||||
func._service_step_abortable = abortable
|
||||
else:
|
||||
raise exception.InvalidParameterValue(
|
||||
_('"abortable" must be a Boolean value instead of "%s"')
|
||||
% abortable)
|
||||
|
||||
_validate_argsinfo(argsinfo)
|
||||
func._service_step_argsinfo = argsinfo
|
||||
func._service_step_requires_ramdisk = requires_ramdisk
|
||||
return func
|
||||
return decorator
|
||||
|
@ -32,6 +32,7 @@ from ironic.common import states
|
||||
from ironic.common import utils
|
||||
from ironic.conductor import cleaning
|
||||
from ironic.conductor import deployments
|
||||
from ironic.conductor import servicing
|
||||
from ironic.conductor import steps as conductor_steps
|
||||
from ironic.conductor import task_manager
|
||||
from ironic.conductor import utils as manager_utils
|
||||
@ -85,21 +86,23 @@ VENDOR_PROPERTIES = {
|
||||
|
||||
__HEARTBEAT_RECORD_ONLY = (states.ENROLL, states.MANAGEABLE, states.AVAILABLE,
|
||||
states.CLEANING, states.DEPLOYING, states.RESCUING,
|
||||
states.DEPLOYHOLD, states.CLEANHOLD)
|
||||
states.DEPLOYHOLD, states.CLEANHOLD,
|
||||
states.SERVICING, states.SERVICEHOLD)
|
||||
_HEARTBEAT_RECORD_ONLY = frozenset(__HEARTBEAT_RECORD_ONLY)
|
||||
|
||||
_HEARTBEAT_ALLOWED = (states.DEPLOYWAIT, states.CLEANWAIT, states.RESCUEWAIT,
|
||||
# These are allowed but don't cause any actions since
|
||||
# they're also in HEARTBEAT_RECORD_ONLY.
|
||||
states.DEPLOYING, states.CLEANING, states.RESCUING,
|
||||
states.DEPLOYHOLD, states.CLEANHOLD)
|
||||
states.DEPLOYHOLD, states.CLEANHOLD, states.SERVICING,
|
||||
states.SERVICEWAIT, states.SERVICEHOLD)
|
||||
HEARTBEAT_ALLOWED = frozenset(_HEARTBEAT_ALLOWED)
|
||||
|
||||
_FASTTRACK_HEARTBEAT_ALLOWED = (states.DEPLOYWAIT, states.CLEANWAIT,
|
||||
states.RESCUEWAIT, states.ENROLL,
|
||||
states.MANAGEABLE, states.AVAILABLE,
|
||||
states.DEPLOYING, states.CLEANHOLD,
|
||||
states.DEPLOYHOLD)
|
||||
states.DEPLOYHOLD, states.SERVICEHOLD)
|
||||
FASTTRACK_HEARTBEAT_ALLOWED = frozenset(_FASTTRACK_HEARTBEAT_ALLOWED)
|
||||
|
||||
|
||||
@ -164,11 +167,12 @@ def _get_post_step_hook(node, step_type):
|
||||
"""Get post clean/deploy step hook for the currently executing step.
|
||||
|
||||
:param node: a node object
|
||||
:param step_type: 'clean' or 'deploy'
|
||||
:param step_type: 'clean' or 'deploy' or 'service'
|
||||
:returns: a method if there is a post clean step hook for this clean
|
||||
step; None otherwise
|
||||
"""
|
||||
step_obj = node.clean_step if step_type == 'clean' else node.deploy_step
|
||||
|
||||
step_obj = getattr(node, "%s_step" % step_type)
|
||||
interface = step_obj.get('interface')
|
||||
step = step_obj.get('step')
|
||||
try:
|
||||
@ -178,17 +182,16 @@ def _get_post_step_hook(node, step_type):
|
||||
|
||||
|
||||
def _post_step_reboot(task, step_type):
|
||||
"""Reboots a node out of band after a clean/deploy step that requires it.
|
||||
"""Reboots a node out of band after a step that requires it.
|
||||
|
||||
If an agent step has 'reboot_requested': True, reboots the node when
|
||||
the step is completed. Will put the node in CLEANFAIL/DEPLOYFAIL if
|
||||
the node cannot be rebooted.
|
||||
|
||||
:param task: a TaskManager instance
|
||||
:param step_type: 'clean' or 'deploy'
|
||||
:param step_type: 'clean' or 'deploy' or 'service'
|
||||
"""
|
||||
current_step = (task.node.clean_step if step_type == 'clean'
|
||||
else task.node.deploy_step)
|
||||
current_step = getattr(task.node, '%s_step' % step_type)
|
||||
try:
|
||||
# NOTE(fellypefca): ensure that the baremetal node boots back into
|
||||
# the ramdisk after reboot.
|
||||
@ -205,16 +208,22 @@ def _post_step_reboot(task, step_type):
|
||||
if step_type == 'clean':
|
||||
manager_utils.cleaning_error_handler(task, msg,
|
||||
traceback=traceback)
|
||||
else:
|
||||
elif step_type == 'deploy':
|
||||
manager_utils.deploying_error_handler(task, msg,
|
||||
traceback=traceback)
|
||||
elif step_type == 'service':
|
||||
manager_utils.servicing_error_handler(task, msg,
|
||||
traceback=traceback)
|
||||
return
|
||||
|
||||
# Signify that we've rebooted
|
||||
if step_type == 'clean':
|
||||
task.node.set_driver_internal_info('cleaning_reboot', True)
|
||||
else:
|
||||
elif step_type == 'deploy':
|
||||
task.node.set_driver_internal_info('deployment_reboot', True)
|
||||
elif step_type == 'service':
|
||||
task.node.set_driver_internal_info('servicing_reboot', True)
|
||||
|
||||
if not task.node.driver_internal_info.get(
|
||||
'agent_secret_token_pregenerated', False):
|
||||
# Wipes out the existing recorded token because the machine will
|
||||
@ -261,8 +270,7 @@ def _get_completed_command(task, commands, step_type):
|
||||
|
||||
last_result = last_command.get('command_result') or {}
|
||||
last_step = last_result.get('%s_step' % step_type)
|
||||
current_step = (task.node.clean_step if step_type == 'clean'
|
||||
else task.node.deploy_step)
|
||||
current_step = getattr(task.node, '%s_step' % step_type)
|
||||
if last_command['command_status'] == 'RUNNING':
|
||||
LOG.debug('%(type)s step still running for node %(node)s: %(step)s',
|
||||
{'step': last_step, 'node': task.node.uuid,
|
||||
@ -410,7 +418,10 @@ def _continue_steps(task, step_type):
|
||||
cleaning.continue_node_clean(task)
|
||||
else:
|
||||
task.process_event('resume')
|
||||
if step_type == 'deploy':
|
||||
deployments.continue_node_deploy(task)
|
||||
else:
|
||||
servicing.continue_node_service(task)
|
||||
|
||||
|
||||
class HeartbeatMixin(object):
|
||||
@ -439,11 +450,18 @@ class HeartbeatMixin(object):
|
||||
"""
|
||||
return self.refresh_steps(task, 'clean')
|
||||
|
||||
def process_next_step(self, task, step_type):
|
||||
"""Start the next clean/deploy step if the previous one is complete.
|
||||
def refresh_service_steps(self, task):
|
||||
"""Refresh the node's cached service steps
|
||||
|
||||
:param task: a TaskManager instance
|
||||
:param step_type: "clean" or "deploy"
|
||||
"""
|
||||
return self.refresh_steps(task, 'service')
|
||||
|
||||
def process_next_step(self, task, step_type):
|
||||
"""Start the next step if the previous one is complete.
|
||||
|
||||
:param task: a TaskManager instance
|
||||
:param step_type: "clean", "deploy", "service"
|
||||
"""
|
||||
|
||||
def continue_cleaning(self, task):
|
||||
@ -453,6 +471,13 @@ class HeartbeatMixin(object):
|
||||
"""
|
||||
return self.process_next_step(task, 'clean')
|
||||
|
||||
def continue_servicing(self, task):
|
||||
"""Start the next cleaning step if the previous one is complete.
|
||||
|
||||
:param task: a TaskManager instance
|
||||
"""
|
||||
return self.process_next_step(task, 'service')
|
||||
|
||||
def heartbeat_allowed(self, node):
|
||||
if utils.fast_track_enabled(node):
|
||||
return node.provision_state in FASTTRACK_HEARTBEAT_ALLOWED
|
||||
@ -480,6 +505,12 @@ class HeartbeatMixin(object):
|
||||
'maintenance mode', node.uuid)
|
||||
last_error = _('Rescue aborted as node is in maintenance mode')
|
||||
manager_utils.rescuing_error_handler(task, last_error)
|
||||
elif (node.provision_state in (states.SERVICING, states.SERVICEWAIT)
|
||||
and not CONF.conductor.allow_provisioning_in_maintenance):
|
||||
LOG.error('Aborting service for node %s, as it is in '
|
||||
'maintenance mode', node.uuid)
|
||||
last_error = _('Service aborted as node is in maintenance mode')
|
||||
manager_utils.servicing_error_handler(task, last_error)
|
||||
else:
|
||||
LOG.warning('Heartbeat from node %(node)s in '
|
||||
'maintenance mode; not taking any action.',
|
||||
@ -559,6 +590,37 @@ class HeartbeatMixin(object):
|
||||
states.RESCUEWAIT):
|
||||
manager_utils.rescuing_error_handler(task, last_error)
|
||||
|
||||
def _heartbeat_service_wait(self, task):
|
||||
node = task.node
|
||||
msg = _('Failed checking if service is done')
|
||||
try:
|
||||
node.touch_provisioning()
|
||||
if not node.service_step:
|
||||
LOG.debug('Node %s just booted to start %s service',
|
||||
node.uuid)
|
||||
msg = _('Node failed to start the first service step')
|
||||
task.process_event('resume')
|
||||
# First, cache the service steps
|
||||
self.refresh_service_steps(task)
|
||||
# Then set/verify node servicesteps and start service
|
||||
conductor_steps.set_node_service_steps(task)
|
||||
servicing.continue_node_service(task)
|
||||
else:
|
||||
msg = _('Node failed to check service progress')
|
||||
# Check if the driver is polling for completion of a step,
|
||||
# via the 'cleaning_polling' flag.
|
||||
polling = node.driver_internal_info.get(
|
||||
'service_polling', False)
|
||||
if not polling:
|
||||
self.continue_servicing(task)
|
||||
except Exception as e:
|
||||
last_error = _('%(msg)s: %(exc)s') % {'msg': msg, 'exc': e}
|
||||
log_msg = ('Asynchronous exception for node %(node)s: %(err)s' %
|
||||
{'node': task.node.uuid, 'err': last_error})
|
||||
if node.provision_state in (states.SERVICING, states.SERVICEWAIT):
|
||||
manager_utils.servicing_error_handler(task, log_msg,
|
||||
errmsg=last_error)
|
||||
|
||||
@METRICS.timer('HeartbeatMixin.heartbeat')
|
||||
def heartbeat(self, task, callback_url, agent_version,
|
||||
agent_verify_ca=None, agent_status=None,
|
||||
@ -616,13 +678,14 @@ class HeartbeatMixin(object):
|
||||
|
||||
if node.maintenance:
|
||||
return self._heartbeat_in_maintenance(task)
|
||||
|
||||
if node.provision_state == states.DEPLOYWAIT:
|
||||
self._heartbeat_deploy_wait(task)
|
||||
elif node.provision_state == states.CLEANWAIT:
|
||||
self._heartbeat_clean_wait(task)
|
||||
elif node.provision_state == states.RESCUEWAIT:
|
||||
self._heartbeat_rescue_wait(task)
|
||||
elif node.provision_state == states.SERVICEWAIT:
|
||||
self._heartbeat_service_wait(task)
|
||||
|
||||
def _finalize_rescue(self, task):
|
||||
"""Call ramdisk to prepare rescue mode and verify result.
|
||||
@ -744,6 +807,35 @@ class AgentBaseMixin(object):
|
||||
deploy_utils.tear_down_inband_cleaning(
|
||||
task, manage_boot=self.should_manage_boot(task))
|
||||
|
||||
@METRICS.timer('AgentBaseMixin.prepare_cleaning')
|
||||
def prepare_service(self, task):
|
||||
"""Boot into the agent to prepare for cleaning.
|
||||
|
||||
:param task: a TaskManager object containing the node
|
||||
:raises: NodeCleaningFailure, NetworkError if the previous cleaning
|
||||
ports cannot be removed or if new cleaning ports cannot be created.
|
||||
:raises: InvalidParameterValue if cleaning network UUID config option
|
||||
has an invalid value.
|
||||
:returns: states.CLEANWAIT to signify an asynchronous prepare
|
||||
"""
|
||||
result = deploy_utils.prepare_inband_service(
|
||||
task, manage_boot=self.should_manage_boot(task))
|
||||
if result is None:
|
||||
# Fast-track, ensure the steps are available.
|
||||
self.refresh_steps(task, 'service')
|
||||
return result
|
||||
|
||||
@METRICS.timer('AgentBaseMixin.tear_down_service')
|
||||
def tear_down_service(self, task):
|
||||
"""Clean up the PXE and DHCP files after cleaning.
|
||||
|
||||
:param task: a TaskManager object containing the node
|
||||
:raises: NodeServiceFailure, NetworkError if the cleaning ports cannot
|
||||
be removed
|
||||
"""
|
||||
deploy_utils.tear_down_inband_service(
|
||||
task, manage_boot=self.should_manage_boot(task))
|
||||
|
||||
@METRICS.timer('AgentBaseMixin.get_clean_steps')
|
||||
def get_clean_steps(self, task):
|
||||
"""Get the list of clean steps from the agent.
|
||||
@ -785,7 +877,6 @@ class AgentBaseMixin(object):
|
||||
'Previously cached steps: %(steps)s',
|
||||
{'node': node.uuid, 'type': step_type,
|
||||
'steps': previous_steps})
|
||||
|
||||
client = agent_client.get_client(task)
|
||||
call = getattr(client, 'get_%s_steps' % step_type)
|
||||
try:
|
||||
@ -936,32 +1027,34 @@ class AgentBaseMixin(object):
|
||||
set to True, this method will coordinate the reboot once the step is
|
||||
completed.
|
||||
"""
|
||||
assert step_type in ('clean', 'deploy')
|
||||
assert step_type in ('clean', 'deploy', 'service')
|
||||
|
||||
node = task.node
|
||||
client = agent_client.get_client(task)
|
||||
agent_commands = client.get_commands_status(task.node)
|
||||
|
||||
if _freshly_booted(agent_commands, step_type):
|
||||
field = ('cleaning_reboot' if step_type == 'clean'
|
||||
else 'deployment_reboot')
|
||||
if step_type == 'clean':
|
||||
field = 'cleaning_reboot'
|
||||
elif step_type == 'service':
|
||||
field = 'servicing_reboot'
|
||||
else:
|
||||
# TODO(TheJulia): One day we should standardize the field
|
||||
# names here, but we also need to balance human ability
|
||||
# to understand what is going on so *shrug*.
|
||||
field = 'deployment_reboot'
|
||||
utils.pop_node_nested_field(node, 'driver_internal_info', field)
|
||||
node.save()
|
||||
return _continue_steps(task, step_type)
|
||||
|
||||
current_step = (node.clean_step if step_type == 'clean'
|
||||
else node.deploy_step)
|
||||
current_step = getattr(node, '%s_step' % step_type)
|
||||
command = _get_completed_command(task, agent_commands, step_type)
|
||||
LOG.debug('%(type)s command status for node %(node)s on step %(step)s:'
|
||||
' %(command)s', {'node': node.uuid,
|
||||
'step': current_step,
|
||||
'command': command,
|
||||
'type': step_type})
|
||||
|
||||
if not command:
|
||||
# Agent command in progress
|
||||
return
|
||||
|
||||
if command.get('command_status') == 'FAILED':
|
||||
msg = (_('%(type)s step %(step)s failed on node %(node)s. '
|
||||
'%(err)s') %
|
||||
|
@ -771,6 +771,86 @@ def tear_down_inband_cleaning(task, manage_boot=True):
|
||||
task, power_state_to_restore)
|
||||
|
||||
|
||||
def prepare_inband_service(self, task):
|
||||
"""Boot a service ramdisk on the node.
|
||||
|
||||
:param task: a TaskManager instance.
|
||||
:raises: NetworkError if the tenant ports cannot be removed.
|
||||
:raises: InvalidParameterValue when the wrong power state is specified
|
||||
or the wrong driver info is specified for power management.
|
||||
:raises: other exceptions by the node's power driver if something
|
||||
wrong occurred during the power action.
|
||||
:raises: any boot interface's prepare_ramdisk exceptions.
|
||||
:returns: Returns states.SERVICEWAIT
|
||||
"""
|
||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
# NOTE(TheJulia): Revealing that the power is off at any time can
|
||||
# cause external power sync to decide that the node must be off.
|
||||
# This may result in a post-rescued instance being turned off
|
||||
# unexpectedly after rescue has started.
|
||||
# TODO(TheJulia): Once we have power/state callbacks to nova,
|
||||
# the reset of the power_state can be removed.
|
||||
task.node.power_state = states.POWER_ON
|
||||
task.node.save()
|
||||
|
||||
task.driver.boot.clean_up_instance(task)
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.unconfigure_tenant_networks(task)
|
||||
task.driver.network.add_service_network(task)
|
||||
if CONF.agent.manage_agent_boot:
|
||||
# prepare_ramdisk will set the boot device
|
||||
prepare_agent_boot(task)
|
||||
manager_utils.node_power_action(task, states.POWER_ON)
|
||||
|
||||
return states.SERVICEWAIT
|
||||
|
||||
|
||||
def tear_down_inband_service(task, manage_boot=True):
|
||||
"""Tears down the environment setup for in-band service.
|
||||
|
||||
This method does the following:
|
||||
1. Powers off the bare metal node (unless the node is fast
|
||||
tracked or there was a service failure).
|
||||
2. If 'manage_boot' parameter is set to true, it also calls
|
||||
the 'clean_up_ramdisk' method of boot interface to clean
|
||||
up the environment that was set for booting agent ramdisk.
|
||||
3. Deletes the cleaning ports which were setup as part
|
||||
of cleaning.
|
||||
|
||||
:param task: a TaskManager object containing the node
|
||||
:param manage_boot: If this is set to True, this method calls the
|
||||
'clean_up_ramdisk' method of boot interface to boot the agent
|
||||
ramdisk. If False, it skips this step.
|
||||
:raises: NetworkError, NodeServiceFailure if the cleaning ports cannot be
|
||||
removed.
|
||||
"""
|
||||
node = task.node
|
||||
service_failure = (node.fault == faults.SERVICE_FAILURE)
|
||||
|
||||
if not service_failure:
|
||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
|
||||
if manage_boot:
|
||||
task.driver.boot.clean_up_ramdisk(task)
|
||||
|
||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||
task.driver.network.remove_service_network(task)
|
||||
|
||||
if not service_failure:
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
|
||||
with manager_utils.power_state_for_network_configuration(task):
|
||||
task.driver.network.remove_service_network(task)
|
||||
task.driver.network.configure_tenant_networks(task)
|
||||
task.driver.boot.prepare_instance(task)
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
|
||||
# Change the task instead of return the state.
|
||||
task.process_event('done')
|
||||
|
||||
|
||||
def get_image_instance_info(node):
|
||||
"""Gets the image information from the node.
|
||||
|
||||
|
@ -224,6 +224,7 @@ class FakeVendorB(base.VendorInterface):
|
||||
sleep(CONF.fake.vendor_delay)
|
||||
return True if bar == 'woof' else False
|
||||
|
||||
@base.service_step(requires_ramdisk=False)
|
||||
@base.clean_step(priority=1)
|
||||
@base.passthru(['POST'],
|
||||
description=_("Test pass-through to wait."))
|
||||
@ -234,6 +235,10 @@ class FakeVendorB(base.VendorInterface):
|
||||
# NOTE(TheJulia): Step methods invoked via an API *cannot*
|
||||
# have return values
|
||||
|
||||
@base.service_step()
|
||||
def trigger_servicewait(self, task, **kwargs):
|
||||
return states.SERVICEWAIT
|
||||
|
||||
|
||||
class FakeConsole(base.ConsoleInterface):
|
||||
"""Example implementation of a simple console interface."""
|
||||
|
@ -217,3 +217,28 @@ class FlatNetwork(common.NeutronVIFPortIDMixin,
|
||||
"""
|
||||
return self._remove_service_network(
|
||||
task, self.get_inspection_network_uuid(task), 'inspection')
|
||||
|
||||
def add_servicing_network(self, task):
|
||||
"""Add the rescuing network to a node.
|
||||
|
||||
Flat network does not use the servicing network.
|
||||
Bind the port again since unconfigure_tenant_network() unbound it.
|
||||
|
||||
:param task: A TaskManager instance.
|
||||
:returns: a dictionary in the form {port.uuid: neutron_port['id']}
|
||||
:raises: NetworkError, InvalidParameterValue
|
||||
"""
|
||||
LOG.info('Bind ports for servicing node %s', task.node.uuid)
|
||||
self._bind_flat_ports(task)
|
||||
|
||||
def remove_servicing_network(self, task):
|
||||
"""Remove the servicing network from a node.
|
||||
|
||||
Flat network does not use the servicing network.
|
||||
Unbind the port again since add_rescuing_network() bound it.
|
||||
|
||||
:param task: A TaskManager instance.
|
||||
:raises: NetworkError
|
||||
"""
|
||||
LOG.info('Unbind ports for servicing node %s', task.node.uuid)
|
||||
self._unbind_flat_ports(task)
|
||||
|
@ -265,3 +265,33 @@ class NeutronNetwork(common.NeutronVIFPortIDMixin,
|
||||
"""
|
||||
return self._remove_network(
|
||||
task, self.get_inspection_network_uuid(task), 'inspection')
|
||||
|
||||
def validate_servicing(self, task):
|
||||
"""Validates the network interface for servicing operation.
|
||||
|
||||
:param task: a TaskManager instance.
|
||||
:raises: InvalidParameterValue, if the network interface configuration
|
||||
is invalid.
|
||||
:raises: MissingParameterValue, if some parameters are missing.
|
||||
"""
|
||||
self.get_servicing_network_uuid(task)
|
||||
|
||||
def add_servicing_network(self, task):
|
||||
"""Create neutron ports for each port to boot the servicing ramdisk.
|
||||
|
||||
:param task: a TaskManager instance.
|
||||
:returns: a dictionary in the form {port.uuid: neutron_port['id']}
|
||||
"""
|
||||
return self._add_network(
|
||||
task, self.get_servicing_network_uuid(task),
|
||||
CONF.neutron.servicing_network_security_groups,
|
||||
'servicing')
|
||||
|
||||
def remove_servicing_network(self, task):
|
||||
"""Deletes neutron port created for booting the servicing ramdisk.
|
||||
|
||||
:param task: a TaskManager instance.
|
||||
:raises: NetworkError
|
||||
"""
|
||||
return self._remove_network(
|
||||
task, self.get_servicing_network_uuid(task), 'servicing')
|
||||
|
@ -81,7 +81,8 @@ class Node(base.IronicObject, object_base.VersionedObjectDictCompat):
|
||||
# Version 1.37: Add shard field
|
||||
# Version 1.38: Add parent_node field
|
||||
# Version 1.39: Add firmware_interface field
|
||||
VERSION = '1.39'
|
||||
# Version 1.40: Add service_step field
|
||||
VERSION = '1.40'
|
||||
|
||||
dbapi = db_api.get_instance()
|
||||
|
||||
@ -107,6 +108,11 @@ class Node(base.IronicObject, object_base.VersionedObjectDictCompat):
|
||||
# or has not yet started.
|
||||
'deploy_step': object_fields.FlexibleDictField(nullable=True),
|
||||
|
||||
# A service step dictionary, indicating the current step
|
||||
# being executed, or None, indicating deployment is not in progress
|
||||
# or has not yet started.
|
||||
'service_step': object_fields.FlexibleDictField(nullable=True),
|
||||
|
||||
'raid_config': object_fields.FlexibleDictField(nullable=True),
|
||||
'target_raid_config': object_fields.FlexibleDictField(nullable=True),
|
||||
|
||||
|
@ -145,6 +145,7 @@ class TestListNodes(test_api_base.BaseApiTest):
|
||||
self.assertNotIn('retired_reason', data['nodes'][0])
|
||||
self.assertNotIn('lessee', data['nodes'][0])
|
||||
self.assertNotIn('network_data', data['nodes'][0])
|
||||
self.assertNotIn('service_steps', data['nodes'][0])
|
||||
|
||||
@mock.patch.object(policy, 'check', autospec=True)
|
||||
@mock.patch.object(policy, 'check_policy', autospec=True)
|
||||
@ -223,6 +224,7 @@ class TestListNodes(test_api_base.BaseApiTest):
|
||||
self.assertIn('lessee', data)
|
||||
self.assertNotIn('allocation_id', data)
|
||||
self.assertIn('allocation_uuid', data)
|
||||
self.assertIn('service_step', data)
|
||||
|
||||
def test_get_one_configdrive_dict(self):
|
||||
fake_instance_info = {
|
||||
@ -6489,6 +6491,54 @@ ORHMKeXMO8fcK0By7CiMKwHSXCoEQgfQhWwpMdSsO8LgHCjh87DQc= """
|
||||
self.assertEqual(http_client.NOT_ACCEPTABLE, ret.status_code)
|
||||
mock_dpa.assert_not_called()
|
||||
|
||||
@mock.patch.object(rpcapi.ConductorAPI, 'do_provisioning_action',
|
||||
autospec=True)
|
||||
def test_unhold_servicehold(self, mock_dpa):
|
||||
self.node.provision_state = states.SERVICEHOLD
|
||||
self.node.save()
|
||||
|
||||
ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid,
|
||||
{'target': states.VERBS['unhold']},
|
||||
headers={api_base.Version.string: "1.86"})
|
||||
self.assertEqual(http_client.ACCEPTED, ret.status_code)
|
||||
self.assertEqual(b'', ret.body)
|
||||
mock_dpa.assert_called_once_with(mock.ANY, mock.ANY, self.node.uuid,
|
||||
states.VERBS['unhold'],
|
||||
'test-topic')
|
||||
|
||||
@mock.patch.object(rpcapi.ConductorAPI, 'do_node_service',
|
||||
autospec=True)
|
||||
def test_service(self, mock_dns):
|
||||
self.node.provision_state = states.SERVICEHOLD
|
||||
self.node.save()
|
||||
|
||||
ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid,
|
||||
{'target': states.VERBS['service'],
|
||||
'service_steps': [{
|
||||
'interface': 'deploy',
|
||||
'step': 'meow'}]},
|
||||
headers={api_base.Version.string: "1.87"})
|
||||
self.assertEqual(http_client.ACCEPTED, ret.status_code)
|
||||
self.assertEqual(b'', ret.body)
|
||||
mock_dns.assert_called_once_with(
|
||||
mock.ANY, mock.ANY, self.node.uuid,
|
||||
[{'interface': 'deploy', 'step': 'meow'}],
|
||||
None, topic='test-topic')
|
||||
|
||||
@mock.patch.object(rpcapi.ConductorAPI, 'do_node_service',
|
||||
autospec=True)
|
||||
def test_service_args_required(self, mock_dns):
|
||||
self.node.provision_state = states.SERVICEHOLD
|
||||
self.node.save()
|
||||
|
||||
ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid,
|
||||
{'target': states.VERBS['service']},
|
||||
headers={api_base.Version.string: "1.87"},
|
||||
expect_errors=True)
|
||||
self.assertEqual(http_client.BAD_REQUEST, ret.status_code)
|
||||
self.assertIn('error_message', ret.json)
|
||||
mock_dns.assert_not_called()
|
||||
|
||||
def test_set_console_mode_enabled(self):
|
||||
with mock.patch.object(rpcapi.ConductorAPI,
|
||||
'set_console_mode',
|
||||
@ -6975,11 +7025,28 @@ class TestCheckCleanSteps(db_base.DbTestCase):
|
||||
|
||||
step1 = {"step": "upgrade_firmware", "interface": "deploy",
|
||||
"args": {"arg1": "value1", "arg2": "value2"}}
|
||||
# NOTE(TheJulia): _check_service_steps and _check_deploy_steps
|
||||
# both route back to _check_steps which is what backs _check
|
||||
# clean steps. It is needful duplication for cases, but it doesn't
|
||||
# make a ton of sense to copy/paste everything over and over unless
|
||||
# there is a specific case. In any case, do the needful here.
|
||||
api_node._check_clean_steps([step1])
|
||||
|
||||
step2 = {"step": "configure raid", "interface": "raid"}
|
||||
step2 = {"step": "configure raid", "interface": "raid",
|
||||
"args": {}}
|
||||
api_node._check_clean_steps([step1, step2])
|
||||
|
||||
api_node._check_service_steps([step1])
|
||||
api_node._check_service_steps([step1, step2])
|
||||
# Schema differences exist, cleaning doesn't have a schema for
|
||||
# priority when validated.
|
||||
|
||||
step1['priority'] = 10
|
||||
step2['priority'] = 12
|
||||
api_node._check_deploy_steps([step1])
|
||||
|
||||
api_node._check_deploy_steps([step1, step2])
|
||||
|
||||
@mock.patch.object(api_utils, 'check_node_policy_and_retrieve',
|
||||
autospec=True)
|
||||
def test__check_clean_steps_child_node(self, mock_policy):
|
||||
@ -7029,6 +7096,15 @@ class TestCheckCleanSteps(db_base.DbTestCase):
|
||||
mock.call('baremetal:node:set_provision_state',
|
||||
child_node_2.uuid)])
|
||||
|
||||
@mock.patch.object(api_node, '_check_steps', autospec=True)
|
||||
def test_check__check_steps_wrappers(self, check_mock):
|
||||
api_node._check_clean_steps({})
|
||||
self.assertEqual(1, check_mock.call_count)
|
||||
api_node._check_deploy_steps({})
|
||||
self.assertEqual(2, check_mock.call_count)
|
||||
api_node._check_service_steps({})
|
||||
self.assertEqual(3, check_mock.call_count)
|
||||
|
||||
|
||||
class TestAttachDetachVif(test_api_base.BaseApiTest):
|
||||
|
||||
|
@ -1422,3 +1422,28 @@ class TestGetPhysnetsByPortUUID(base.TestCase):
|
||||
self.client, port_uuid)
|
||||
mock_gp.assert_called_once_with(self.client, port_uuid)
|
||||
mock_gn.assert_called_once_with(self.client, network_uuid)
|
||||
|
||||
|
||||
class TestNeutronNetworkInterfaceMixin(db_base.DbTestCase):
|
||||
|
||||
def setUp(self):
|
||||
super(TestNeutronNetworkInterfaceMixin, self).setUp()
|
||||
self.node = object_utils.create_test_node(self.context)
|
||||
|
||||
def test_get_network_names_and_uuids(self):
|
||||
"""A test to validate confiured overrides work."""
|
||||
for name in ['cleaning', 'provisioning', 'rescuing', 'inspection',
|
||||
'servicing']:
|
||||
method_name = 'get_{}_network_uuid'.format(name)
|
||||
method_to_call = getattr(neutron.NeutronNetworkInterfaceMixin,
|
||||
method_name)
|
||||
network_uuid = uuidutils.generate_uuid()
|
||||
self.node.driver_info = {'%s_network' % name: network_uuid}
|
||||
self.node.save()
|
||||
with mock.patch.object(neutron, 'validate_network',
|
||||
autospec=True) as mock_validate:
|
||||
with task_manager.acquire(self.context,
|
||||
self.node.uuid) as task:
|
||||
method_to_call(self, task)
|
||||
mock_validate.assert_called_once_with(
|
||||
network_uuid, mock.ANY, context=task.context)
|
||||
|
@ -49,6 +49,7 @@ from ironic.conductor import deployments
|
||||
from ironic.conductor import inspection
|
||||
from ironic.conductor import manager
|
||||
from ironic.conductor import notification_utils
|
||||
from ironic.conductor import servicing
|
||||
from ironic.conductor import steps as conductor_steps
|
||||
from ironic.conductor import task_manager
|
||||
from ironic.conductor import utils as conductor_utils
|
||||
@ -8601,3 +8602,49 @@ class ContinueInspectionTestCase(mgr_utils.ServiceSetUpMixin,
|
||||
self.assertEqual(exception.NotFound, exc.exc_info[0])
|
||||
node.refresh()
|
||||
self.assertEqual(states.AVAILABLE, node.provision_state)
|
||||
|
||||
|
||||
@mgr_utils.mock_record_keepalive
|
||||
class DoNodeServiceTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
|
||||
def setUp(self):
|
||||
super(DoNodeServiceTestCase, self).setUp()
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakePower.validate',
|
||||
autospec=True)
|
||||
def test_do_node_service_maintenance(self, mock_validate):
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.ACTIVE,
|
||||
target_provision_state=states.NOSTATE,
|
||||
maintenance=True, maintenance_reason='reason')
|
||||
self._start_service()
|
||||
exc = self.assertRaises(messaging.rpc.ExpectedException,
|
||||
self.service.do_node_service,
|
||||
self.context, node.uuid, {'foo': 'bar'})
|
||||
# Compare true exception hidden by @messaging.expected_exceptions
|
||||
self.assertEqual(exception.NodeInMaintenance, exc.exc_info[0])
|
||||
self.assertFalse(mock_validate.called)
|
||||
|
||||
@mock.patch.object(task_manager.TaskManager, 'process_event',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.fake.FakePower.validate',
|
||||
autospec=True)
|
||||
def test_do_node_service(self, mock_pv, mock_nv, mock_event):
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.ACTIVE,
|
||||
target_provision_state=states.NOSTATE)
|
||||
self._start_service()
|
||||
self.service.do_node_service(self.context,
|
||||
node.uuid, {'foo': 'bar'})
|
||||
self.assertTrue(mock_pv.called)
|
||||
self.assertTrue(mock_nv.called)
|
||||
mock_event.assert_called_once_with(
|
||||
mock.ANY,
|
||||
'service',
|
||||
callback=mock.ANY,
|
||||
call_args=(servicing.do_node_service, mock.ANY,
|
||||
{'foo': 'bar'}, False),
|
||||
err_handler=mock.ANY, target_state='active')
|
||||
|
@ -722,6 +722,14 @@ class RPCAPITestCase(db_base.DbTestCase):
|
||||
allocation='fake-allocation',
|
||||
version='1.48')
|
||||
|
||||
def test_do_node_service(self):
|
||||
self._test_rpcapi('do_node_service',
|
||||
'call',
|
||||
node_id='fake-node',
|
||||
service_steps={'foo': 'bar'},
|
||||
disable_ramdisk=False,
|
||||
version='1.57')
|
||||
|
||||
@mock.patch.object(rpc, 'GLOBAL_MANAGER',
|
||||
spec_set=conductor_manager.ConductorManager)
|
||||
def test_local_call(self, mock_manager):
|
||||
|
1184
ironic/tests/unit/conductor/test_servicing.py
Normal file
1184
ironic/tests/unit/conductor/test_servicing.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -1059,8 +1059,8 @@ class GetValidatedStepsFromTemplatesTestCase(db_base.DbTestCase):
|
||||
mock_templates):
|
||||
mock_templates.return_value = [self.template]
|
||||
mock_validate.side_effect = exception.InstanceDeployFailure('foo')
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=False) as task:
|
||||
self.assertRaises(
|
||||
exception.InstanceDeployFailure,
|
||||
conductor_steps._get_validated_steps_from_templates, task)
|
||||
@ -1399,3 +1399,87 @@ class ReservedStepHandlerByNameTestCase(db_base.DbTestCase):
|
||||
|
||||
def test_reserved_step_wait_time(self):
|
||||
self._test_reserved_step({'step': 'wait', 'args': {'seconds': 1}})
|
||||
|
||||
|
||||
class NodeServiceStepsTestCase(db_base.DbTestCase):
|
||||
def setUp(self):
|
||||
super(NodeServiceStepsTestCase, self).setUp()
|
||||
|
||||
self.deploy_start = {
|
||||
'step': 'deploy_start', 'priority': 50, 'interface': 'deploy'}
|
||||
self.power_one = {
|
||||
'step': 'power_one', 'priority': 40, 'interface': 'power'}
|
||||
self.deploy_middle = {
|
||||
'step': 'deploy_middle', 'priority': 40, 'interface': 'deploy'}
|
||||
self.deploy_end = {
|
||||
'step': 'deploy_end', 'priority': 20, 'interface': 'deploy'}
|
||||
self.power_disable = {
|
||||
'step': 'power_disable', 'priority': 0, 'interface': 'power'}
|
||||
self.deploy_core = {
|
||||
'step': 'deploy', 'priority': 100, 'interface': 'deploy'}
|
||||
# enabled steps
|
||||
self.service_steps = [self.deploy_start, self.power_one,
|
||||
self.deploy_middle, self.deploy_end]
|
||||
# Deploy step with argsinfo.
|
||||
self.deploy_raid = {
|
||||
'step': 'build_raid', 'priority': 0, 'interface': 'deploy',
|
||||
'argsinfo': {'arg1': {'description': 'desc1', 'required': True},
|
||||
'arg2': {'description': 'desc2'}}}
|
||||
self.node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware')
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.get_service_steps',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.fake.FakePower.get_service_steps',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeManagement.get_service_steps',
|
||||
autospec=True)
|
||||
def test__get_service_steps(self, mock_mgt_steps, mock_power_steps,
|
||||
mock_deploy_steps):
|
||||
# Test getting deploy steps, with one driver returning None, two
|
||||
# conflicting priorities, and asserting they are ordered properly.
|
||||
|
||||
mock_power_steps.return_value = [self.power_disable, self.power_one]
|
||||
mock_deploy_steps.return_value = [
|
||||
self.deploy_start, self.deploy_middle, self.deploy_end]
|
||||
# These next steps are actually present on the FakeVendorB interface,
|
||||
# and instead of just mock everything, we're actually exercising the
|
||||
# rest of the way down including the decorator to get here.
|
||||
fake_log_passthrough = {
|
||||
'abortable': False, 'argsinfo': None, 'interface': 'vendor',
|
||||
'priority': 0, 'requires_ramdisk': False,
|
||||
'step': 'log_passthrough'
|
||||
}
|
||||
fake_trigger_servicewait = {
|
||||
'abortable': False, 'argsinfo': None, 'interface': 'vendor',
|
||||
'priority': 0, 'requires_ramdisk': True,
|
||||
'step': 'trigger_servicewait'
|
||||
}
|
||||
|
||||
expected = self.service_steps + [fake_log_passthrough,
|
||||
fake_trigger_servicewait,
|
||||
self.power_disable]
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
steps = conductor_steps._get_service_steps(task, enabled=False)
|
||||
|
||||
self.assertEqual(expected, steps)
|
||||
mock_mgt_steps.assert_called_once_with(mock.ANY, task)
|
||||
mock_power_steps.assert_called_once_with(mock.ANY, task)
|
||||
mock_deploy_steps.assert_called_once_with(mock.ANY, task)
|
||||
|
||||
@mock.patch.object(conductor_steps, '_validate_user_service_steps',
|
||||
autospec=True)
|
||||
def test_set_node_service_steps(self, mock_steps):
|
||||
mock_steps.return_value = self.service_steps
|
||||
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
conductor_steps.set_node_service_steps(task)
|
||||
self.node.refresh()
|
||||
self.assertEqual(self.service_steps,
|
||||
self.node.driver_internal_info['service_steps'])
|
||||
self.assertEqual({}, self.node.service_step)
|
||||
self.assertIsNone(
|
||||
self.node.driver_internal_info['service_step_index'])
|
||||
mock_steps.assert_called_once_with(task, [], disable_ramdisk=False)
|
||||
|
@ -1490,6 +1490,90 @@ class ErrorHandlersTestCase(db_base.DbTestCase):
|
||||
log_mock.assert_has_calls(log_calls)
|
||||
self.node.save.assert_called_once_with()
|
||||
|
||||
@mock.patch.object(conductor_utils.LOG, 'error', autospec=True)
|
||||
def _test_servicing_error_handler(self, mock_log_error,
|
||||
prov_state=states.SERVICING):
|
||||
self.node.provision_state = prov_state
|
||||
target = 'baz'
|
||||
self.node.target_provision_state = target
|
||||
self.node.service_step = {'key': 'val'}
|
||||
self.node.set_driver_internal_info('service_reboot', True)
|
||||
self.node.set_driver_internal_info('service_polling', True)
|
||||
self.node.set_driver_internal_info('skip_current_service_step', True)
|
||||
self.node.set_driver_internal_info('service_step_index', 0)
|
||||
self.node.set_driver_internal_info('agent_url', 'url')
|
||||
self.node.set_driver_internal_info('agent_secret_token', 'foo')
|
||||
self.node.set_driver_internal_info('agent_secret_token_pregenerated',
|
||||
False)
|
||||
|
||||
msg = 'error bar'
|
||||
last_error = "last error"
|
||||
conductor_utils.servicing_error_handler(self.task, msg,
|
||||
errmsg=last_error)
|
||||
self.node.save.assert_called_once_with()
|
||||
self.assertEqual({}, self.node.service_step)
|
||||
self.assertNotIn('service_step_index', self.node.driver_internal_info)
|
||||
self.assertNotIn('service_reboot', self.node.driver_internal_info)
|
||||
self.assertNotIn('service_polling', self.node.driver_internal_info)
|
||||
self.assertNotIn('skip_current_service_step',
|
||||
self.node.driver_internal_info)
|
||||
self.assertNotIn('agent_secret_token', self.node.driver_internal_info)
|
||||
self.assertNotIn('agent_secret_token_pregenerated',
|
||||
self.node.driver_internal_info)
|
||||
self.assertEqual(last_error, self.node.last_error)
|
||||
self.assertTrue(self.node.maintenance)
|
||||
self.assertEqual(last_error, self.node.maintenance_reason)
|
||||
self.assertEqual('service failure', self.node.fault)
|
||||
driver = self.task.driver.deploy
|
||||
driver.tear_down_service.assert_called_once_with(self.task)
|
||||
if prov_state == states.SERVICEFAIL:
|
||||
self.assertFalse(self.task.process_event.called)
|
||||
else:
|
||||
self.task.process_event.assert_called_once_with('fail')
|
||||
self.assertNotIn('agent_url', self.node.driver_internal_info)
|
||||
mock_log_error.assert_called_once_with(msg, exc_info=False)
|
||||
|
||||
def test_servicing_error_handler(self):
|
||||
self._test_servicing_error_handler()
|
||||
|
||||
def test_servicing_error_handler_servicewait(self):
|
||||
self._test_servicing_error_handler(prov_state=states.SERVICEWAIT)
|
||||
|
||||
def test_servicing_error_handler_servicefail(self):
|
||||
self._test_servicing_error_handler(prov_state=states.SERVICEFAIL)
|
||||
|
||||
def test_servicing_error_handler_no_teardown(self):
|
||||
target = states.MANAGEABLE
|
||||
self.node.target_provision_state = target
|
||||
conductor_utils.servicing_error_handler(self.task, 'foo',
|
||||
tear_down_service=False)
|
||||
self.assertFalse(self.task.driver.deploy.tear_down_service.called)
|
||||
self.task.process_event.assert_called_once_with('fail')
|
||||
|
||||
def test_servicing_error_handler_no_fail(self):
|
||||
conductor_utils.servicing_error_handler(self.task, 'foo',
|
||||
set_fail_state=False)
|
||||
driver = self.task.driver.deploy
|
||||
driver.tear_down_service.assert_called_once_with(self.task)
|
||||
self.assertFalse(self.task.process_event.called)
|
||||
|
||||
@mock.patch.object(conductor_utils, 'LOG', autospec=True)
|
||||
def test_servicing_error_handler_tear_down_error(self, log_mock):
|
||||
def _side_effect(task):
|
||||
# simulate overwriting last error by another operation (e.g. power)
|
||||
task.node.last_error = None
|
||||
raise Exception('bar')
|
||||
|
||||
driver = self.task.driver.deploy
|
||||
msg = 'foo'
|
||||
driver.tear_down_service.side_effect = _side_effect
|
||||
conductor_utils.servicing_error_handler(self.task, msg)
|
||||
log_mock.error.assert_called_once_with(msg, exc_info=False)
|
||||
self.assertTrue(log_mock.exception.called)
|
||||
self.assertIn(msg, self.node.last_error)
|
||||
self.assertIn(msg, self.node.maintenance_reason)
|
||||
self.assertEqual('service failure', self.node.fault)
|
||||
|
||||
|
||||
class ValidatePortPhysnetTestCase(db_base.DbTestCase):
|
||||
|
||||
@ -2738,3 +2822,46 @@ class GetTokenProjectFromRequestTestCase(db_base.DbTestCase):
|
||||
self.context.auth_token_info = self.auth_token_info
|
||||
res = conductor_utils.get_token_project_from_request(self.context)
|
||||
self.assertEqual('user-project', res)
|
||||
|
||||
|
||||
class ServiceUtilsTestCase(db_base.DbTestCase):
|
||||
|
||||
def setUp(self):
|
||||
super(ServiceUtilsTestCase, self).setUp()
|
||||
self.node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
uuid=uuidutils.generate_uuid(),
|
||||
driver_internal_info={
|
||||
'agent_last_heartbeat': str(timeutils.utcnow().isoformat()),
|
||||
'agent_url': 'a_url'})
|
||||
self.config(fast_track=True, group='deploy')
|
||||
|
||||
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
|
||||
def test_wipe_service_internal_info(self, mock_power):
|
||||
mock_power.return_value = False
|
||||
self.node.driver_internal_info = {
|
||||
'service_steps': {'foo': 'bar'},
|
||||
'agent_cached_service_steps': {'more_foo': None},
|
||||
'service_reboot': False,
|
||||
'service_polling': 1,
|
||||
'service_disable_ramdisk': False,
|
||||
'skip_current_service_step': False,
|
||||
'steps_validated': 'meow'
|
||||
'agent_secret_token'}
|
||||
self.node.save()
|
||||
not_in_list = ['agent_cached_service_steps',
|
||||
'serivce_reboot',
|
||||
'service_polling',
|
||||
'service_disable_ramdisk',
|
||||
'skip_current_service_step',
|
||||
'steps_validated',
|
||||
'agent_secret_token']
|
||||
with task_manager.acquire(self.context, self.node.id,
|
||||
shared=True) as task:
|
||||
conductor_utils.wipe_service_internal_info(task)
|
||||
task.node.save()
|
||||
self.assertIsNone(
|
||||
task.node.driver_internal_info['service_steps']
|
||||
)
|
||||
for field in not_in_list:
|
||||
self.assertNotIn(field, task.node.driver_internal_info)
|
||||
|
@ -238,7 +238,8 @@ def get_test_node(**kw):
|
||||
'boot_mode': kw.get('boot_mode', None),
|
||||
'secure_boot': kw.get('secure_boot', None),
|
||||
'shard': kw.get('shard', None),
|
||||
'parent_node': kw.get('parent_node', None)
|
||||
'parent_node': kw.get('parent_node', None),
|
||||
'service_step': kw.get('service_step'),
|
||||
}
|
||||
|
||||
for iface in drivers_base.ALL_INTERFACES:
|
||||
|
@ -25,6 +25,7 @@ from ironic.common import exception
|
||||
from ironic.common import image_service
|
||||
from ironic.common import states
|
||||
from ironic.conductor import cleaning
|
||||
from ironic.conductor import servicing
|
||||
from ironic.conductor import steps as conductor_steps
|
||||
from ironic.conductor import task_manager
|
||||
from ironic.conductor import utils as manager_utils
|
||||
@ -159,7 +160,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
autospec=True)
|
||||
def test_heartbeat_in_maintenance(self, next_step_mock):
|
||||
# NOTE(pas-ha) checking only for states that are not noop
|
||||
for state in (states.DEPLOYWAIT, states.CLEANWAIT):
|
||||
for state in (states.DEPLOYWAIT, states.CLEANWAIT,
|
||||
states.SERVICEWAIT):
|
||||
next_step_mock.reset_mock()
|
||||
self.node.provision_state = state
|
||||
self.node.maintenance = True
|
||||
@ -186,7 +188,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
group='conductor')
|
||||
for state, expected in [(states.DEPLOYWAIT, states.DEPLOYFAIL),
|
||||
(states.CLEANWAIT, states.CLEANFAIL),
|
||||
(states.RESCUEWAIT, states.RESCUEFAIL)]:
|
||||
(states.RESCUEWAIT, states.RESCUEFAIL),
|
||||
(states.SERVICEWAIT, states.SERVICEFAIL)]:
|
||||
next_step_mock.reset_mock()
|
||||
self.node.provision_state = state
|
||||
self.node.maintenance = True
|
||||
@ -211,7 +214,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
autospec=True)
|
||||
def test_heartbeat_with_reservation(self, next_step_mock):
|
||||
# NOTE(pas-ha) checking only for states that are not noop
|
||||
for state in (states.DEPLOYWAIT, states.CLEANWAIT):
|
||||
for state in (states.DEPLOYWAIT, states.CLEANWAIT,
|
||||
states.SERVICEWAIT):
|
||||
next_step_mock.reset_mock()
|
||||
self.node.provision_state = state
|
||||
self.node.reservation = 'localhost'
|
||||
@ -232,7 +236,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
def test_heartbeat_noops_in_wrong_state(self, next_step_mock, log_mock):
|
||||
allowed = {states.DEPLOYWAIT, states.CLEANWAIT, states.RESCUEWAIT,
|
||||
states.DEPLOYING, states.CLEANING, states.RESCUING,
|
||||
states.DEPLOYHOLD, states.CLEANHOLD}
|
||||
states.DEPLOYHOLD, states.CLEANHOLD, states.SERVICEHOLD,
|
||||
states.SERVICING, states.SERVICEWAIT}
|
||||
for state in set(states.machine.states) - allowed:
|
||||
for m in (next_step_mock, log_mock):
|
||||
m.reset_mock()
|
||||
@ -253,7 +258,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
def test_heartbeat_noops_in_wrong_state2(self, next_step_mock):
|
||||
CONF.set_override('allow_provisioning_in_maintenance', False,
|
||||
group='conductor')
|
||||
allowed = {states.DEPLOYWAIT, states.CLEANWAIT}
|
||||
allowed = {states.DEPLOYWAIT, states.CLEANWAIT,
|
||||
states.SERVICEWAIT}
|
||||
for state in set(states.machine.states) - allowed:
|
||||
next_step_mock.reset_mock()
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
@ -466,7 +472,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
@mock.patch.object(agent_base.LOG, 'error', autospec=True)
|
||||
def test_heartbeat_records_when_appropriate(self, log_mock):
|
||||
for provision_state in (states.CLEANING, states.DEPLOYING,
|
||||
states.CLEANHOLD, states.DEPLOYHOLD):
|
||||
states.CLEANHOLD, states.DEPLOYHOLD,
|
||||
states.SERVICEHOLD, states.SERVICING):
|
||||
self.node.driver_internal_info = {}
|
||||
self.node.provision_state = provision_state
|
||||
self.node.save()
|
||||
@ -521,6 +528,68 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
task.node.driver_internal_info['agent_last_heartbeat'])
|
||||
self.assertEqual(provision_state, task.node.provision_state)
|
||||
|
||||
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
|
||||
@mock.patch.object(agent_base.HeartbeatMixin,
|
||||
'refresh_steps', autospec=True)
|
||||
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
||||
autospec=True)
|
||||
@mock.patch.object(servicing, 'continue_node_service', autospec=True)
|
||||
def test_heartbeat_resume_service(self, mock_service, mock_set_steps,
|
||||
mock_refresh, mock_touch):
|
||||
self.node.clean_step = {}
|
||||
self.node.provision_state = states.SERVICEWAIT
|
||||
self.node.save()
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '1.0.0')
|
||||
|
||||
mock_touch.assert_called_once_with(mock.ANY)
|
||||
mock_refresh.assert_called_once_with(mock.ANY, task, 'service')
|
||||
mock_service.assert_called_once_with(task)
|
||||
mock_set_steps.assert_called_once_with(task)
|
||||
|
||||
@mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
|
||||
@mock.patch.object(agent_base.HeartbeatMixin,
|
||||
'continue_servicing', autospec=True)
|
||||
def test_heartbeat_continue_servicing(self, mock_continue, mock_touch):
|
||||
self.node.service_step = {
|
||||
'priority': 10,
|
||||
'interface': 'deploy',
|
||||
'step': 'foo',
|
||||
'reboot_requested': False
|
||||
}
|
||||
self.node.provision_state = states.SERVICEWAIT
|
||||
self.node.save()
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '1.0.0')
|
||||
|
||||
mock_touch.assert_called_once_with(mock.ANY)
|
||||
mock_continue.assert_called_once_with(mock.ANY, task)
|
||||
|
||||
@mock.patch.object(manager_utils, 'servicing_error_handler', autospec=True)
|
||||
@mock.patch.object(agent_base.HeartbeatMixin,
|
||||
'continue_servicing', autospec=True)
|
||||
def test_heartbeat_continue_servicing_fails(self, mock_continue,
|
||||
mock_handler):
|
||||
self.node.service_step = {
|
||||
'priority': 10,
|
||||
'interface': 'deploy',
|
||||
'step': 'foo',
|
||||
'reboot_requested': False
|
||||
}
|
||||
|
||||
mock_continue.side_effect = Exception()
|
||||
|
||||
self.node.provision_state = states.SERVICEWAIT
|
||||
self.node.save()
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '1.0.0')
|
||||
|
||||
mock_continue.assert_called_once_with(mock.ANY, task)
|
||||
mock_handler.assert_called_once_with(task, mock.ANY, mock.ANY)
|
||||
|
||||
|
||||
class AgentRescueTests(AgentDeployMixinBaseTest):
|
||||
|
||||
@ -1604,6 +1673,25 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest):
|
||||
self.assertNotIn('deployment_reboot',
|
||||
task.node.driver_internal_info)
|
||||
|
||||
@mock.patch.object(deploy_utils, 'build_agent_options', autospec=True)
|
||||
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True,
|
||||
autospec=True)
|
||||
@mock.patch.object(manager_utils, 'servicing_error_handler', autospec=True)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
def test__post_step_reboot_fail_servicing(self, mock_reboot, mock_handler,
|
||||
mock_prepare, mock_build_opt):
|
||||
mock_reboot.side_effect = RuntimeError("broken")
|
||||
self.node.provision_state = states.SERVICEWAIT
|
||||
self.node.save()
|
||||
with task_manager.acquire(self.context, self.node['uuid'],
|
||||
shared=False) as task:
|
||||
agent_base._post_step_reboot(task, 'service')
|
||||
mock_reboot.assert_called_once_with(task, states.REBOOT)
|
||||
mock_handler.assert_called_once_with(task, mock.ANY,
|
||||
traceback=True)
|
||||
self.assertNotIn('servicing_reboot',
|
||||
task.node.driver_internal_info)
|
||||
|
||||
def _test_clean_step_hook(self):
|
||||
"""Helper method for unit tests related to clean step hooks."""
|
||||
some_function_mock = mock.MagicMock()
|
||||
@ -1998,6 +2086,111 @@ class ContinueCleaningTest(AgentDeployMixinBaseTest):
|
||||
error_mock.assert_called_once_with(task, mock.ANY, traceback=False)
|
||||
|
||||
|
||||
class ContinueServiceTest(AgentDeployMixinBaseTest):
|
||||
|
||||
def setUp(self):
|
||||
super().setUp()
|
||||
self.node.provision_state = states.SERVICEWAIT
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.save()
|
||||
|
||||
@mock.patch.object(servicing, 'continue_node_service', autospec=True)
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
|
||||
autospec=True)
|
||||
def test_continue_servicing(self, status_mock, service_mock):
|
||||
# Test a successful execute clean step on the agent
|
||||
self.node.service_step = {
|
||||
'priority': 10,
|
||||
'interface': 'deploy',
|
||||
'step': 'erase_devices',
|
||||
'reboot_requested': False
|
||||
}
|
||||
self.node.save()
|
||||
status_mock.return_value = [{
|
||||
'command_status': 'SUCCEEDED',
|
||||
'command_name': 'execute_service_step',
|
||||
'command_result': {
|
||||
'service_step': self.node.service_step
|
||||
}
|
||||
}]
|
||||
with task_manager.acquire(self.context, self.node['uuid'],
|
||||
shared=False) as task:
|
||||
self.deploy.continue_servicing(task)
|
||||
service_mock.assert_called_once_with(task)
|
||||
self.assertEqual(states.SERVICING, task.node.provision_state)
|
||||
self.assertEqual(states.ACTIVE,
|
||||
task.node.target_provision_state)
|
||||
|
||||
@mock.patch.object(deploy_utils, 'build_agent_options', autospec=True)
|
||||
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True,
|
||||
autospec=True)
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
|
||||
autospec=True)
|
||||
def test_continue_servicing_reboot(
|
||||
self, status_mock, reboot_mock, mock_prepare, mock_build_opt):
|
||||
# Test a successful execute clean step on the agent, with reboot
|
||||
self.node.service_step = {
|
||||
'priority': 42,
|
||||
'interface': 'deploy',
|
||||
'step': 'reboot_me_afterwards',
|
||||
'reboot_requested': True
|
||||
}
|
||||
self.node.save()
|
||||
status_mock.return_value = [{
|
||||
'command_status': 'SUCCEEDED',
|
||||
'command_name': 'execute_service_step',
|
||||
'command_result': {
|
||||
'service_step': self.node.service_step
|
||||
}
|
||||
}]
|
||||
with task_manager.acquire(self.context, self.node['uuid'],
|
||||
shared=False) as task:
|
||||
self.deploy.continue_servicing(task)
|
||||
reboot_mock.assert_called_once_with(task, states.REBOOT)
|
||||
|
||||
@mock.patch.object(servicing, 'continue_node_service', autospec=True)
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
|
||||
autospec=True)
|
||||
def test_continue_servicing_after_reboot(self, status_mock, service_mock):
|
||||
# Test a successful execute clean step on the agent, with reboot
|
||||
self.node.service_step = {
|
||||
'priority': 42,
|
||||
'interface': 'deploy',
|
||||
'step': 'reboot_me_afterwards',
|
||||
'reboot_requested': True
|
||||
}
|
||||
driver_internal_info = self.node.driver_internal_info
|
||||
driver_internal_info['servicing_reboot'] = True
|
||||
self.node.driver_internal_info = driver_internal_info
|
||||
self.node.save()
|
||||
# Represents a freshly booted agent with no commands
|
||||
status_mock.return_value = []
|
||||
|
||||
with task_manager.acquire(self.context, self.node['uuid'],
|
||||
shared=False) as task:
|
||||
self.deploy.continue_servicing(task)
|
||||
service_mock.assert_called_once_with(task)
|
||||
self.assertEqual(states.SERVICING, task.node.provision_state)
|
||||
self.assertNotIn('servicing_reboot',
|
||||
task.node.driver_internal_info)
|
||||
|
||||
@mock.patch.object(servicing, 'continue_node_service', autospec=True)
|
||||
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
|
||||
autospec=True)
|
||||
def test_continue_servicing_running(self, status_mock, service_mock):
|
||||
# Test that no action is taken while a clean step is executing
|
||||
status_mock.return_value = [{
|
||||
'command_status': 'RUNNING',
|
||||
'command_name': 'execute_service_step',
|
||||
'command_result': None
|
||||
}]
|
||||
with task_manager.acquire(self.context, self.node['uuid'],
|
||||
shared=False) as task:
|
||||
self.deploy.continue_servicing(task)
|
||||
self.assertFalse(service_mock.called)
|
||||
|
||||
|
||||
class TestRefreshCleanSteps(AgentDeployMixinBaseTest):
|
||||
|
||||
def setUp(self):
|
||||
|
@ -676,7 +676,7 @@ class TestObject(_LocalTest, _TestObject):
|
||||
# version bump. It is an MD5 hash of the object fields and remotable methods.
|
||||
# The fingerprint values should only be changed if there is a version bump.
|
||||
expected_object_fingerprints = {
|
||||
'Node': '1.39-ee3f5ff28b79f9fabf84a50e34a71684',
|
||||
'Node': '1.40-2182d4660bb5d5e4cc5670c37012ef71',
|
||||
'MyObj': '1.5-9459d30d6954bffc7a9afd347a807ca6',
|
||||
'Chassis': '1.3-d656e039fd8ae9f34efc232ab3980905',
|
||||
'Port': '1.11-97bf15b61224f26c65e90f007d78bfd2',
|
||||
|
19
releasenotes/notes/add-service-steps-deb45c9a0e77a647.yaml
Normal file
19
releasenotes/notes/add-service-steps-deb45c9a0e77a647.yaml
Normal file
@ -0,0 +1,19 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Adds a new Ironic capability called ``service_steps`` which allows a
|
||||
deployed ``ACTIVE`` node to be modified utilizing a new API provision
|
||||
state verb of ``service`` which can include a list of ``service_steps``
|
||||
to be performed. This work is inspired by ``clean_steps`` and
|
||||
``deploy_steps`` and similar to those efforts, this functionality will
|
||||
continue to evolve as new features, functionality, and capabilities
|
||||
are added.
|
||||
- Adds a new driver method decorator ``base.service_step`` which operates
|
||||
exactly like the existing ``base.clean_step`` and ``base.deploy_step``
|
||||
decorators. Driver methods which are decorated *can* be invoked utilizing
|
||||
the service steps.
|
||||
issues:
|
||||
- |
|
||||
The ``service_steps`` functionality does not understand how to poll and
|
||||
communicate with the ``ironic-python-agent``. This is anticipated to be
|
||||
addressed in a future release.
|
Loading…
Reference in New Issue
Block a user