Follow-up patch for generic node inspection
Implements the nit comments which were not addressed as part of change id If9aab90accf5a60ad3febbfc1f5d367eed987c13 Implements: blueprint ironic-node-properties-discovery Change-Id: Id6d60f460100f321624bb7d59d4e7856224fd22b
This commit is contained in:
parent
6d925f1f80
commit
37a655674e
@ -202,6 +202,10 @@
|
||||
# (string value)
|
||||
#auth_strategy=keystone
|
||||
|
||||
# Enable pecan debug mode. WARNING: this is insecure and
|
||||
# should not be used in production. (boolean value)
|
||||
#pecan_debug=false
|
||||
|
||||
|
||||
#
|
||||
# Options defined in ironic.common.driver_factory
|
||||
@ -605,7 +609,7 @@
|
||||
#configdrive_swift_container=ironic_configdrive_container
|
||||
|
||||
# Timeout (seconds) for waiting for node inspection. 0 -
|
||||
# unlimited. (integer value) (integer value)
|
||||
# unlimited. (integer value)
|
||||
#inspect_timeout=1800
|
||||
|
||||
|
||||
|
@ -72,10 +72,11 @@ def hide_driver_internal_info(obj):
|
||||
def check_allow_management_verbs(verb):
|
||||
# v1.4 added the MANAGEABLE state and two verbs to move nodes into
|
||||
# and out of that state. Reject requests to do this in older versions
|
||||
if ((pecan.request.version.minor < 4 and
|
||||
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']])
|
||||
or (pecan.request.version.minor < 6 and
|
||||
verb == ir_states.VERBS['inspect'])):
|
||||
if (pecan.request.version.minor < 4 and
|
||||
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']]):
|
||||
raise exception.NotAcceptable()
|
||||
if (pecan.request.version.minor < 6 and
|
||||
verb == ir_states.VERBS['inspect']):
|
||||
raise exception.NotAcceptable()
|
||||
|
||||
|
||||
@ -518,10 +519,11 @@ class Node(base.APIBase):
|
||||
"""The UTC date and time of the last provision state change"""
|
||||
|
||||
inspection_finished_at = datetime.datetime
|
||||
"""The UTC date and time when the last inspection finished successfully."""
|
||||
"""The UTC date and time when the last hardware inspection finished
|
||||
successfully."""
|
||||
|
||||
inspection_started_at = datetime.datetime
|
||||
"""The UTC date and time of the hardware when inspection was started"""
|
||||
"""The UTC date and time when the hardware inspection was started"""
|
||||
|
||||
maintenance = types.boolean
|
||||
"""Indicates whether the node is in maintenance mode."""
|
||||
|
@ -166,7 +166,7 @@ conductor_opts = [
|
||||
cfg.IntOpt('inspect_timeout',
|
||||
default=1800,
|
||||
help='Timeout (seconds) for waiting for node inspection. '
|
||||
'0 - unlimited. (integer value)'),
|
||||
'0 - unlimited.'),
|
||||
|
||||
]
|
||||
CONF = cfg.CONF
|
||||
@ -928,7 +928,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
||||
sort_key = 'provision_updated_at'
|
||||
callback_method = utils.cleanup_after_timeout
|
||||
err_handler = provisioning_error_handler
|
||||
self._fail_if_timeout_reached(context, filters, states.DEPLOYWAIT,
|
||||
self._fail_if_in_state(context, filters, states.DEPLOYWAIT,
|
||||
sort_key, callback_method, err_handler)
|
||||
|
||||
def _do_takeover(self, task):
|
||||
@ -1403,6 +1403,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
||||
@messaging.expected_exceptions(exception.NoFreeConductorWorker,
|
||||
exception.NodeLocked,
|
||||
exception.HardwareInspectionFailure,
|
||||
exception.InvalidStateRequested,
|
||||
exception.UnsupportedDriverExtension)
|
||||
def inspect_hardware(self, context, node_id):
|
||||
"""Inspect hardware to obtain hardware properties.
|
||||
@ -1420,11 +1421,12 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
||||
async task
|
||||
:raises: HardwareInspectionFailure when unable to get
|
||||
essential scheduling properties from hardware.
|
||||
:raises: InvalidStateRequested if 'inspect' is not a
|
||||
valid action to do in the current state.
|
||||
|
||||
"""
|
||||
LOG.debug('RPC inspect_hardware called for node %s', node_id)
|
||||
with task_manager.acquire(context, node_id, shared=False) as task:
|
||||
node = task.node
|
||||
if not getattr(task.driver, 'inspect', None):
|
||||
raise exception.UnsupportedDriverExtension(
|
||||
driver=task.node.driver, extension='inspect')
|
||||
@ -1442,16 +1444,13 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
||||
try:
|
||||
task.process_event('inspect',
|
||||
callback=self._spawn_worker,
|
||||
call_args=(_do_inspect_hardware, task,
|
||||
self.conductor.id),
|
||||
call_args=(_do_inspect_hardware, task),
|
||||
err_handler=provisioning_error_handler)
|
||||
|
||||
except exception.InvalidState:
|
||||
error = (_("Inspection is not possible while node "
|
||||
"%(node)s is in state %(state)s")
|
||||
% {'node': node.uuid,
|
||||
'state': node.provision_state})
|
||||
raise exception.HardwareInspectionFailure(error=error)
|
||||
raise exception.InvalidStateRequested(
|
||||
action='inspect', node=task.node.uuid,
|
||||
state=task.node.provision_state)
|
||||
|
||||
@periodic_task.periodic_task(
|
||||
spacing=CONF.conductor.check_provision_state_interval)
|
||||
@ -1469,18 +1468,26 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
||||
'provision_state': states.INSPECTING,
|
||||
'inspection_started_before': callback_timeout}
|
||||
sort_key = 'inspection_started_at'
|
||||
last_error = _("timeout reached while waiting for callback")
|
||||
self._fail_if_timeout_reached(context, filters, states.INSPECTING,
|
||||
last_error = _("timeout reached while inspecting the node")
|
||||
self._fail_if_in_state(context, filters, states.INSPECTING,
|
||||
sort_key, last_error=last_error)
|
||||
|
||||
def _fail_if_timeout_reached(self, context, filters, provision_state,
|
||||
def _fail_if_in_state(self, context, filters, provision_state,
|
||||
sort_key, callback_method=None,
|
||||
err_handler=None, last_error=None):
|
||||
"""Checks if the async(background) process has reached timeout.
|
||||
"""Fail nodes that are in specified state.
|
||||
|
||||
Retrieves nodes that satisfy the criteria in 'filters'.
|
||||
If any of these nodes is in 'provision_state', it has failed
|
||||
in whatever provisioning activity it was currently doing.
|
||||
That failure is processed here.
|
||||
|
||||
:param: context: request context
|
||||
:param: filters: criteria (as a dictionary) to get the desired
|
||||
list of nodes.
|
||||
list of nodes that satisfy the filter constraints.
|
||||
For example, if filters['provisioned_before'] = 60,
|
||||
this would process nodes whose provision_updated_at
|
||||
field value was 60 or more seconds before 'now'.
|
||||
:param: provision_state: provision_state that the node is in,
|
||||
for the provisioning activity to have failed.
|
||||
:param: sort_key: the nodes are sorted based on this key.
|
||||
@ -1488,8 +1495,9 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
||||
spawned thread, for a failed node. This
|
||||
method must take a :class:`TaskManager` as
|
||||
the first (and only required) parameter.
|
||||
:param: err_handler: the error handler to be invoked if an error.
|
||||
occurs trying to spawn a thread for a failed node.
|
||||
:param: err_handler: for a failed node, the error handler to invoke
|
||||
if an error occurs trying to spawn an thread
|
||||
to do the callback_method.
|
||||
:param: last_error: the error message to be updated in node.last_error
|
||||
|
||||
"""
|
||||
@ -1516,9 +1524,6 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
||||
call_args=(callback_method, task),
|
||||
err_handler=err_handler)
|
||||
else:
|
||||
if not last_error:
|
||||
last_error = _("timeout reached while waiting "
|
||||
"for callback")
|
||||
task.node.last_error = last_error
|
||||
task.process_event('fail')
|
||||
except exception.NoFreeConductorWorker:
|
||||
@ -1802,12 +1807,19 @@ def do_sync_power_state(task, count):
|
||||
return count
|
||||
|
||||
|
||||
def _do_inspect_hardware(task, conductor_id):
|
||||
"""Prepare the environment and inspect a node."""
|
||||
def _do_inspect_hardware(task):
|
||||
"""Initiates inspection.
|
||||
|
||||
:param: task: a TaskManager instance with an exclusive lock
|
||||
on its node.
|
||||
:raises: HardwareInspectionFailure if driver doesn't
|
||||
return the state as states.MANAGEABLE or
|
||||
states.INSPECTING.
|
||||
|
||||
"""
|
||||
node = task.node
|
||||
|
||||
def handle_failure(e):
|
||||
# NOTE(deva): there is no need to clear conductor_affinity
|
||||
node.last_error = e
|
||||
task.process_event('fail')
|
||||
LOG.error(_LE("Failed to inspect node %(node)s: %(err)s"),
|
||||
@ -1821,16 +1833,12 @@ def _do_inspect_hardware(task, conductor_id):
|
||||
error = str(e)
|
||||
handle_failure(error)
|
||||
|
||||
# Update conductor_affinity to reference this conductor's ID
|
||||
# since there may be local persistent state
|
||||
node.conductor_affinity = conductor_id
|
||||
|
||||
if new_state == states.MANAGEABLE:
|
||||
task.process_event('done')
|
||||
LOG.info(_LI('Successfully inspected node %(node)s')
|
||||
% {'node': node.uuid})
|
||||
elif new_state != states.INSPECTING:
|
||||
error = (_("Driver returned unexpected state in inspection"
|
||||
"%(state)s") % {'state': new_state})
|
||||
error = (_("During inspection, driver returned unexpected "
|
||||
"state %(state)s") % {'state': new_state})
|
||||
handle_failure(error)
|
||||
raise exception.HardwareInspectionFailure(error=error)
|
||||
|
@ -497,6 +497,8 @@ class ConductorAPI(object):
|
||||
async task.
|
||||
:raises: UnsupportedDriverExtension if the node's driver doesn't
|
||||
support inspection.
|
||||
:raises: InvalidStateRequested if 'inspect' is not a valid
|
||||
action to do in the current state.
|
||||
|
||||
"""
|
||||
cctxt = self.client.prepare(topic=topic or self.topic, version='1.24')
|
||||
|
@ -379,13 +379,12 @@ class Connection(api.Connection):
|
||||
if values['provision_state'] == states.INSPECTING:
|
||||
values['inspection_started_at'] = timeutils.utcnow()
|
||||
values['inspection_finished_at'] = None
|
||||
|
||||
if (ref.provision_state == states.INSPECTING and
|
||||
values.get('provision_state') == states.MANAGEABLE):
|
||||
elif (ref.provision_state == states.INSPECTING and
|
||||
values['provision_state'] == states.MANAGEABLE):
|
||||
values['inspection_finished_at'] = timeutils.utcnow()
|
||||
values['inspection_started_at'] = None
|
||||
elif (ref.provision_state == states.INSPECTING and
|
||||
values.get('provision_state') == states.INSPECTFAIL):
|
||||
values['provision_state'] == states.INSPECTFAIL):
|
||||
values['inspection_started_at'] = None
|
||||
|
||||
ref.update(values)
|
||||
|
@ -3083,7 +3083,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
|
||||
provision_state=states.INSPECTING)
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
mock_inspect.return_value = states.MANAGEABLE
|
||||
manager._do_inspect_hardware(task, self.service.conductor.id)
|
||||
manager._do_inspect_hardware(task)
|
||||
node.refresh()
|
||||
self.assertEqual(states.MANAGEABLE, node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, node.target_provision_state)
|
||||
@ -3097,7 +3097,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
|
||||
provision_state=states.INSPECTING)
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
mock_inspect.return_value = states.INSPECTING
|
||||
manager._do_inspect_hardware(task, self.service.conductor.id)
|
||||
manager._do_inspect_hardware(task)
|
||||
node.refresh()
|
||||
self.assertEqual(states.INSPECTING, node.provision_state)
|
||||
self.assertEqual(states.NOSTATE, node.target_provision_state)
|
||||
@ -3113,8 +3113,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
mock_inspect.return_value = None
|
||||
self.assertRaises(exception.HardwareInspectionFailure,
|
||||
manager._do_inspect_hardware, task,
|
||||
self.service.conductor.id)
|
||||
manager._do_inspect_hardware, task)
|
||||
node.refresh()
|
||||
self.assertEqual(states.INSPECTFAIL, node.provision_state)
|
||||
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
|
||||
@ -3196,8 +3195,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
|
||||
task = task_manager.TaskManager(self.context, node.uuid)
|
||||
|
||||
self.assertRaises(exception.HardwareInspectionFailure,
|
||||
manager._do_inspect_hardware, task,
|
||||
self.service.conductor.id)
|
||||
manager._do_inspect_hardware, task)
|
||||
node.refresh()
|
||||
self.assertEqual(states.INSPECTFAIL, node.provision_state)
|
||||
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
|
||||
|
Loading…
Reference in New Issue
Block a user