Follow-up patch for generic node inspection

Implements the nit comments which were not addressed
as part of change id
If9aab90accf5a60ad3febbfc1f5d367eed987c13

Implements: blueprint ironic-node-properties-discovery

Change-Id: Id6d60f460100f321624bb7d59d4e7856224fd22b
This commit is contained in:
Nisha Agarwal 2015-03-02 22:35:33 -08:00
parent 6d925f1f80
commit 37a655674e
6 changed files with 64 additions and 51 deletions

View File

@ -202,6 +202,10 @@
# (string value)
#auth_strategy=keystone
# Enable pecan debug mode. WARNING: this is insecure and
# should not be used in production. (boolean value)
#pecan_debug=false
#
# Options defined in ironic.common.driver_factory
@ -605,7 +609,7 @@
#configdrive_swift_container=ironic_configdrive_container
# Timeout (seconds) for waiting for node inspection. 0 -
# unlimited. (integer value) (integer value)
# unlimited. (integer value)
#inspect_timeout=1800

View File

@ -72,10 +72,11 @@ def hide_driver_internal_info(obj):
def check_allow_management_verbs(verb):
# v1.4 added the MANAGEABLE state and two verbs to move nodes into
# and out of that state. Reject requests to do this in older versions
if ((pecan.request.version.minor < 4 and
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']])
or (pecan.request.version.minor < 6 and
verb == ir_states.VERBS['inspect'])):
if (pecan.request.version.minor < 4 and
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']]):
raise exception.NotAcceptable()
if (pecan.request.version.minor < 6 and
verb == ir_states.VERBS['inspect']):
raise exception.NotAcceptable()
@ -518,10 +519,11 @@ class Node(base.APIBase):
"""The UTC date and time of the last provision state change"""
inspection_finished_at = datetime.datetime
"""The UTC date and time when the last inspection finished successfully."""
"""The UTC date and time when the last hardware inspection finished
successfully."""
inspection_started_at = datetime.datetime
"""The UTC date and time of the hardware when inspection was started"""
"""The UTC date and time when the hardware inspection was started"""
maintenance = types.boolean
"""Indicates whether the node is in maintenance mode."""

View File

@ -166,7 +166,7 @@ conductor_opts = [
cfg.IntOpt('inspect_timeout',
default=1800,
help='Timeout (seconds) for waiting for node inspection. '
'0 - unlimited. (integer value)'),
'0 - unlimited.'),
]
CONF = cfg.CONF
@ -928,8 +928,8 @@ class ConductorManager(periodic_task.PeriodicTasks):
sort_key = 'provision_updated_at'
callback_method = utils.cleanup_after_timeout
err_handler = provisioning_error_handler
self._fail_if_timeout_reached(context, filters, states.DEPLOYWAIT,
sort_key, callback_method, err_handler)
self._fail_if_in_state(context, filters, states.DEPLOYWAIT,
sort_key, callback_method, err_handler)
def _do_takeover(self, task):
LOG.debug(('Conductor %(cdr)s taking over node %(node)s'),
@ -1403,6 +1403,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
@messaging.expected_exceptions(exception.NoFreeConductorWorker,
exception.NodeLocked,
exception.HardwareInspectionFailure,
exception.InvalidStateRequested,
exception.UnsupportedDriverExtension)
def inspect_hardware(self, context, node_id):
"""Inspect hardware to obtain hardware properties.
@ -1420,11 +1421,12 @@ class ConductorManager(periodic_task.PeriodicTasks):
async task
:raises: HardwareInspectionFailure when unable to get
essential scheduling properties from hardware.
:raises: InvalidStateRequested if 'inspect' is not a
valid action to do in the current state.
"""
LOG.debug('RPC inspect_hardware called for node %s', node_id)
with task_manager.acquire(context, node_id, shared=False) as task:
node = task.node
if not getattr(task.driver, 'inspect', None):
raise exception.UnsupportedDriverExtension(
driver=task.node.driver, extension='inspect')
@ -1442,16 +1444,13 @@ class ConductorManager(periodic_task.PeriodicTasks):
try:
task.process_event('inspect',
callback=self._spawn_worker,
call_args=(_do_inspect_hardware, task,
self.conductor.id),
call_args=(_do_inspect_hardware, task),
err_handler=provisioning_error_handler)
except exception.InvalidState:
error = (_("Inspection is not possible while node "
"%(node)s is in state %(state)s")
% {'node': node.uuid,
'state': node.provision_state})
raise exception.HardwareInspectionFailure(error=error)
raise exception.InvalidStateRequested(
action='inspect', node=task.node.uuid,
state=task.node.provision_state)
@periodic_task.periodic_task(
spacing=CONF.conductor.check_provision_state_interval)
@ -1469,18 +1468,26 @@ class ConductorManager(periodic_task.PeriodicTasks):
'provision_state': states.INSPECTING,
'inspection_started_before': callback_timeout}
sort_key = 'inspection_started_at'
last_error = _("timeout reached while waiting for callback")
self._fail_if_timeout_reached(context, filters, states.INSPECTING,
sort_key, last_error=last_error)
last_error = _("timeout reached while inspecting the node")
self._fail_if_in_state(context, filters, states.INSPECTING,
sort_key, last_error=last_error)
def _fail_if_timeout_reached(self, context, filters, provision_state,
sort_key, callback_method=None,
err_handler=None, last_error=None):
"""Checks if the async(background) process has reached timeout.
def _fail_if_in_state(self, context, filters, provision_state,
sort_key, callback_method=None,
err_handler=None, last_error=None):
"""Fail nodes that are in specified state.
Retrieves nodes that satisfy the criteria in 'filters'.
If any of these nodes is in 'provision_state', it has failed
in whatever provisioning activity it was currently doing.
That failure is processed here.
:param: context: request context
:param: filters: criteria (as a dictionary) to get the desired
list of nodes.
list of nodes that satisfy the filter constraints.
For example, if filters['provisioned_before'] = 60,
this would process nodes whose provision_updated_at
field value was 60 or more seconds before 'now'.
:param: provision_state: provision_state that the node is in,
for the provisioning activity to have failed.
:param: sort_key: the nodes are sorted based on this key.
@ -1488,8 +1495,9 @@ class ConductorManager(periodic_task.PeriodicTasks):
spawned thread, for a failed node. This
method must take a :class:`TaskManager` as
the first (and only required) parameter.
:param: err_handler: the error handler to be invoked if an error.
occurs trying to spawn a thread for a failed node.
:param: err_handler: for a failed node, the error handler to invoke
if an error occurs trying to spawn an thread
to do the callback_method.
:param: last_error: the error message to be updated in node.last_error
"""
@ -1516,9 +1524,6 @@ class ConductorManager(periodic_task.PeriodicTasks):
call_args=(callback_method, task),
err_handler=err_handler)
else:
if not last_error:
last_error = _("timeout reached while waiting "
"for callback")
task.node.last_error = last_error
task.process_event('fail')
except exception.NoFreeConductorWorker:
@ -1802,12 +1807,19 @@ def do_sync_power_state(task, count):
return count
def _do_inspect_hardware(task, conductor_id):
"""Prepare the environment and inspect a node."""
def _do_inspect_hardware(task):
"""Initiates inspection.
:param: task: a TaskManager instance with an exclusive lock
on its node.
:raises: HardwareInspectionFailure if driver doesn't
return the state as states.MANAGEABLE or
states.INSPECTING.
"""
node = task.node
def handle_failure(e):
# NOTE(deva): there is no need to clear conductor_affinity
node.last_error = e
task.process_event('fail')
LOG.error(_LE("Failed to inspect node %(node)s: %(err)s"),
@ -1821,16 +1833,12 @@ def _do_inspect_hardware(task, conductor_id):
error = str(e)
handle_failure(error)
# Update conductor_affinity to reference this conductor's ID
# since there may be local persistent state
node.conductor_affinity = conductor_id
if new_state == states.MANAGEABLE:
task.process_event('done')
LOG.info(_LI('Successfully inspected node %(node)s')
% {'node': node.uuid})
elif new_state != states.INSPECTING:
error = (_("Driver returned unexpected state in inspection"
"%(state)s") % {'state': new_state})
error = (_("During inspection, driver returned unexpected "
"state %(state)s") % {'state': new_state})
handle_failure(error)
raise exception.HardwareInspectionFailure(error=error)

View File

@ -497,6 +497,8 @@ class ConductorAPI(object):
async task.
:raises: UnsupportedDriverExtension if the node's driver doesn't
support inspection.
:raises: InvalidStateRequested if 'inspect' is not a valid
action to do in the current state.
"""
cctxt = self.client.prepare(topic=topic or self.topic, version='1.24')

View File

@ -379,13 +379,12 @@ class Connection(api.Connection):
if values['provision_state'] == states.INSPECTING:
values['inspection_started_at'] = timeutils.utcnow()
values['inspection_finished_at'] = None
if (ref.provision_state == states.INSPECTING and
values.get('provision_state') == states.MANAGEABLE):
elif (ref.provision_state == states.INSPECTING and
values['provision_state'] == states.MANAGEABLE):
values['inspection_finished_at'] = timeutils.utcnow()
values['inspection_started_at'] = None
elif (ref.provision_state == states.INSPECTING and
values.get('provision_state') == states.INSPECTFAIL):
elif (ref.provision_state == states.INSPECTING and
values['provision_state'] == states.INSPECTFAIL):
values['inspection_started_at'] = None
ref.update(values)

View File

@ -3083,7 +3083,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
provision_state=states.INSPECTING)
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = states.MANAGEABLE
manager._do_inspect_hardware(task, self.service.conductor.id)
manager._do_inspect_hardware(task)
node.refresh()
self.assertEqual(states.MANAGEABLE, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state)
@ -3097,7 +3097,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
provision_state=states.INSPECTING)
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = states.INSPECTING
manager._do_inspect_hardware(task, self.service.conductor.id)
manager._do_inspect_hardware(task)
node.refresh()
self.assertEqual(states.INSPECTING, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state)
@ -3113,8 +3113,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = None
self.assertRaises(exception.HardwareInspectionFailure,
manager._do_inspect_hardware, task,
self.service.conductor.id)
manager._do_inspect_hardware, task)
node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
@ -3196,8 +3195,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
task = task_manager.TaskManager(self.context, node.uuid)
self.assertRaises(exception.HardwareInspectionFailure,
manager._do_inspect_hardware, task,
self.service.conductor.id)
manager._do_inspect_hardware, task)
node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertEqual(states.MANAGEABLE, node.target_provision_state)