Follow-up patch for generic node inspection

Implements the nit comments which were not addressed
as part of change id
If9aab90accf5a60ad3febbfc1f5d367eed987c13

Implements: blueprint ironic-node-properties-discovery

Change-Id: Id6d60f460100f321624bb7d59d4e7856224fd22b
This commit is contained in:
Nisha Agarwal 2015-03-02 22:35:33 -08:00
parent 6d925f1f80
commit 37a655674e
6 changed files with 64 additions and 51 deletions

View File

@ -202,6 +202,10 @@
# (string value)
#auth_strategy=keystone
# Enable pecan debug mode. WARNING: this is insecure and
# should not be used in production. (boolean value)
#pecan_debug=false
#
# Options defined in ironic.common.driver_factory
@ -605,7 +609,7 @@
#configdrive_swift_container=ironic_configdrive_container
# Timeout (seconds) for waiting for node inspection. 0 -
# unlimited. (integer value) (integer value)
# unlimited. (integer value)
#inspect_timeout=1800

View File

@ -72,10 +72,11 @@ def hide_driver_internal_info(obj):
def check_allow_management_verbs(verb):
# v1.4 added the MANAGEABLE state and two verbs to move nodes into
# and out of that state. Reject requests to do this in older versions
if ((pecan.request.version.minor < 4 and
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']])
or (pecan.request.version.minor < 6 and
verb == ir_states.VERBS['inspect'])):
if (pecan.request.version.minor < 4 and
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']]):
raise exception.NotAcceptable()
if (pecan.request.version.minor < 6 and
verb == ir_states.VERBS['inspect']):
raise exception.NotAcceptable()
@ -518,10 +519,11 @@ class Node(base.APIBase):
"""The UTC date and time of the last provision state change"""
inspection_finished_at = datetime.datetime
"""The UTC date and time when the last inspection finished successfully."""
"""The UTC date and time when the last hardware inspection finished
successfully."""
inspection_started_at = datetime.datetime
"""The UTC date and time of the hardware when inspection was started"""
"""The UTC date and time when the hardware inspection was started"""
maintenance = types.boolean
"""Indicates whether the node is in maintenance mode."""

View File

@ -166,7 +166,7 @@ conductor_opts = [
cfg.IntOpt('inspect_timeout',
default=1800,
help='Timeout (seconds) for waiting for node inspection. '
'0 - unlimited. (integer value)'),
'0 - unlimited.'),
]
CONF = cfg.CONF
@ -928,7 +928,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
sort_key = 'provision_updated_at'
callback_method = utils.cleanup_after_timeout
err_handler = provisioning_error_handler
self._fail_if_timeout_reached(context, filters, states.DEPLOYWAIT,
self._fail_if_in_state(context, filters, states.DEPLOYWAIT,
sort_key, callback_method, err_handler)
def _do_takeover(self, task):
@ -1403,6 +1403,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
@messaging.expected_exceptions(exception.NoFreeConductorWorker,
exception.NodeLocked,
exception.HardwareInspectionFailure,
exception.InvalidStateRequested,
exception.UnsupportedDriverExtension)
def inspect_hardware(self, context, node_id):
"""Inspect hardware to obtain hardware properties.
@ -1420,11 +1421,12 @@ class ConductorManager(periodic_task.PeriodicTasks):
async task
:raises: HardwareInspectionFailure when unable to get
essential scheduling properties from hardware.
:raises: InvalidStateRequested if 'inspect' is not a
valid action to do in the current state.
"""
LOG.debug('RPC inspect_hardware called for node %s', node_id)
with task_manager.acquire(context, node_id, shared=False) as task:
node = task.node
if not getattr(task.driver, 'inspect', None):
raise exception.UnsupportedDriverExtension(
driver=task.node.driver, extension='inspect')
@ -1442,16 +1444,13 @@ class ConductorManager(periodic_task.PeriodicTasks):
try:
task.process_event('inspect',
callback=self._spawn_worker,
call_args=(_do_inspect_hardware, task,
self.conductor.id),
call_args=(_do_inspect_hardware, task),
err_handler=provisioning_error_handler)
except exception.InvalidState:
error = (_("Inspection is not possible while node "
"%(node)s is in state %(state)s")
% {'node': node.uuid,
'state': node.provision_state})
raise exception.HardwareInspectionFailure(error=error)
raise exception.InvalidStateRequested(
action='inspect', node=task.node.uuid,
state=task.node.provision_state)
@periodic_task.periodic_task(
spacing=CONF.conductor.check_provision_state_interval)
@ -1469,18 +1468,26 @@ class ConductorManager(periodic_task.PeriodicTasks):
'provision_state': states.INSPECTING,
'inspection_started_before': callback_timeout}
sort_key = 'inspection_started_at'
last_error = _("timeout reached while waiting for callback")
self._fail_if_timeout_reached(context, filters, states.INSPECTING,
last_error = _("timeout reached while inspecting the node")
self._fail_if_in_state(context, filters, states.INSPECTING,
sort_key, last_error=last_error)
def _fail_if_timeout_reached(self, context, filters, provision_state,
def _fail_if_in_state(self, context, filters, provision_state,
sort_key, callback_method=None,
err_handler=None, last_error=None):
"""Checks if the async(background) process has reached timeout.
"""Fail nodes that are in specified state.
Retrieves nodes that satisfy the criteria in 'filters'.
If any of these nodes is in 'provision_state', it has failed
in whatever provisioning activity it was currently doing.
That failure is processed here.
:param: context: request context
:param: filters: criteria (as a dictionary) to get the desired
list of nodes.
list of nodes that satisfy the filter constraints.
For example, if filters['provisioned_before'] = 60,
this would process nodes whose provision_updated_at
field value was 60 or more seconds before 'now'.
:param: provision_state: provision_state that the node is in,
for the provisioning activity to have failed.
:param: sort_key: the nodes are sorted based on this key.
@ -1488,8 +1495,9 @@ class ConductorManager(periodic_task.PeriodicTasks):
spawned thread, for a failed node. This
method must take a :class:`TaskManager` as
the first (and only required) parameter.
:param: err_handler: the error handler to be invoked if an error.
occurs trying to spawn a thread for a failed node.
:param: err_handler: for a failed node, the error handler to invoke
if an error occurs trying to spawn an thread
to do the callback_method.
:param: last_error: the error message to be updated in node.last_error
"""
@ -1516,9 +1524,6 @@ class ConductorManager(periodic_task.PeriodicTasks):
call_args=(callback_method, task),
err_handler=err_handler)
else:
if not last_error:
last_error = _("timeout reached while waiting "
"for callback")
task.node.last_error = last_error
task.process_event('fail')
except exception.NoFreeConductorWorker:
@ -1802,12 +1807,19 @@ def do_sync_power_state(task, count):
return count
def _do_inspect_hardware(task, conductor_id):
"""Prepare the environment and inspect a node."""
def _do_inspect_hardware(task):
"""Initiates inspection.
:param: task: a TaskManager instance with an exclusive lock
on its node.
:raises: HardwareInspectionFailure if driver doesn't
return the state as states.MANAGEABLE or
states.INSPECTING.
"""
node = task.node
def handle_failure(e):
# NOTE(deva): there is no need to clear conductor_affinity
node.last_error = e
task.process_event('fail')
LOG.error(_LE("Failed to inspect node %(node)s: %(err)s"),
@ -1821,16 +1833,12 @@ def _do_inspect_hardware(task, conductor_id):
error = str(e)
handle_failure(error)
# Update conductor_affinity to reference this conductor's ID
# since there may be local persistent state
node.conductor_affinity = conductor_id
if new_state == states.MANAGEABLE:
task.process_event('done')
LOG.info(_LI('Successfully inspected node %(node)s')
% {'node': node.uuid})
elif new_state != states.INSPECTING:
error = (_("Driver returned unexpected state in inspection"
"%(state)s") % {'state': new_state})
error = (_("During inspection, driver returned unexpected "
"state %(state)s") % {'state': new_state})
handle_failure(error)
raise exception.HardwareInspectionFailure(error=error)

View File

@ -497,6 +497,8 @@ class ConductorAPI(object):
async task.
:raises: UnsupportedDriverExtension if the node's driver doesn't
support inspection.
:raises: InvalidStateRequested if 'inspect' is not a valid
action to do in the current state.
"""
cctxt = self.client.prepare(topic=topic or self.topic, version='1.24')

View File

@ -379,13 +379,12 @@ class Connection(api.Connection):
if values['provision_state'] == states.INSPECTING:
values['inspection_started_at'] = timeutils.utcnow()
values['inspection_finished_at'] = None
if (ref.provision_state == states.INSPECTING and
values.get('provision_state') == states.MANAGEABLE):
elif (ref.provision_state == states.INSPECTING and
values['provision_state'] == states.MANAGEABLE):
values['inspection_finished_at'] = timeutils.utcnow()
values['inspection_started_at'] = None
elif (ref.provision_state == states.INSPECTING and
values.get('provision_state') == states.INSPECTFAIL):
values['provision_state'] == states.INSPECTFAIL):
values['inspection_started_at'] = None
ref.update(values)

View File

@ -3083,7 +3083,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
provision_state=states.INSPECTING)
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = states.MANAGEABLE
manager._do_inspect_hardware(task, self.service.conductor.id)
manager._do_inspect_hardware(task)
node.refresh()
self.assertEqual(states.MANAGEABLE, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state)
@ -3097,7 +3097,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
provision_state=states.INSPECTING)
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = states.INSPECTING
manager._do_inspect_hardware(task, self.service.conductor.id)
manager._do_inspect_hardware(task)
node.refresh()
self.assertEqual(states.INSPECTING, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state)
@ -3113,8 +3113,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = None
self.assertRaises(exception.HardwareInspectionFailure,
manager._do_inspect_hardware, task,
self.service.conductor.id)
manager._do_inspect_hardware, task)
node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
@ -3196,8 +3195,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
task = task_manager.TaskManager(self.context, node.uuid)
self.assertRaises(exception.HardwareInspectionFailure,
manager._do_inspect_hardware, task,
self.service.conductor.id)
manager._do_inspect_hardware, task)
node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertEqual(states.MANAGEABLE, node.target_provision_state)