Follow-up patch for generic node inspection

Implements the nit comments that were not addressed
as part of change Id
If9aab90accf5a60ad3febbfc1f5d367eed987c13

Implements: blueprint ironic-node-properties-discovery

Change-Id: Id6d60f460100f321624bb7d59d4e7856224fd22b
This commit is contained in:
Nisha Agarwal 2015-03-02 22:35:33 -08:00
parent 6d925f1f80
commit 37a655674e
6 changed files with 64 additions and 51 deletions

View File

@@ -202,6 +202,10 @@
# (string value) # (string value)
#auth_strategy=keystone #auth_strategy=keystone
# Enable pecan debug mode. WARNING: this is insecure and
# should not be used in production. (boolean value)
#pecan_debug=false
# #
# Options defined in ironic.common.driver_factory # Options defined in ironic.common.driver_factory
@@ -605,7 +609,7 @@
#configdrive_swift_container=ironic_configdrive_container #configdrive_swift_container=ironic_configdrive_container
# Timeout (seconds) for waiting for node inspection. 0 - # Timeout (seconds) for waiting for node inspection. 0 -
# unlimited. (integer value) (integer value) # unlimited. (integer value)
#inspect_timeout=1800 #inspect_timeout=1800

View File

@@ -72,10 +72,11 @@ def hide_driver_internal_info(obj):
def check_allow_management_verbs(verb): def check_allow_management_verbs(verb):
# v1.4 added the MANAGEABLE state and two verbs to move nodes into # v1.4 added the MANAGEABLE state and two verbs to move nodes into
# and out of that state. Reject requests to do this in older versions # and out of that state. Reject requests to do this in older versions
if ((pecan.request.version.minor < 4 and if (pecan.request.version.minor < 4 and
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']]) verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']]):
or (pecan.request.version.minor < 6 and raise exception.NotAcceptable()
verb == ir_states.VERBS['inspect'])): if (pecan.request.version.minor < 6 and
verb == ir_states.VERBS['inspect']):
raise exception.NotAcceptable() raise exception.NotAcceptable()
@@ -518,10 +519,11 @@ class Node(base.APIBase):
"""The UTC date and time of the last provision state change""" """The UTC date and time of the last provision state change"""
inspection_finished_at = datetime.datetime inspection_finished_at = datetime.datetime
"""The UTC date and time when the last inspection finished successfully.""" """The UTC date and time when the last hardware inspection finished
successfully."""
inspection_started_at = datetime.datetime inspection_started_at = datetime.datetime
"""The UTC date and time of the hardware when inspection was started""" """The UTC date and time when the hardware inspection was started"""
maintenance = types.boolean maintenance = types.boolean
"""Indicates whether the node is in maintenance mode.""" """Indicates whether the node is in maintenance mode."""

View File

@@ -166,7 +166,7 @@ conductor_opts = [
cfg.IntOpt('inspect_timeout', cfg.IntOpt('inspect_timeout',
default=1800, default=1800,
help='Timeout (seconds) for waiting for node inspection. ' help='Timeout (seconds) for waiting for node inspection. '
'0 - unlimited. (integer value)'), '0 - unlimited.'),
] ]
CONF = cfg.CONF CONF = cfg.CONF
@@ -928,8 +928,8 @@ class ConductorManager(periodic_task.PeriodicTasks):
sort_key = 'provision_updated_at' sort_key = 'provision_updated_at'
callback_method = utils.cleanup_after_timeout callback_method = utils.cleanup_after_timeout
err_handler = provisioning_error_handler err_handler = provisioning_error_handler
self._fail_if_timeout_reached(context, filters, states.DEPLOYWAIT, self._fail_if_in_state(context, filters, states.DEPLOYWAIT,
sort_key, callback_method, err_handler) sort_key, callback_method, err_handler)
def _do_takeover(self, task): def _do_takeover(self, task):
LOG.debug(('Conductor %(cdr)s taking over node %(node)s'), LOG.debug(('Conductor %(cdr)s taking over node %(node)s'),
@@ -1403,6 +1403,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
@messaging.expected_exceptions(exception.NoFreeConductorWorker, @messaging.expected_exceptions(exception.NoFreeConductorWorker,
exception.NodeLocked, exception.NodeLocked,
exception.HardwareInspectionFailure, exception.HardwareInspectionFailure,
exception.InvalidStateRequested,
exception.UnsupportedDriverExtension) exception.UnsupportedDriverExtension)
def inspect_hardware(self, context, node_id): def inspect_hardware(self, context, node_id):
"""Inspect hardware to obtain hardware properties. """Inspect hardware to obtain hardware properties.
@@ -1420,11 +1421,12 @@ class ConductorManager(periodic_task.PeriodicTasks):
async task async task
:raises: HardwareInspectionFailure when unable to get :raises: HardwareInspectionFailure when unable to get
essential scheduling properties from hardware. essential scheduling properties from hardware.
:raises: InvalidStateRequested if 'inspect' is not a
valid action to do in the current state.
""" """
LOG.debug('RPC inspect_hardware called for node %s', node_id) LOG.debug('RPC inspect_hardware called for node %s', node_id)
with task_manager.acquire(context, node_id, shared=False) as task: with task_manager.acquire(context, node_id, shared=False) as task:
node = task.node
if not getattr(task.driver, 'inspect', None): if not getattr(task.driver, 'inspect', None):
raise exception.UnsupportedDriverExtension( raise exception.UnsupportedDriverExtension(
driver=task.node.driver, extension='inspect') driver=task.node.driver, extension='inspect')
@@ -1442,16 +1444,13 @@ class ConductorManager(periodic_task.PeriodicTasks):
try: try:
task.process_event('inspect', task.process_event('inspect',
callback=self._spawn_worker, callback=self._spawn_worker,
call_args=(_do_inspect_hardware, task, call_args=(_do_inspect_hardware, task),
self.conductor.id),
err_handler=provisioning_error_handler) err_handler=provisioning_error_handler)
except exception.InvalidState: except exception.InvalidState:
error = (_("Inspection is not possible while node " raise exception.InvalidStateRequested(
"%(node)s is in state %(state)s") action='inspect', node=task.node.uuid,
% {'node': node.uuid, state=task.node.provision_state)
'state': node.provision_state})
raise exception.HardwareInspectionFailure(error=error)
@periodic_task.periodic_task( @periodic_task.periodic_task(
spacing=CONF.conductor.check_provision_state_interval) spacing=CONF.conductor.check_provision_state_interval)
@@ -1469,18 +1468,26 @@ class ConductorManager(periodic_task.PeriodicTasks):
'provision_state': states.INSPECTING, 'provision_state': states.INSPECTING,
'inspection_started_before': callback_timeout} 'inspection_started_before': callback_timeout}
sort_key = 'inspection_started_at' sort_key = 'inspection_started_at'
last_error = _("timeout reached while waiting for callback") last_error = _("timeout reached while inspecting the node")
self._fail_if_timeout_reached(context, filters, states.INSPECTING, self._fail_if_in_state(context, filters, states.INSPECTING,
sort_key, last_error=last_error) sort_key, last_error=last_error)
def _fail_if_timeout_reached(self, context, filters, provision_state, def _fail_if_in_state(self, context, filters, provision_state,
sort_key, callback_method=None, sort_key, callback_method=None,
err_handler=None, last_error=None): err_handler=None, last_error=None):
"""Checks if the async(background) process has reached timeout. """Fail nodes that are in specified state.
Retrieves nodes that satisfy the criteria in 'filters'.
If any of these nodes is in 'provision_state', it has failed
in whatever provisioning activity it was currently doing.
That failure is processed here.
:param: context: request context :param: context: request context
:param: filters: criteria (as a dictionary) to get the desired :param: filters: criteria (as a dictionary) to get the desired
list of nodes. list of nodes that satisfy the filter constraints.
For example, if filters['provisioned_before'] = 60,
this would process nodes whose provision_updated_at
field value was 60 or more seconds before 'now'.
:param: provision_state: provision_state that the node is in, :param: provision_state: provision_state that the node is in,
for the provisioning activity to have failed. for the provisioning activity to have failed.
:param: sort_key: the nodes are sorted based on this key. :param: sort_key: the nodes are sorted based on this key.
@@ -1488,8 +1495,9 @@ class ConductorManager(periodic_task.PeriodicTasks):
spawned thread, for a failed node. This spawned thread, for a failed node. This
method must take a :class:`TaskManager` as method must take a :class:`TaskManager` as
the first (and only required) parameter. the first (and only required) parameter.
:param: err_handler: the error handler to be invoked if an error. :param: err_handler: for a failed node, the error handler to invoke
occurs trying to spawn a thread for a failed node. if an error occurs trying to spawn a thread
to do the callback_method.
:param: last_error: the error message to be updated in node.last_error :param: last_error: the error message to be updated in node.last_error
""" """
@@ -1516,9 +1524,6 @@ class ConductorManager(periodic_task.PeriodicTasks):
call_args=(callback_method, task), call_args=(callback_method, task),
err_handler=err_handler) err_handler=err_handler)
else: else:
if not last_error:
last_error = _("timeout reached while waiting "
"for callback")
task.node.last_error = last_error task.node.last_error = last_error
task.process_event('fail') task.process_event('fail')
except exception.NoFreeConductorWorker: except exception.NoFreeConductorWorker:
@@ -1802,12 +1807,19 @@ def do_sync_power_state(task, count):
return count return count
def _do_inspect_hardware(task, conductor_id): def _do_inspect_hardware(task):
"""Prepare the environment and inspect a node.""" """Initiates inspection.
:param: task: a TaskManager instance with an exclusive lock
on its node.
:raises: HardwareInspectionFailure if driver doesn't
return the state as states.MANAGEABLE or
states.INSPECTING.
"""
node = task.node node = task.node
def handle_failure(e): def handle_failure(e):
# NOTE(deva): there is no need to clear conductor_affinity
node.last_error = e node.last_error = e
task.process_event('fail') task.process_event('fail')
LOG.error(_LE("Failed to inspect node %(node)s: %(err)s"), LOG.error(_LE("Failed to inspect node %(node)s: %(err)s"),
@@ -1821,16 +1833,12 @@ def _do_inspect_hardware(task, conductor_id):
error = str(e) error = str(e)
handle_failure(error) handle_failure(error)
# Update conductor_affinity to reference this conductor's ID
# since there may be local persistent state
node.conductor_affinity = conductor_id
if new_state == states.MANAGEABLE: if new_state == states.MANAGEABLE:
task.process_event('done') task.process_event('done')
LOG.info(_LI('Successfully inspected node %(node)s') LOG.info(_LI('Successfully inspected node %(node)s')
% {'node': node.uuid}) % {'node': node.uuid})
elif new_state != states.INSPECTING: elif new_state != states.INSPECTING:
error = (_("Driver returned unexpected state in inspection" error = (_("During inspection, driver returned unexpected "
"%(state)s") % {'state': new_state}) "state %(state)s") % {'state': new_state})
handle_failure(error) handle_failure(error)
raise exception.HardwareInspectionFailure(error=error) raise exception.HardwareInspectionFailure(error=error)

View File

@@ -497,6 +497,8 @@ class ConductorAPI(object):
async task. async task.
:raises: UnsupportedDriverExtension if the node's driver doesn't :raises: UnsupportedDriverExtension if the node's driver doesn't
support inspection. support inspection.
:raises: InvalidStateRequested if 'inspect' is not a valid
action to do in the current state.
""" """
cctxt = self.client.prepare(topic=topic or self.topic, version='1.24') cctxt = self.client.prepare(topic=topic or self.topic, version='1.24')

View File

@@ -379,13 +379,12 @@ class Connection(api.Connection):
if values['provision_state'] == states.INSPECTING: if values['provision_state'] == states.INSPECTING:
values['inspection_started_at'] = timeutils.utcnow() values['inspection_started_at'] = timeutils.utcnow()
values['inspection_finished_at'] = None values['inspection_finished_at'] = None
elif (ref.provision_state == states.INSPECTING and
if (ref.provision_state == states.INSPECTING and values['provision_state'] == states.MANAGEABLE):
values.get('provision_state') == states.MANAGEABLE):
values['inspection_finished_at'] = timeutils.utcnow() values['inspection_finished_at'] = timeutils.utcnow()
values['inspection_started_at'] = None values['inspection_started_at'] = None
elif (ref.provision_state == states.INSPECTING and elif (ref.provision_state == states.INSPECTING and
values.get('provision_state') == states.INSPECTFAIL): values['provision_state'] == states.INSPECTFAIL):
values['inspection_started_at'] = None values['inspection_started_at'] = None
ref.update(values) ref.update(values)

View File

@@ -3083,7 +3083,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
provision_state=states.INSPECTING) provision_state=states.INSPECTING)
task = task_manager.TaskManager(self.context, node.uuid) task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = states.MANAGEABLE mock_inspect.return_value = states.MANAGEABLE
manager._do_inspect_hardware(task, self.service.conductor.id) manager._do_inspect_hardware(task)
node.refresh() node.refresh()
self.assertEqual(states.MANAGEABLE, node.provision_state) self.assertEqual(states.MANAGEABLE, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state) self.assertEqual(states.NOSTATE, node.target_provision_state)
@@ -3097,7 +3097,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
provision_state=states.INSPECTING) provision_state=states.INSPECTING)
task = task_manager.TaskManager(self.context, node.uuid) task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = states.INSPECTING mock_inspect.return_value = states.INSPECTING
manager._do_inspect_hardware(task, self.service.conductor.id) manager._do_inspect_hardware(task)
node.refresh() node.refresh()
self.assertEqual(states.INSPECTING, node.provision_state) self.assertEqual(states.INSPECTING, node.provision_state)
self.assertEqual(states.NOSTATE, node.target_provision_state) self.assertEqual(states.NOSTATE, node.target_provision_state)
@@ -3113,8 +3113,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
task = task_manager.TaskManager(self.context, node.uuid) task = task_manager.TaskManager(self.context, node.uuid)
mock_inspect.return_value = None mock_inspect.return_value = None
self.assertRaises(exception.HardwareInspectionFailure, self.assertRaises(exception.HardwareInspectionFailure,
manager._do_inspect_hardware, task, manager._do_inspect_hardware, task)
self.service.conductor.id)
node.refresh() node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state) self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertEqual(states.MANAGEABLE, node.target_provision_state) self.assertEqual(states.MANAGEABLE, node.target_provision_state)
@@ -3196,8 +3195,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
task = task_manager.TaskManager(self.context, node.uuid) task = task_manager.TaskManager(self.context, node.uuid)
self.assertRaises(exception.HardwareInspectionFailure, self.assertRaises(exception.HardwareInspectionFailure,
manager._do_inspect_hardware, task, manager._do_inspect_hardware, task)
self.service.conductor.id)
node.refresh() node.refresh()
self.assertEqual(states.INSPECTFAIL, node.provision_state) self.assertEqual(states.INSPECTFAIL, node.provision_state)
self.assertEqual(states.MANAGEABLE, node.target_provision_state) self.assertEqual(states.MANAGEABLE, node.target_provision_state)