diff --git a/etc/ironic/ironic.conf.sample b/etc/ironic/ironic.conf.sample index 0acef9952e..7d5bfbf932 100644 --- a/etc/ironic/ironic.conf.sample +++ b/etc/ironic/ironic.conf.sample @@ -202,6 +202,10 @@ # (string value) #auth_strategy=keystone +# Enable pecan debug mode. WARNING: this is insecure and +# should not be used in production. (boolean value) +#pecan_debug=false + # # Options defined in ironic.common.driver_factory @@ -605,7 +609,7 @@ #configdrive_swift_container=ironic_configdrive_container # Timeout (seconds) for waiting for node inspection. 0 - -# unlimited. (integer value) (integer value) +# unlimited. (integer value) #inspect_timeout=1800 diff --git a/ironic/api/controllers/v1/node.py b/ironic/api/controllers/v1/node.py index 0eee6cc67d..acb5b4f8a2 100644 --- a/ironic/api/controllers/v1/node.py +++ b/ironic/api/controllers/v1/node.py @@ -72,10 +72,11 @@ def hide_driver_internal_info(obj): def check_allow_management_verbs(verb): # v1.4 added the MANAGEABLE state and two verbs to move nodes into # and out of that state. Reject requests to do this in older versions - if ((pecan.request.version.minor < 4 and - verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']]) - or (pecan.request.version.minor < 6 and - verb == ir_states.VERBS['inspect'])): + if (pecan.request.version.minor < 4 and + verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']]): + raise exception.NotAcceptable() + if (pecan.request.version.minor < 6 and + verb == ir_states.VERBS['inspect']): raise exception.NotAcceptable() @@ -518,10 +519,11 @@ class Node(base.APIBase): """The UTC date and time of the last provision state change""" inspection_finished_at = datetime.datetime - """The UTC date and time when the last inspection finished successfully.""" + """The UTC date and time when the last hardware inspection finished + successfully.""" inspection_started_at = datetime.datetime - """The UTC date and time of the hardware when inspection was started""" + """The UTC date and time when the hardware inspection was started""" maintenance = types.boolean """Indicates whether the node is in maintenance mode.""" diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index cb3a90fe5f..fe9c368347 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -166,7 +166,7 @@ conductor_opts = [ cfg.IntOpt('inspect_timeout', default=1800, help='Timeout (seconds) for waiting for node inspection. ' - '0 - unlimited. (integer value)'), + '0 - unlimited.'), ] CONF = cfg.CONF @@ -928,8 +928,8 @@ class ConductorManager(periodic_task.PeriodicTasks): sort_key = 'provision_updated_at' callback_method = utils.cleanup_after_timeout err_handler = provisioning_error_handler - self._fail_if_timeout_reached(context, filters, states.DEPLOYWAIT, - sort_key, callback_method, err_handler) + self._fail_if_in_state(context, filters, states.DEPLOYWAIT, + sort_key, callback_method, err_handler) def _do_takeover(self, task): LOG.debug(('Conductor %(cdr)s taking over node %(node)s'), @@ -1403,6 +1403,7 @@ class ConductorManager(periodic_task.PeriodicTasks): @messaging.expected_exceptions(exception.NoFreeConductorWorker, exception.NodeLocked, exception.HardwareInspectionFailure, + exception.InvalidStateRequested, exception.UnsupportedDriverExtension) def inspect_hardware(self, context, node_id): """Inspect hardware to obtain hardware properties. @@ -1420,11 +1421,12 @@ class ConductorManager(periodic_task.PeriodicTasks): async task :raises: HardwareInspectionFailure when unable to get essential scheduling properties from hardware. + :raises: InvalidStateRequested if 'inspect' is not a + valid action to do in the current state. """ LOG.debug('RPC inspect_hardware called for node %s', node_id) with task_manager.acquire(context, node_id, shared=False) as task: - node = task.node if not getattr(task.driver, 'inspect', None): raise exception.UnsupportedDriverExtension( driver=task.node.driver, extension='inspect') @@ -1442,16 +1444,13 @@ class ConductorManager(periodic_task.PeriodicTasks): try: task.process_event('inspect', callback=self._spawn_worker, - call_args=(_do_inspect_hardware, task, - self.conductor.id), + call_args=(_do_inspect_hardware, task), err_handler=provisioning_error_handler) except exception.InvalidState: - error = (_("Inspection is not possible while node " - "%(node)s is in state %(state)s") - % {'node': node.uuid, - 'state': node.provision_state}) - raise exception.HardwareInspectionFailure(error=error) + raise exception.InvalidStateRequested( + action='inspect', node=task.node.uuid, + state=task.node.provision_state) @periodic_task.periodic_task( spacing=CONF.conductor.check_provision_state_interval) @@ -1469,18 +1468,26 @@ class ConductorManager(periodic_task.PeriodicTasks): 'provision_state': states.INSPECTING, 'inspection_started_before': callback_timeout} sort_key = 'inspection_started_at' - last_error = _("timeout reached while waiting for callback") - self._fail_if_timeout_reached(context, filters, states.INSPECTING, - sort_key, last_error=last_error) + last_error = _("timeout reached while inspecting the node") + self._fail_if_in_state(context, filters, states.INSPECTING, + sort_key, last_error=last_error) - def _fail_if_timeout_reached(self, context, filters, provision_state, - sort_key, callback_method=None, - err_handler=None, last_error=None): - """Checks if the async(background) process has reached timeout. + def _fail_if_in_state(self, context, filters, provision_state, + sort_key, callback_method=None, + err_handler=None, last_error=None): + """Fail nodes that are in specified state. + + Retrieves nodes that satisfy the criteria in 'filters'. + If any of these nodes is in 'provision_state', it has failed + in whatever provisioning activity it was currently doing. + That failure is processed here. :param: context: request context :param: filters: criteria (as a dictionary) to get the desired - list of nodes. + list of nodes that satisfy the filter constraints. + For example, if filters['provisioned_before'] = 60, + this would process nodes whose provision_updated_at + field value was 60 or more seconds before 'now'. :param: provision_state: provision_state that the node is in, for the provisioning activity to have failed. :param: sort_key: the nodes are sorted based on this key. @@ -1488,8 +1495,9 @@ class ConductorManager(periodic_task.PeriodicTasks): spawned thread, for a failed node. This method must take a :class:`TaskManager` as the first (and only required) parameter. - :param: err_handler: the error handler to be invoked if an error. - occurs trying to spawn a thread for a failed node. + :param: err_handler: for a failed node, the error handler to invoke + if an error occurs trying to spawn an thread + to do the callback_method. :param: last_error: the error message to be updated in node.last_error """ @@ -1516,9 +1524,6 @@ class ConductorManager(periodic_task.PeriodicTasks): call_args=(callback_method, task), err_handler=err_handler) else: - if not last_error: - last_error = _("timeout reached while waiting " - "for callback") task.node.last_error = last_error task.process_event('fail') except exception.NoFreeConductorWorker: @@ -1802,12 +1807,19 @@ def do_sync_power_state(task, count): return count -def _do_inspect_hardware(task, conductor_id): - """Prepare the environment and inspect a node.""" +def _do_inspect_hardware(task): + """Initiates inspection. + + :param: task: a TaskManager instance with an exclusive lock + on its node. + :raises: HardwareInspectionFailure if driver doesn't + return the state as states.MANAGEABLE or + states.INSPECTING. + + """ node = task.node def handle_failure(e): - # NOTE(deva): there is no need to clear conductor_affinity node.last_error = e task.process_event('fail') LOG.error(_LE("Failed to inspect node %(node)s: %(err)s"), @@ -1821,16 +1833,12 @@ def _do_inspect_hardware(task, conductor_id): error = str(e) handle_failure(error) - # Update conductor_affinity to reference this conductor's ID - # since there may be local persistent state - node.conductor_affinity = conductor_id - if new_state == states.MANAGEABLE: task.process_event('done') LOG.info(_LI('Successfully inspected node %(node)s') % {'node': node.uuid}) elif new_state != states.INSPECTING: - error = (_("Driver returned unexpected state in inspection" - "%(state)s") % {'state': new_state}) + error = (_("During inspection, driver returned unexpected " + "state %(state)s") % {'state': new_state}) handle_failure(error) raise exception.HardwareInspectionFailure(error=error) diff --git a/ironic/conductor/rpcapi.py b/ironic/conductor/rpcapi.py index 173ea68929..b3c1eb57c2 100644 --- a/ironic/conductor/rpcapi.py +++ b/ironic/conductor/rpcapi.py @@ -497,6 +497,8 @@ class ConductorAPI(object): async task. :raises: UnsupportedDriverExtension if the node's driver doesn't support inspection. + :raises: InvalidStateRequested if 'inspect' is not a valid + action to do in the current state. """ cctxt = self.client.prepare(topic=topic or self.topic, version='1.24') diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py index 5a529d518e..a6e1031d5f 100644 --- a/ironic/db/sqlalchemy/api.py +++ b/ironic/db/sqlalchemy/api.py @@ -379,13 +379,12 @@ class Connection(api.Connection): if values['provision_state'] == states.INSPECTING: values['inspection_started_at'] = timeutils.utcnow() values['inspection_finished_at'] = None - - if (ref.provision_state == states.INSPECTING and - values.get('provision_state') == states.MANAGEABLE): + elif (ref.provision_state == states.INSPECTING and + values['provision_state'] == states.MANAGEABLE): values['inspection_finished_at'] = timeutils.utcnow() values['inspection_started_at'] = None - elif (ref.provision_state == states.INSPECTING and - values.get('provision_state') == states.INSPECTFAIL): + elif (ref.provision_state == states.INSPECTING and + values['provision_state'] == states.INSPECTFAIL): values['inspection_started_at'] = None ref.update(values) diff --git a/ironic/tests/conductor/test_manager.py b/ironic/tests/conductor/test_manager.py index 0640f34805..a034389285 100644 --- a/ironic/tests/conductor/test_manager.py +++ b/ironic/tests/conductor/test_manager.py @@ -3083,7 +3083,7 @@ class NodeInspectHardware(_ServiceSetUpMixin, provision_state=states.INSPECTING) task = task_manager.TaskManager(self.context, node.uuid) mock_inspect.return_value = states.MANAGEABLE - manager._do_inspect_hardware(task, self.service.conductor.id) + manager._do_inspect_hardware(task) node.refresh() self.assertEqual(states.MANAGEABLE, node.provision_state) self.assertEqual(states.NOSTATE, node.target_provision_state) @@ -3097,7 +3097,7 @@ class NodeInspectHardware(_ServiceSetUpMixin, provision_state=states.INSPECTING) task = task_manager.TaskManager(self.context, node.uuid) mock_inspect.return_value = states.INSPECTING - manager._do_inspect_hardware(task, self.service.conductor.id) + manager._do_inspect_hardware(task) node.refresh() self.assertEqual(states.INSPECTING, node.provision_state) self.assertEqual(states.NOSTATE, node.target_provision_state) @@ -3113,8 +3113,7 @@ class NodeInspectHardware(_ServiceSetUpMixin, task = task_manager.TaskManager(self.context, node.uuid) mock_inspect.return_value = None self.assertRaises(exception.HardwareInspectionFailure, - manager._do_inspect_hardware, task, - self.service.conductor.id) + manager._do_inspect_hardware, task) node.refresh() self.assertEqual(states.INSPECTFAIL, node.provision_state) self.assertEqual(states.MANAGEABLE, node.target_provision_state) @@ -3196,8 +3195,7 @@ class NodeInspectHardware(_ServiceSetUpMixin, task = task_manager.TaskManager(self.context, node.uuid) self.assertRaises(exception.HardwareInspectionFailure, - manager._do_inspect_hardware, task, - self.service.conductor.id) + manager._do_inspect_hardware, task) node.refresh() self.assertEqual(states.INSPECTFAIL, node.provision_state) self.assertEqual(states.MANAGEABLE, node.target_provision_state)