Follow-up patch for generic node inspection
Implements the nit comments which were not addressed as part of change id If9aab90accf5a60ad3febbfc1f5d367eed987c13 Implements: blueprint ironic-node-properties-discovery Change-Id: Id6d60f460100f321624bb7d59d4e7856224fd22b
This commit is contained in:
parent
6d925f1f80
commit
37a655674e
@ -202,6 +202,10 @@
|
|||||||
# (string value)
|
# (string value)
|
||||||
#auth_strategy=keystone
|
#auth_strategy=keystone
|
||||||
|
|
||||||
|
# Enable pecan debug mode. WARNING: this is insecure and
|
||||||
|
# should not be used in production. (boolean value)
|
||||||
|
#pecan_debug=false
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Options defined in ironic.common.driver_factory
|
# Options defined in ironic.common.driver_factory
|
||||||
@ -605,7 +609,7 @@
|
|||||||
#configdrive_swift_container=ironic_configdrive_container
|
#configdrive_swift_container=ironic_configdrive_container
|
||||||
|
|
||||||
# Timeout (seconds) for waiting for node inspection. 0 -
|
# Timeout (seconds) for waiting for node inspection. 0 -
|
||||||
# unlimited. (integer value) (integer value)
|
# unlimited. (integer value)
|
||||||
#inspect_timeout=1800
|
#inspect_timeout=1800
|
||||||
|
|
||||||
|
|
||||||
|
@ -72,10 +72,11 @@ def hide_driver_internal_info(obj):
|
|||||||
def check_allow_management_verbs(verb):
|
def check_allow_management_verbs(verb):
|
||||||
# v1.4 added the MANAGEABLE state and two verbs to move nodes into
|
# v1.4 added the MANAGEABLE state and two verbs to move nodes into
|
||||||
# and out of that state. Reject requests to do this in older versions
|
# and out of that state. Reject requests to do this in older versions
|
||||||
if ((pecan.request.version.minor < 4 and
|
if (pecan.request.version.minor < 4 and
|
||||||
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']])
|
verb in [ir_states.VERBS['manage'], ir_states.VERBS['provide']]):
|
||||||
or (pecan.request.version.minor < 6 and
|
raise exception.NotAcceptable()
|
||||||
verb == ir_states.VERBS['inspect'])):
|
if (pecan.request.version.minor < 6 and
|
||||||
|
verb == ir_states.VERBS['inspect']):
|
||||||
raise exception.NotAcceptable()
|
raise exception.NotAcceptable()
|
||||||
|
|
||||||
|
|
||||||
@ -518,10 +519,11 @@ class Node(base.APIBase):
|
|||||||
"""The UTC date and time of the last provision state change"""
|
"""The UTC date and time of the last provision state change"""
|
||||||
|
|
||||||
inspection_finished_at = datetime.datetime
|
inspection_finished_at = datetime.datetime
|
||||||
"""The UTC date and time when the last inspection finished successfully."""
|
"""The UTC date and time when the last hardware inspection finished
|
||||||
|
successfully."""
|
||||||
|
|
||||||
inspection_started_at = datetime.datetime
|
inspection_started_at = datetime.datetime
|
||||||
"""The UTC date and time of the hardware when inspection was started"""
|
"""The UTC date and time when the hardware inspection was started"""
|
||||||
|
|
||||||
maintenance = types.boolean
|
maintenance = types.boolean
|
||||||
"""Indicates whether the node is in maintenance mode."""
|
"""Indicates whether the node is in maintenance mode."""
|
||||||
|
@ -166,7 +166,7 @@ conductor_opts = [
|
|||||||
cfg.IntOpt('inspect_timeout',
|
cfg.IntOpt('inspect_timeout',
|
||||||
default=1800,
|
default=1800,
|
||||||
help='Timeout (seconds) for waiting for node inspection. '
|
help='Timeout (seconds) for waiting for node inspection. '
|
||||||
'0 - unlimited. (integer value)'),
|
'0 - unlimited.'),
|
||||||
|
|
||||||
]
|
]
|
||||||
CONF = cfg.CONF
|
CONF = cfg.CONF
|
||||||
@ -928,8 +928,8 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
|||||||
sort_key = 'provision_updated_at'
|
sort_key = 'provision_updated_at'
|
||||||
callback_method = utils.cleanup_after_timeout
|
callback_method = utils.cleanup_after_timeout
|
||||||
err_handler = provisioning_error_handler
|
err_handler = provisioning_error_handler
|
||||||
self._fail_if_timeout_reached(context, filters, states.DEPLOYWAIT,
|
self._fail_if_in_state(context, filters, states.DEPLOYWAIT,
|
||||||
sort_key, callback_method, err_handler)
|
sort_key, callback_method, err_handler)
|
||||||
|
|
||||||
def _do_takeover(self, task):
|
def _do_takeover(self, task):
|
||||||
LOG.debug(('Conductor %(cdr)s taking over node %(node)s'),
|
LOG.debug(('Conductor %(cdr)s taking over node %(node)s'),
|
||||||
@ -1403,6 +1403,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
|||||||
@messaging.expected_exceptions(exception.NoFreeConductorWorker,
|
@messaging.expected_exceptions(exception.NoFreeConductorWorker,
|
||||||
exception.NodeLocked,
|
exception.NodeLocked,
|
||||||
exception.HardwareInspectionFailure,
|
exception.HardwareInspectionFailure,
|
||||||
|
exception.InvalidStateRequested,
|
||||||
exception.UnsupportedDriverExtension)
|
exception.UnsupportedDriverExtension)
|
||||||
def inspect_hardware(self, context, node_id):
|
def inspect_hardware(self, context, node_id):
|
||||||
"""Inspect hardware to obtain hardware properties.
|
"""Inspect hardware to obtain hardware properties.
|
||||||
@ -1420,11 +1421,12 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
|||||||
async task
|
async task
|
||||||
:raises: HardwareInspectionFailure when unable to get
|
:raises: HardwareInspectionFailure when unable to get
|
||||||
essential scheduling properties from hardware.
|
essential scheduling properties from hardware.
|
||||||
|
:raises: InvalidStateRequested if 'inspect' is not a
|
||||||
|
valid action to do in the current state.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
LOG.debug('RPC inspect_hardware called for node %s', node_id)
|
LOG.debug('RPC inspect_hardware called for node %s', node_id)
|
||||||
with task_manager.acquire(context, node_id, shared=False) as task:
|
with task_manager.acquire(context, node_id, shared=False) as task:
|
||||||
node = task.node
|
|
||||||
if not getattr(task.driver, 'inspect', None):
|
if not getattr(task.driver, 'inspect', None):
|
||||||
raise exception.UnsupportedDriverExtension(
|
raise exception.UnsupportedDriverExtension(
|
||||||
driver=task.node.driver, extension='inspect')
|
driver=task.node.driver, extension='inspect')
|
||||||
@ -1442,16 +1444,13 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
|||||||
try:
|
try:
|
||||||
task.process_event('inspect',
|
task.process_event('inspect',
|
||||||
callback=self._spawn_worker,
|
callback=self._spawn_worker,
|
||||||
call_args=(_do_inspect_hardware, task,
|
call_args=(_do_inspect_hardware, task),
|
||||||
self.conductor.id),
|
|
||||||
err_handler=provisioning_error_handler)
|
err_handler=provisioning_error_handler)
|
||||||
|
|
||||||
except exception.InvalidState:
|
except exception.InvalidState:
|
||||||
error = (_("Inspection is not possible while node "
|
raise exception.InvalidStateRequested(
|
||||||
"%(node)s is in state %(state)s")
|
action='inspect', node=task.node.uuid,
|
||||||
% {'node': node.uuid,
|
state=task.node.provision_state)
|
||||||
'state': node.provision_state})
|
|
||||||
raise exception.HardwareInspectionFailure(error=error)
|
|
||||||
|
|
||||||
@periodic_task.periodic_task(
|
@periodic_task.periodic_task(
|
||||||
spacing=CONF.conductor.check_provision_state_interval)
|
spacing=CONF.conductor.check_provision_state_interval)
|
||||||
@ -1469,18 +1468,26 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
|||||||
'provision_state': states.INSPECTING,
|
'provision_state': states.INSPECTING,
|
||||||
'inspection_started_before': callback_timeout}
|
'inspection_started_before': callback_timeout}
|
||||||
sort_key = 'inspection_started_at'
|
sort_key = 'inspection_started_at'
|
||||||
last_error = _("timeout reached while waiting for callback")
|
last_error = _("timeout reached while inspecting the node")
|
||||||
self._fail_if_timeout_reached(context, filters, states.INSPECTING,
|
self._fail_if_in_state(context, filters, states.INSPECTING,
|
||||||
sort_key, last_error=last_error)
|
sort_key, last_error=last_error)
|
||||||
|
|
||||||
def _fail_if_timeout_reached(self, context, filters, provision_state,
|
def _fail_if_in_state(self, context, filters, provision_state,
|
||||||
sort_key, callback_method=None,
|
sort_key, callback_method=None,
|
||||||
err_handler=None, last_error=None):
|
err_handler=None, last_error=None):
|
||||||
"""Checks if the async(background) process has reached timeout.
|
"""Fail nodes that are in specified state.
|
||||||
|
|
||||||
|
Retrieves nodes that satisfy the criteria in 'filters'.
|
||||||
|
If any of these nodes is in 'provision_state', it has failed
|
||||||
|
in whatever provisioning activity it was currently doing.
|
||||||
|
That failure is processed here.
|
||||||
|
|
||||||
:param: context: request context
|
:param: context: request context
|
||||||
:param: filters: criteria (as a dictionary) to get the desired
|
:param: filters: criteria (as a dictionary) to get the desired
|
||||||
list of nodes.
|
list of nodes that satisfy the filter constraints.
|
||||||
|
For example, if filters['provisioned_before'] = 60,
|
||||||
|
this would process nodes whose provision_updated_at
|
||||||
|
field value was 60 or more seconds before 'now'.
|
||||||
:param: provision_state: provision_state that the node is in,
|
:param: provision_state: provision_state that the node is in,
|
||||||
for the provisioning activity to have failed.
|
for the provisioning activity to have failed.
|
||||||
:param: sort_key: the nodes are sorted based on this key.
|
:param: sort_key: the nodes are sorted based on this key.
|
||||||
@ -1488,8 +1495,9 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
|||||||
spawned thread, for a failed node. This
|
spawned thread, for a failed node. This
|
||||||
method must take a :class:`TaskManager` as
|
method must take a :class:`TaskManager` as
|
||||||
the first (and only required) parameter.
|
the first (and only required) parameter.
|
||||||
:param: err_handler: the error handler to be invoked if an error.
|
:param: err_handler: for a failed node, the error handler to invoke
|
||||||
occurs trying to spawn a thread for a failed node.
|
if an error occurs trying to spawn an thread
|
||||||
|
to do the callback_method.
|
||||||
:param: last_error: the error message to be updated in node.last_error
|
:param: last_error: the error message to be updated in node.last_error
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -1516,9 +1524,6 @@ class ConductorManager(periodic_task.PeriodicTasks):
|
|||||||
call_args=(callback_method, task),
|
call_args=(callback_method, task),
|
||||||
err_handler=err_handler)
|
err_handler=err_handler)
|
||||||
else:
|
else:
|
||||||
if not last_error:
|
|
||||||
last_error = _("timeout reached while waiting "
|
|
||||||
"for callback")
|
|
||||||
task.node.last_error = last_error
|
task.node.last_error = last_error
|
||||||
task.process_event('fail')
|
task.process_event('fail')
|
||||||
except exception.NoFreeConductorWorker:
|
except exception.NoFreeConductorWorker:
|
||||||
@ -1802,12 +1807,19 @@ def do_sync_power_state(task, count):
|
|||||||
return count
|
return count
|
||||||
|
|
||||||
|
|
||||||
def _do_inspect_hardware(task, conductor_id):
|
def _do_inspect_hardware(task):
|
||||||
"""Prepare the environment and inspect a node."""
|
"""Initiates inspection.
|
||||||
|
|
||||||
|
:param: task: a TaskManager instance with an exclusive lock
|
||||||
|
on its node.
|
||||||
|
:raises: HardwareInspectionFailure if driver doesn't
|
||||||
|
return the state as states.MANAGEABLE or
|
||||||
|
states.INSPECTING.
|
||||||
|
|
||||||
|
"""
|
||||||
node = task.node
|
node = task.node
|
||||||
|
|
||||||
def handle_failure(e):
|
def handle_failure(e):
|
||||||
# NOTE(deva): there is no need to clear conductor_affinity
|
|
||||||
node.last_error = e
|
node.last_error = e
|
||||||
task.process_event('fail')
|
task.process_event('fail')
|
||||||
LOG.error(_LE("Failed to inspect node %(node)s: %(err)s"),
|
LOG.error(_LE("Failed to inspect node %(node)s: %(err)s"),
|
||||||
@ -1821,16 +1833,12 @@ def _do_inspect_hardware(task, conductor_id):
|
|||||||
error = str(e)
|
error = str(e)
|
||||||
handle_failure(error)
|
handle_failure(error)
|
||||||
|
|
||||||
# Update conductor_affinity to reference this conductor's ID
|
|
||||||
# since there may be local persistent state
|
|
||||||
node.conductor_affinity = conductor_id
|
|
||||||
|
|
||||||
if new_state == states.MANAGEABLE:
|
if new_state == states.MANAGEABLE:
|
||||||
task.process_event('done')
|
task.process_event('done')
|
||||||
LOG.info(_LI('Successfully inspected node %(node)s')
|
LOG.info(_LI('Successfully inspected node %(node)s')
|
||||||
% {'node': node.uuid})
|
% {'node': node.uuid})
|
||||||
elif new_state != states.INSPECTING:
|
elif new_state != states.INSPECTING:
|
||||||
error = (_("Driver returned unexpected state in inspection"
|
error = (_("During inspection, driver returned unexpected "
|
||||||
"%(state)s") % {'state': new_state})
|
"state %(state)s") % {'state': new_state})
|
||||||
handle_failure(error)
|
handle_failure(error)
|
||||||
raise exception.HardwareInspectionFailure(error=error)
|
raise exception.HardwareInspectionFailure(error=error)
|
||||||
|
@ -497,6 +497,8 @@ class ConductorAPI(object):
|
|||||||
async task.
|
async task.
|
||||||
:raises: UnsupportedDriverExtension if the node's driver doesn't
|
:raises: UnsupportedDriverExtension if the node's driver doesn't
|
||||||
support inspection.
|
support inspection.
|
||||||
|
:raises: InvalidStateRequested if 'inspect' is not a valid
|
||||||
|
action to do in the current state.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
cctxt = self.client.prepare(topic=topic or self.topic, version='1.24')
|
cctxt = self.client.prepare(topic=topic or self.topic, version='1.24')
|
||||||
|
@ -379,13 +379,12 @@ class Connection(api.Connection):
|
|||||||
if values['provision_state'] == states.INSPECTING:
|
if values['provision_state'] == states.INSPECTING:
|
||||||
values['inspection_started_at'] = timeutils.utcnow()
|
values['inspection_started_at'] = timeutils.utcnow()
|
||||||
values['inspection_finished_at'] = None
|
values['inspection_finished_at'] = None
|
||||||
|
elif (ref.provision_state == states.INSPECTING and
|
||||||
if (ref.provision_state == states.INSPECTING and
|
values['provision_state'] == states.MANAGEABLE):
|
||||||
values.get('provision_state') == states.MANAGEABLE):
|
|
||||||
values['inspection_finished_at'] = timeutils.utcnow()
|
values['inspection_finished_at'] = timeutils.utcnow()
|
||||||
values['inspection_started_at'] = None
|
values['inspection_started_at'] = None
|
||||||
elif (ref.provision_state == states.INSPECTING and
|
elif (ref.provision_state == states.INSPECTING and
|
||||||
values.get('provision_state') == states.INSPECTFAIL):
|
values['provision_state'] == states.INSPECTFAIL):
|
||||||
values['inspection_started_at'] = None
|
values['inspection_started_at'] = None
|
||||||
|
|
||||||
ref.update(values)
|
ref.update(values)
|
||||||
|
@ -3083,7 +3083,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
|
|||||||
provision_state=states.INSPECTING)
|
provision_state=states.INSPECTING)
|
||||||
task = task_manager.TaskManager(self.context, node.uuid)
|
task = task_manager.TaskManager(self.context, node.uuid)
|
||||||
mock_inspect.return_value = states.MANAGEABLE
|
mock_inspect.return_value = states.MANAGEABLE
|
||||||
manager._do_inspect_hardware(task, self.service.conductor.id)
|
manager._do_inspect_hardware(task)
|
||||||
node.refresh()
|
node.refresh()
|
||||||
self.assertEqual(states.MANAGEABLE, node.provision_state)
|
self.assertEqual(states.MANAGEABLE, node.provision_state)
|
||||||
self.assertEqual(states.NOSTATE, node.target_provision_state)
|
self.assertEqual(states.NOSTATE, node.target_provision_state)
|
||||||
@ -3097,7 +3097,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
|
|||||||
provision_state=states.INSPECTING)
|
provision_state=states.INSPECTING)
|
||||||
task = task_manager.TaskManager(self.context, node.uuid)
|
task = task_manager.TaskManager(self.context, node.uuid)
|
||||||
mock_inspect.return_value = states.INSPECTING
|
mock_inspect.return_value = states.INSPECTING
|
||||||
manager._do_inspect_hardware(task, self.service.conductor.id)
|
manager._do_inspect_hardware(task)
|
||||||
node.refresh()
|
node.refresh()
|
||||||
self.assertEqual(states.INSPECTING, node.provision_state)
|
self.assertEqual(states.INSPECTING, node.provision_state)
|
||||||
self.assertEqual(states.NOSTATE, node.target_provision_state)
|
self.assertEqual(states.NOSTATE, node.target_provision_state)
|
||||||
@ -3113,8 +3113,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
|
|||||||
task = task_manager.TaskManager(self.context, node.uuid)
|
task = task_manager.TaskManager(self.context, node.uuid)
|
||||||
mock_inspect.return_value = None
|
mock_inspect.return_value = None
|
||||||
self.assertRaises(exception.HardwareInspectionFailure,
|
self.assertRaises(exception.HardwareInspectionFailure,
|
||||||
manager._do_inspect_hardware, task,
|
manager._do_inspect_hardware, task)
|
||||||
self.service.conductor.id)
|
|
||||||
node.refresh()
|
node.refresh()
|
||||||
self.assertEqual(states.INSPECTFAIL, node.provision_state)
|
self.assertEqual(states.INSPECTFAIL, node.provision_state)
|
||||||
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
|
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
|
||||||
@ -3196,8 +3195,7 @@ class NodeInspectHardware(_ServiceSetUpMixin,
|
|||||||
task = task_manager.TaskManager(self.context, node.uuid)
|
task = task_manager.TaskManager(self.context, node.uuid)
|
||||||
|
|
||||||
self.assertRaises(exception.HardwareInspectionFailure,
|
self.assertRaises(exception.HardwareInspectionFailure,
|
||||||
manager._do_inspect_hardware, task,
|
manager._do_inspect_hardware, task)
|
||||||
self.service.conductor.id)
|
|
||||||
node.refresh()
|
node.refresh()
|
||||||
self.assertEqual(states.INSPECTFAIL, node.provision_state)
|
self.assertEqual(states.INSPECTFAIL, node.provision_state)
|
||||||
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
|
self.assertEqual(states.MANAGEABLE, node.target_provision_state)
|
||||||
|
Loading…
Reference in New Issue
Block a user