Allow abort for CLEANWAIT states

This patch allows a node in CLEANWAIT to be aborted.

The @clean_step decorator has been extended to accept a new parameter
"abortable". By default this parameter defaults to False since a clean
step could potentially brick a machine so we better make it explicit.

If the clean step is abortable, the process of aborting will happen
immediately; if the clean step is not abortable the abortion will happen
as soon as the clean step is done. If a clean step is marked to have
the abortion done after its completion but it is the final clean step in
the cleaning operation the cleaning process will just finish successfully.

A new verb 'abort' is being added to the API and the microversion is
being bumped to 1.13.

Closes-Bug: #1455825
Change-Id: Ia6846c048b3dab44a8280366a7305aca1d3eb783
This commit is contained in:
Lucas Alvares Gomes 2015-09-07 10:04:18 +01:00
parent cd32fa5421
commit 795b5e37ca
9 changed files with 357 additions and 93 deletions

View File

@ -32,6 +32,11 @@ always requests the newest supported API version.
API Versions History API Versions History
-------------------- --------------------
**1.13**
Add a new verb ``abort`` to the API used to abort nodes in
``CLEANWAIT`` state.
**1.12** **1.12**
Add ability to get/set ``node.target_raid_config`` and to get Add ability to get/set ``node.target_raid_config`` and to get

View File

@ -67,8 +67,14 @@ MIN_VERB_VERSIONS = {
ir_states.VERBS['provide']: versions.MINOR_4_MANAGEABLE_STATE, ir_states.VERBS['provide']: versions.MINOR_4_MANAGEABLE_STATE,
ir_states.VERBS['inspect']: versions.MINOR_6_INSPECT_STATE, ir_states.VERBS['inspect']: versions.MINOR_6_INSPECT_STATE,
ir_states.VERBS['abort']: versions.MINOR_13_ABORT_VERB,
} }
# States where calling do_provisioning_action makes sense
PROVISION_ACTION_STATES = (ir_states.VERBS['manage'],
ir_states.VERBS['provide'],
ir_states.VERBS['abort'])
def hide_fields_in_newer_versions(obj): def hide_fields_in_newer_versions(obj):
# if requested version is < 1.3, hide driver_internal_info # if requested version is < 1.3, hide driver_internal_info
@ -425,7 +431,7 @@ class NodeStatesController(rest.RestController):
of the requested action. of the requested action.
:param node_ident: UUID or logical name of a node. :param node_ident: UUID or logical name of a node.
:param target: The desired provision state of the node. :param target: The desired provision state of the node or verb.
:param configdrive: Optional. A gzipped and base64 encoded :param configdrive: Optional. A gzipped and base64 encoded
configdrive. Only valid when setting provision state configdrive. Only valid when setting provision state
to "active". to "active".
@ -487,8 +493,7 @@ class NodeStatesController(rest.RestController):
elif target == ir_states.VERBS['inspect']: elif target == ir_states.VERBS['inspect']:
pecan.request.rpcapi.inspect_hardware( pecan.request.rpcapi.inspect_hardware(
pecan.request.context, rpc_node.uuid, topic=topic) pecan.request.context, rpc_node.uuid, topic=topic)
elif target in ( elif target in PROVISION_ACTION_STATES:
ir_states.VERBS['manage'], ir_states.VERBS['provide']):
pecan.request.rpcapi.do_provisioning_action( pecan.request.rpcapi.do_provisioning_action(
pecan.request.context, rpc_node.uuid, target, topic) pecan.request.context, rpc_node.uuid, target, topic)
else: else:

View File

@ -40,6 +40,7 @@ BASE_VERSION = 1
# v1.10: Logical node names support RFC 3986 unreserved characters # v1.10: Logical node names support RFC 3986 unreserved characters
# v1.11: Nodes appear in ENROLL state by default # v1.11: Nodes appear in ENROLL state by default
# v1.12: Add support for RAID # v1.12: Add support for RAID
# v1.13: Add 'abort' verb to CLEANWAIT
MINOR_0_JUNO = 0 MINOR_0_JUNO = 0
MINOR_1_INITIAL_VERSION = 1 MINOR_1_INITIAL_VERSION = 1
@ -54,11 +55,12 @@ MINOR_9_PROVISION_STATE_FILTER = 9
MINOR_10_UNRESTRICTED_NODE_NAME = 10 MINOR_10_UNRESTRICTED_NODE_NAME = 10
MINOR_11_ENROLL_STATE = 11 MINOR_11_ENROLL_STATE = 11
MINOR_12_RAID_CONFIG = 12 MINOR_12_RAID_CONFIG = 12
MINOR_13_ABORT_VERB = 13
# When adding another version, update MINOR_MAX_VERSION and also update # When adding another version, update MINOR_MAX_VERSION and also update
# doc/source/webapi/v1.rst with a detailed explanation of what the version has # doc/source/webapi/v1.rst with a detailed explanation of what the version has
# changed. # changed.
MINOR_MAX_VERSION = MINOR_12_RAID_CONFIG MINOR_MAX_VERSION = MINOR_13_ABORT_VERB
# String representations of the minor and maximum versions # String representations of the minor and maximum versions
MIN_VERSION_STRING = '{}.{}'.format(BASE_VERSION, MINOR_1_INITIAL_VERSION) MIN_VERSION_STRING = '{}.{}'.format(BASE_VERSION, MINOR_1_INITIAL_VERSION)

View File

@ -45,6 +45,7 @@ VERBS = {
'manage': 'manage', 'manage': 'manage',
'provide': 'provide', 'provide': 'provide',
'inspect': 'inspect', 'inspect': 'inspect',
'abort': 'abort',
} }
""" Mapping of state-changing events that are PUT to the REST API """ Mapping of state-changing events that are PUT to the REST API
@ -289,6 +290,9 @@ machine.add_transition(CLEANING, AVAILABLE, 'done')
machine.add_transition(CLEANING, CLEANFAIL, 'fail') machine.add_transition(CLEANING, CLEANFAIL, 'fail')
machine.add_transition(CLEANWAIT, CLEANFAIL, 'fail') machine.add_transition(CLEANWAIT, CLEANFAIL, 'fail')
# While waiting for a clean step to be finished, cleaning may be aborted
machine.add_transition(CLEANWAIT, CLEANFAIL, 'abort')
# Cleaning may also wait on external callbacks # Cleaning may also wait on external callbacks
machine.add_transition(CLEANING, CLEANWAIT, 'wait') machine.add_transition(CLEANING, CLEANWAIT, 'wait')
machine.add_transition(CLEANWAIT, CLEANING, 'resume') machine.add_transition(CLEANWAIT, CLEANING, 'resume')

View File

@ -830,6 +830,34 @@ class ConductorManager(periodic_task.PeriodicTasks):
state=node.provision_state) state=node.provision_state)
self._do_node_clean(task) self._do_node_clean(task)
def _get_node_next_clean_steps(self, task):
"""Get the task's node's next clean steps.
:param task: A TaskManager object
:raises: NodeCleaningFailure if an internal error occurred when
getting the next clean steps
"""
node = task.node
if not node.clean_step:
return []
next_steps = node.driver_internal_info.get('clean_steps', [])
try:
# Trim off the last clean step (now finished) and
# all previous steps
next_steps = next_steps[next_steps.index(node.clean_step) + 1:]
except ValueError:
msg = (_('Node %(node)s got an invalid last step for '
'%(state)s: %(step)s.') %
{'node': node.uuid, 'step': node.clean_step,
'state': node.provision_state})
LOG.exception(msg)
cleaning_error_handler(task, msg)
raise exception.NodeCleaningFailure(node=node.uuid,
reason=msg)
return next_steps
def continue_node_clean(self, context, node_id): def continue_node_clean(self, context, node_id):
"""RPC method to continue cleaning a node. """RPC method to continue cleaning a node.
@ -844,6 +872,8 @@ class ConductorManager(periodic_task.PeriodicTasks):
async task async task
:raises: NodeLocked if node is locked by another conductor. :raises: NodeLocked if node is locked by another conductor.
:raises: NodeNotFound if the node no longer appears in the database :raises: NodeNotFound if the node no longer appears in the database
:raises: NodeCleaningFailure if an internal error occurred when
getting the next clean steps
""" """
LOG.debug("RPC continue_node_clean called for node %s.", node_id) LOG.debug("RPC continue_node_clean called for node %s.", node_id)
@ -862,8 +892,32 @@ class ConductorManager(periodic_task.PeriodicTasks):
{'node': task.node.uuid, {'node': task.node.uuid,
'state': task.node.provision_state, 'state': task.node.provision_state,
'clean_state': states.CLEANWAIT}) 'clean_state': states.CLEANWAIT})
task.set_spawn_error_hook(cleaning_error_handler, task.node,
'Failed to run next clean step') next_steps = self._get_node_next_clean_steps(task)
# If this isn't the final clean step in the cleaning operation
# and it is flagged to abort after the clean step that just
# finished, we abort the cleaning operaation.
if task.node.clean_step.get('abort_after'):
step_name = task.node.clean_step['step']
if next_steps:
LOG.debug('The cleaning operation for node %(node)s was '
'marked to be aborted after step "%(step)s '
'completed. Aborting now that it has completed.',
{'node': task.node.uuid, 'step': step_name})
task.process_event('abort',
callback=self._spawn_worker,
call_args=(self._do_node_clean_abort,
task, step_name),
err_handler=provisioning_error_handler)
return
LOG.debug('The cleaning operation for node %(node)s was '
'marked to be aborted after step "%(step)s" '
'completed. However, since there are no more '
'clean steps after this, the abort is not going '
'to be done.', {'node': task.node.uuid,
'step': step_name})
# TODO(lucasagomes): This conditional is here for backwards # TODO(lucasagomes): This conditional is here for backwards
# compat with previous code. Should be removed once the Mitaka # compat with previous code. Should be removed once the Mitaka
@ -871,12 +925,12 @@ class ConductorManager(periodic_task.PeriodicTasks):
if task.node.provision_state == states.CLEANWAIT: if task.node.provision_state == states.CLEANWAIT:
task.process_event('resume') task.process_event('resume')
task.set_spawn_error_hook(cleaning_error_handler, task.node,
_('Failed to run next clean step'))
task.spawn_after( task.spawn_after(
self._spawn_worker, self._spawn_worker,
self._do_next_clean_step, self._do_next_clean_step,
task, task, next_steps)
task.node.driver_internal_info.get('clean_steps', []),
task.node.clean_step)
def _do_node_clean(self, task): def _do_node_clean(self, task):
"""Internal RPC method to perform automated cleaning of a node.""" """Internal RPC method to perform automated cleaning of a node."""
@ -932,32 +986,16 @@ class ConductorManager(periodic_task.PeriodicTasks):
set_node_cleaning_steps(task) set_node_cleaning_steps(task)
self._do_next_clean_step( self._do_next_clean_step(
task, task,
node.driver_internal_info.get('clean_steps', []), node.driver_internal_info.get('clean_steps', []))
node.clean_step)
def _do_next_clean_step(self, task, steps, last_step): def _do_next_clean_step(self, task, steps):
"""Start executing cleaning/zapping steps from the last step (if any). """Start executing cleaning/zapping steps.
:param task: a TaskManager instance with an exclusive lock :param task: a TaskManager instance with an exclusive lock
:param steps: The complete list of steps that need to be executed :param steps: The list of remaining steps that need to be executed
on the node on the node
:param last_step: The last step that was executed. {} will start
from the beginning
""" """
node = task.node node = task.node
# Trim already executed steps
if last_step:
try:
# Trim off last_step (now finished) and all previous steps.
steps = steps[steps.index(last_step) + 1:]
except ValueError:
msg = (_('Node %(node)s got an invalid last step for '
'%(state)s: %(step)s.') %
{'node': node.uuid, 'step': last_step,
'state': node.provision_state})
LOG.exception(msg)
return cleaning_error_handler(task, msg)
LOG.info(_LI('Executing %(state)s on node %(node)s, remaining steps: ' LOG.info(_LI('Executing %(state)s on node %(node)s, remaining steps: '
'%(steps)s'), {'node': node.uuid, 'steps': steps, '%(steps)s'), {'node': node.uuid, 'steps': steps,
'state': node.provision_state}) 'state': node.provision_state})
@ -1056,6 +1094,36 @@ class ConductorManager(periodic_task.PeriodicTasks):
node.target_provision_state = None node.target_provision_state = None
node.save() node.save()
def _do_node_clean_abort(self, task, step_name=None):
"""Internal method to abort an ongoing operation.
:param task: a TaskManager instance with an exclusive lock
:param step_name: The name of the clean step.
"""
node = task.node
try:
task.driver.deploy.tear_down_cleaning(task)
except Exception as e:
LOG.exception(_LE('Failed to tear down cleaning for node %(node)s '
'after aborting the operation. Error: %(err)s'),
{'node': node.uuid, 'err': e})
error_msg = _('Failed to tear down cleaning after aborting '
'the operation')
cleaning_error_handler(task, error_msg, tear_down_cleaning=False,
set_fail_state=False)
return
info_message = _('Clean operation aborted for node %s') % node.uuid
last_error = _('By request, the clean operation was aborted')
if step_name:
msg = _(' after the completion of step "%s"') % step_name
last_error += msg
info_message += msg
node.last_error = last_error
node.save()
LOG.info(info_message)
@messaging.expected_exceptions(exception.NoFreeConductorWorker, @messaging.expected_exceptions(exception.NoFreeConductorWorker,
exception.NodeLocked, exception.NodeLocked,
exception.InvalidParameterValue, exception.InvalidParameterValue,
@ -1084,19 +1152,55 @@ class ConductorManager(periodic_task.PeriodicTasks):
callback=self._spawn_worker, callback=self._spawn_worker,
call_args=(self._do_node_clean, task), call_args=(self._do_node_clean, task),
err_handler=provisioning_error_handler) err_handler=provisioning_error_handler)
elif (action == states.VERBS['manage'] and return
if (action == states.VERBS['manage'] and
task.node.provision_state == states.ENROLL): task.node.provision_state == states.ENROLL):
task.process_event('manage', task.process_event('manage',
callback=self._spawn_worker, callback=self._spawn_worker,
call_args=(self._do_node_verify, task), call_args=(self._do_node_verify, task),
err_handler=provisioning_error_handler) err_handler=provisioning_error_handler)
else: return
try:
task.process_event(action) if (action == states.VERBS['abort'] and
except exception.InvalidState: task.node.provision_state == states.CLEANWAIT):
raise exception.InvalidStateRequested(
action=action, node=task.node.uuid, # Check if the clean step is abortable; if so abort it.
state=task.node.provision_state) # Otherwise, indicate in that clean step, that cleaning
# should be aborted after that step is done.
if (task.node.clean_step and not
task.node.clean_step.get('abortable')):
LOG.info(_LI('The current clean step "%(clean_step)s" for '
'node %(node)s is not abortable. Adding a '
'flag to abort the cleaning after the clean '
'step is completed.'),
{'clean_step': task.node.clean_step['step'],
'node': task.node.uuid})
clean_step = task.node.clean_step
if not clean_step.get('abort_after'):
clean_step['abort_after'] = True
task.node.clean_step = clean_step
task.node.save()
return
LOG.debug('Aborting the cleaning operation during clean step '
'"%(step)s" for node %(node)s in provision state '
'"%(prov)s".',
{'node': task.node.uuid,
'prov': task.node.provision_state,
'step': task.node.clean_step.get('step')})
task.process_event('abort',
callback=self._spawn_worker,
call_args=(self._do_node_clean_abort, task),
err_handler=provisioning_error_handler)
return
try:
task.process_event(action)
except exception.InvalidState:
raise exception.InvalidStateRequested(
action=action, node=task.node.uuid,
state=task.node.provision_state)
@periodic_task.periodic_task( @periodic_task.periodic_task(
spacing=CONF.conductor.sync_power_state_interval) spacing=CONF.conductor.sync_power_state_interval)
@ -2372,7 +2476,8 @@ def _do_inspect_hardware(task):
raise exception.HardwareInspectionFailure(error=error) raise exception.HardwareInspectionFailure(error=error)
def cleaning_error_handler(task, msg, tear_down_cleaning=True): def cleaning_error_handler(task, msg, tear_down_cleaning=True,
set_fail_state=True):
"""Put a failed node in CLEANFAIL or ZAPFAIL and maintenance.""" """Put a failed node in CLEANFAIL or ZAPFAIL and maintenance."""
# Reset clean step, msg should include current step # Reset clean step, msg should include current step
if task.node.provision_state in (states.CLEANING, states.CLEANWAIT): if task.node.provision_state in (states.CLEANING, states.CLEANWAIT):
@ -2389,7 +2494,8 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True):
'reason: %(err)s'), {'err': e, 'uuid': task.node.uuid}) 'reason: %(err)s'), {'err': e, 'uuid': task.node.uuid})
LOG.exception(msg) LOG.exception(msg)
task.process_event('fail') if set_fail_state:
task.process_event('fail')
def _step_key(step): def _step_key(step):

View File

@ -165,6 +165,7 @@ class BaseInterface(object):
# Create a CleanStep to represent this method # Create a CleanStep to represent this method
step = {'step': method.__name__, step = {'step': method.__name__,
'priority': method._clean_step_priority, 'priority': method._clean_step_priority,
'abortable': method._clean_step_abortable,
'interface': instance.interface_type} 'interface': instance.interface_type}
instance.clean_steps.append(step) instance.clean_steps.append(step)
LOG.debug('Found clean steps %(steps)s for interface %(interface)s', LOG.debug('Found clean steps %(steps)s for interface %(interface)s',
@ -978,7 +979,7 @@ class RAIDInterface(BaseInterface):
return raid.get_logical_disk_properties(self.raid_schema) return raid.get_logical_disk_properties(self.raid_schema)
def clean_step(priority): def clean_step(priority, abortable=False):
"""Decorator for cleaning and zapping steps. """Decorator for cleaning and zapping steps.
If priority is greater than 0, the function will be executed as part If priority is greater than 0, the function will be executed as part
@ -1013,10 +1014,13 @@ def clean_step(priority):
# do some cleaning # do some cleaning
:param priority: an integer priority, should be a CONF option :param priority: an integer priority, should be a CONF option
:param abortable: Boolean value. Whether the clean step is abortable
or not; defaults to False.
""" """
def decorator(func): def decorator(func):
func._is_clean_step = True func._is_clean_step = True
func._clean_step_priority = priority func._clean_step_priority = priority
func._clean_step_abortable = abortable
return func return func
return decorator return decorator

View File

@ -1973,6 +1973,37 @@ class TestPut(test_api_base.FunctionalTest):
self.assertEqual(http_client.BAD_REQUEST, ret.status_code) self.assertEqual(http_client.BAD_REQUEST, ret.status_code)
self.assertEqual(0, mock_dpa.call_count) self.assertEqual(0, mock_dpa.call_count)
def test_abort_unsupported(self):
ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid,
{'target': states.VERBS['abort']},
headers={api_base.Version.string: "1.12"},
expect_errors=True)
self.assertEqual(406, ret.status_code)
@mock.patch.object(rpcapi.ConductorAPI, 'do_provisioning_action')
def test_abort_cleanwait(self, mock_dpa):
self.node.provision_state = states.CLEANWAIT
self.node.save()
ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid,
{'target': states.VERBS['abort']},
headers={api_base.Version.string: "1.13"})
self.assertEqual(202, ret.status_code)
self.assertEqual(b'', ret.body)
mock_dpa.assert_called_once_with(mock.ANY, self.node.uuid,
states.VERBS['abort'],
'test-topic')
def test_abort_invalid_state(self):
# "abort" is only valid for nodes in CLEANWAIT
self.node.provision_state = states.CLEANING
self.node.save()
ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid,
{'target': states.VERBS['abort']},
headers={api_base.Version.string: "1.13"},
expect_errors=True)
self.assertEqual(400, ret.status_code)
def test_set_console_mode_enabled(self): def test_set_console_mode_enabled(self):
with mock.patch.object(rpcapi.ConductorAPI, with mock.patch.object(rpcapi.ConductorAPI,
'set_console_mode') as mock_scm: 'set_console_mode') as mock_scm:

View File

@ -1537,6 +1537,78 @@ class DoNodeDeployTearDownTestCase(_ServiceSetUpMixin,
self.assertIsNone(node.last_error) self.assertIsNone(node.last_error)
mock_spawn.assert_called_with(self.service._do_node_verify, mock.ANY) mock_spawn.assert_called_with(self.service._do_node_verify, mock.ANY)
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker')
def test_do_provision_action_abort(self, mock_spawn):
node = obj_utils.create_test_node(
self.context, driver='fake',
provision_state=states.CLEANWAIT,
target_provision_state=states.AVAILABLE)
self._start_service()
self.service.do_provisioning_action(self.context, node.uuid, 'abort')
node.refresh()
# Node will be moved to AVAILABLE after cleaning, not tested here
self.assertEqual(states.CLEANFAIL, node.provision_state)
self.assertEqual(states.AVAILABLE, node.target_provision_state)
self.assertIsNone(node.last_error)
mock_spawn.assert_called_with(self.service._do_node_clean_abort,
mock.ANY)
def test_do_provision_action_abort_clean_step_not_abortable(self):
node = obj_utils.create_test_node(
self.context, driver='fake',
provision_state=states.CLEANWAIT,
target_provision_state=states.AVAILABLE,
clean_step={'step': 'foo', 'abortable': False})
self._start_service()
self.service.do_provisioning_action(self.context, node.uuid, 'abort')
node.refresh()
# Assert the current clean step was marked to be aborted later
self.assertIn('abort_after', node.clean_step)
self.assertTrue(node.clean_step['abort_after'])
# Make sure things stays as it was before
self.assertEqual(states.CLEANWAIT, node.provision_state)
self.assertEqual(states.AVAILABLE, node.target_provision_state)
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
def _test__do_node_clean_abort(self, step_name, tear_mock):
node = obj_utils.create_test_node(
self.context, driver='fake',
provision_state=states.CLEANFAIL,
target_provision_state=states.AVAILABLE,
clean_step={'step': 'foo', 'abortable': True})
with task_manager.acquire(self.context, node.uuid) as task:
self.service._do_node_clean_abort(task, step_name=step_name)
self.assertIsNotNone(task.node.last_error)
tear_mock.assert_called_once_with(task.driver.deploy, task)
if step_name:
self.assertIn(step_name, task.node.last_error)
def test__do_node_clean_abort(self):
self._test__do_node_clean_abort(None)
def test__do_node_clean_abort_with_step_name(self):
self._test__do_node_clean_abort('foo')
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
def test__do_node_clean_abort_tear_down_fail(self, tear_mock):
tear_mock.side_effect = Exception('Surprise')
node = obj_utils.create_test_node(
self.context, driver='fake',
provision_state=states.CLEANFAIL,
target_provision_state=states.AVAILABLE,
clean_step={'step': 'foo', 'abortable': True})
with task_manager.acquire(self.context, node.uuid) as task:
self.service._do_node_clean_abort(task)
tear_mock.assert_called_once_with(task.driver.deploy, task)
self.assertIsNotNone(task.node.last_error)
self.assertIsNotNone(task.node.maintenance_reason)
self.assertTrue(task.node.maintenance)
@_mock_record_keepalive @_mock_record_keepalive
class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase): class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
@ -1552,6 +1624,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
# Cleaning should be executed in this order # Cleaning should be executed in this order
self.clean_steps = [self.deploy_erase, self.power_update, self.clean_steps = [self.deploy_erase, self.power_update,
self.deploy_update] self.deploy_update]
self.next_clean_steps = self.clean_steps[1:]
# Zap step # Zap step
self.deploy_raid = { self.deploy_raid = {
'step': 'build_raid', 'priority': 0, 'interface': 'deploy'} 'step': 'build_raid', 'priority': 0, 'interface': 'deploy'}
@ -1654,14 +1727,13 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
target_provision_state=tgt_prv_state, target_provision_state=tgt_prv_state,
last_error=None, last_error=None,
driver_internal_info=driver_info, driver_internal_info=driver_info,
clean_step=self.clean_steps[1]) clean_step=self.clean_steps[0])
self._start_service() self._start_service()
self.service.continue_node_clean(self.context, node.uuid) self.service.continue_node_clean(self.context, node.uuid)
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
mock_spawn.assert_called_with(self.service._do_next_clean_step, mock_spawn.assert_called_with(self.service._do_next_clean_step,
mock.ANY, self.clean_steps, mock.ANY, self.next_clean_steps)
self.clean_steps[1])
def test_continue_node_clean(self): def test_continue_node_clean(self):
self._continue_node_clean(states.CLEANWAIT) self._continue_node_clean(states.CLEANWAIT)
@ -1669,6 +1741,44 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
def test_continue_node_clean_backward_compat(self): def test_continue_node_clean_backward_compat(self):
self._continue_node_clean(states.CLEANING) self._continue_node_clean(states.CLEANING)
def test_continue_node_clean_abort(self):
last_clean_step = self.clean_steps[0]
last_clean_step['abortable'] = False
last_clean_step['abort_after'] = True
driver_info = {'clean_steps': self.clean_steps}
node = obj_utils.create_test_node(
self.context, driver='fake', provision_state=states.CLEANWAIT,
target_provision_state=states.AVAILABLE, last_error=None,
driver_internal_info=driver_info, clean_step=self.clean_steps[0])
self._start_service()
self.service.continue_node_clean(self.context, node.uuid)
self.service._worker_pool.waitall()
node.refresh()
self.assertEqual(states.CLEANFAIL, node.provision_state)
self.assertEqual(states.AVAILABLE, node.target_provision_state)
self.assertIsNotNone(node.last_error)
# assert the clean step name is in the last error message
self.assertIn(self.clean_steps[0]['step'], node.last_error)
def test_continue_node_clean_abort_last_clean_step(self):
last_clean_step = self.clean_steps[0]
last_clean_step['abortable'] = False
last_clean_step['abort_after'] = True
driver_info = {'clean_steps': [self.clean_steps[0]]}
node = obj_utils.create_test_node(
self.context, driver='fake', provision_state=states.CLEANWAIT,
target_provision_state=states.AVAILABLE, last_error=None,
driver_internal_info=driver_info, clean_step=self.clean_steps[0])
self._start_service()
self.service.continue_node_clean(self.context, node.uuid)
self.service._worker_pool.waitall()
node.refresh()
self.assertEqual(states.AVAILABLE, node.provision_state)
self.assertIsNone(node.target_provision_state)
self.assertIsNone(node.last_error)
@mock.patch('ironic.drivers.modules.fake.FakePower.validate') @mock.patch('ironic.drivers.modules.fake.FakePower.validate')
def test__do_node_clean_validate_fail(self, mock_validate): def test__do_node_clean_validate_fail(self, mock_validate):
# InvalidParameterValue should be cause node to go to CLEANFAIL # InvalidParameterValue should be cause node to go to CLEANFAIL
@ -1731,7 +1841,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
node.refresh() node.refresh()
mock_validate.assert_called_once_with(task) mock_validate.assert_called_once_with(task)
mock_next_step.assert_called_once_with(mock.ANY, [], {}) mock_next_step.assert_called_once_with(mock.ANY, [])
mock_steps.assert_called_once_with(task) mock_steps.assert_called_once_with(task)
# Check that state didn't change # Check that state didn't change
@ -1753,8 +1863,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
with task_manager.acquire( with task_manager.acquire(
self.context, node['id'], shared=False) as task: self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step(task, self.clean_steps, self.service._do_next_clean_step(task, self.clean_steps)
node.clean_step)
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
@ -1785,8 +1894,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
with task_manager.acquire( with task_manager.acquire(
self.context, node['id'], shared=False) as task: self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step(task, self.clean_steps, self.service._do_next_clean_step(task, self.next_clean_steps)
self.clean_steps[0])
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
@ -1817,8 +1925,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
with task_manager.acquire( with task_manager.acquire(
self.context, node['id'], shared=False) as task: self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step(task, self.clean_steps, self.service._do_next_clean_step(task, self.next_clean_steps)
self.clean_steps[0])
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
@ -1847,8 +1954,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
with task_manager.acquire( with task_manager.acquire(
self.context, node['id'], shared=False) as task: self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step( self.service._do_next_clean_step(task, [])
task, self.clean_steps, self.clean_steps[-1])
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
@ -1876,8 +1982,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
with task_manager.acquire( with task_manager.acquire(
self.context, node['id'], shared=False) as task: self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step( self.service._do_next_clean_step(task, self.clean_steps)
task, self.clean_steps, node.clean_step)
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
@ -1892,35 +1997,6 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
mock.call(self.clean_steps[2]) mock.call(self.clean_steps[2])
] ]
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step')
def test__do_next_clean_step_bad_last_step(self, mock_execute):
# Make sure cleaning fails if last_step is incorrect
node = obj_utils.create_test_node(
self.context, driver='fake',
provision_state=states.CLEANING,
target_provision_state=states.AVAILABLE,
last_error=None,
clean_step={})
self._start_service()
with task_manager.acquire(
self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step(
task, self.clean_steps, {'interface': 'deploy',
'step': 'not_a_clean_step',
'priority': 100})
self.service._worker_pool.waitall()
node.refresh()
# Node should have failed without executing anything
self.assertEqual(states.CLEANFAIL, node.provision_state)
self.assertEqual({}, node.clean_step)
self.assertIsNotNone(node.last_error)
self.assertTrue(node.maintenance)
self.assertFalse(mock_execute.called)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step') @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step')
@mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True) @mock.patch.object(fake.FakeDeploy, 'tear_down_cleaning', autospec=True)
def test__do_next_clean_step_fail(self, tear_mock, mock_execute): def test__do_next_clean_step_fail(self, tear_mock, mock_execute):
@ -1937,8 +2013,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
with task_manager.acquire( with task_manager.acquire(
self.context, node['id'], shared=False) as task: self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step( self.service._do_next_clean_step(task, self.clean_steps)
task, self.clean_steps, node.clean_step)
tear_mock.assert_called_once_with(task.driver.deploy, task) tear_mock.assert_called_once_with(task.driver.deploy, task)
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
@ -1969,8 +2044,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
with task_manager.acquire( with task_manager.acquire(
self.context, node['id'], shared=False) as task: self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step( self.service._do_next_clean_step(task, self.clean_steps)
task, self.clean_steps, node.clean_step)
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
@ -1998,7 +2072,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
with task_manager.acquire( with task_manager.acquire(
self.context, node['id'], shared=False) as task: self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step( self.service._do_next_clean_step(
task, [], node.clean_step) task, [])
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
@ -2025,8 +2099,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
with task_manager.acquire( with task_manager.acquire(
self.context, node['id'], shared=False) as task: self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step( self.service._do_next_clean_step(task, self.clean_steps)
task, self.clean_steps, node.clean_step)
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
@ -2060,6 +2133,36 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
task.node.driver_internal_info['clean_steps']) task.node.driver_internal_info['clean_steps'])
self.assertEqual({}, node.clean_step) self.assertEqual({}, node.clean_step)
def test__get_node_next_clean_steps(self):
driver_internal_info = {'clean_steps': self.clean_steps}
node = obj_utils.create_test_node(
self.context, driver='fake',
provision_state=states.CLEANWAIT,
target_provision_state=states.AVAILABLE,
driver_internal_info=driver_internal_info,
last_error=None,
clean_step=self.clean_steps[0])
with task_manager.acquire(self.context, node.uuid) as task:
steps = self.service._get_node_next_clean_steps(task)
self.assertEqual(self.next_clean_steps, steps)
def test__get_node_next_clean_steps_bad_clean_step(self):
driver_internal_info = {'clean_steps': self.clean_steps}
node = obj_utils.create_test_node(
self.context, driver='fake',
provision_state=states.CLEANWAIT,
target_provision_state=states.AVAILABLE,
driver_internal_info=driver_internal_info,
last_error=None,
clean_step={'interface': 'deploy',
'step': 'not_a_clean_step',
'priority': 100})
with task_manager.acquire(self.context, node.uuid) as task:
self.assertRaises(exception.NodeCleaningFailure,
self.service._get_node_next_clean_steps, task)
@_mock_record_keepalive @_mock_record_keepalive
class DoNodeVerifyTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase): class DoNodeVerifyTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):

View File

@ -132,7 +132,7 @@ class CleanStepTestCase(base.TestCase):
def zap_method(self, task): def zap_method(self, task):
pass pass
@driver_base.clean_step(priority=10) @driver_base.clean_step(priority=10, abortable=True)
def clean_method(self, task): def clean_method(self, task):
method_mock(task) method_mock(task)
@ -146,7 +146,7 @@ class CleanStepTestCase(base.TestCase):
def zap_method2(self, task): def zap_method2(self, task):
pass pass
@driver_base.clean_step(priority=20) @driver_base.clean_step(priority=20, abortable=True)
def clean_method2(self, task): def clean_method2(self, task):
method_mock(task) method_mock(task)
@ -159,11 +159,13 @@ class CleanStepTestCase(base.TestCase):
self.assertEqual(2, len(obj.get_clean_steps(task_mock))) self.assertEqual(2, len(obj.get_clean_steps(task_mock)))
# Ensure the steps look correct # Ensure the steps look correct
self.assertEqual(10, obj.get_clean_steps(task_mock)[0]['priority']) self.assertEqual(10, obj.get_clean_steps(task_mock)[0]['priority'])
self.assertTrue(obj.get_clean_steps(task_mock)[0]['abortable'])
self.assertEqual('test', obj.get_clean_steps( self.assertEqual('test', obj.get_clean_steps(
task_mock)[0]['interface']) task_mock)[0]['interface'])
self.assertEqual('clean_method', obj.get_clean_steps( self.assertEqual('clean_method', obj.get_clean_steps(
task_mock)[0]['step']) task_mock)[0]['step'])
self.assertEqual(0, obj.get_clean_steps(task_mock)[1]['priority']) self.assertEqual(0, obj.get_clean_steps(task_mock)[1]['priority'])
self.assertFalse(obj.get_clean_steps(task_mock)[1]['abortable'])
self.assertEqual('test', obj.get_clean_steps( self.assertEqual('test', obj.get_clean_steps(
task_mock)[1]['interface']) task_mock)[1]['interface'])
self.assertEqual('zap_method', obj.get_clean_steps( self.assertEqual('zap_method', obj.get_clean_steps(
@ -173,11 +175,13 @@ class CleanStepTestCase(base.TestCase):
self.assertEqual(2, len(obj2.get_clean_steps(task_mock))) self.assertEqual(2, len(obj2.get_clean_steps(task_mock)))
# Ensure the steps look correct # Ensure the steps look correct
self.assertEqual(20, obj2.get_clean_steps(task_mock)[0]['priority']) self.assertEqual(20, obj2.get_clean_steps(task_mock)[0]['priority'])
self.assertTrue(obj2.get_clean_steps(task_mock)[0]['abortable'])
self.assertEqual('test2', obj2.get_clean_steps( self.assertEqual('test2', obj2.get_clean_steps(
task_mock)[0]['interface']) task_mock)[0]['interface'])
self.assertEqual('clean_method2', obj2.get_clean_steps( self.assertEqual('clean_method2', obj2.get_clean_steps(
task_mock)[0]['step']) task_mock)[0]['step'])
self.assertEqual(0, obj2.get_clean_steps(task_mock)[1]['priority']) self.assertEqual(0, obj2.get_clean_steps(task_mock)[1]['priority'])
self.assertFalse(obj.get_clean_steps(task_mock)[1]['abortable'])
self.assertEqual('test2', obj2.get_clean_steps( self.assertEqual('test2', obj2.get_clean_steps(
task_mock)[1]['interface']) task_mock)[1]['interface'])
self.assertEqual('zap_method2', obj2.get_clean_steps( self.assertEqual('zap_method2', obj2.get_clean_steps(