Agent deploy: account for disable_power_off

* In tear_down_agent: lock down the agent and keep the node powered on.
* In boot_instance: reboot the node instead of powering it on.
* In tear_down_inband_cleaning/service: avoid powering off the node.
* In tear_down_service: lock down the agent before tearing down.
* In tear_down and clean_up: do not power off, reboot after clean-up.
* In rescue/unrescue: use one reboot instead of power off/on.

Not locking down in tear_down_inband_cleaning because the node will not
end up on a tenant network (and lockdown may disrupt fast-track).

Depends-On: https://review.opendev.org/c/openstack/ironic-python-agent/+/934234
Partial-Bug: #2077432
Change-Id: Ib2297ec5b69df6b2ecd11942fb8fdc9d640de6bc
This commit is contained in:
Dmitry Tantsur 2024-11-11 12:02:17 +01:00
parent 80c83d82f2
commit a0dddb19a6
No known key found for this signature in database
GPG Key ID: 315B2AF9FD216C60
8 changed files with 482 additions and 95 deletions

View File

@ -412,7 +412,11 @@ class CustomAgentDeploy(agent_base.AgentBaseMixin,
client = agent_client.get_client(task)
try:
if not can_power_on:
if node.disable_power_off:
LOG.info("Node %s does not support power off, locking "
"down the agent", node.uuid)
client.lockdown(node)
elif not can_power_on:
LOG.info('Power interface of node %s does not support '
'power on, using reboot to switch to the instance',
node.uuid)
@ -486,7 +490,7 @@ class CustomAgentDeploy(agent_base.AgentBaseMixin,
LOG.debug('The agent for node %(node)s has recently checked '
'in, and the node power will remain unmodified.',
{'node': task.node.uuid})
else:
elif not node.disable_power_off:
# Powering off node to setup networking for port and
# ensure that the state is reset if it is inadvertently
# on for any unknown reason.
@ -1136,24 +1140,23 @@ class AgentRescue(base.RescueInterface):
:raises: any boot interface's prepare_ramdisk exceptions.
:returns: Returns states.RESCUEWAIT
"""
manager_utils.node_power_action(task, states.POWER_OFF)
# NOTE(TheJulia): Revealing that the power is off at any time can
# cause external power sync to decide that the node must be off.
# This may result in a post-rescued instance being turned off
# unexpectedly after rescue has started.
# TODO(TheJulia): Once we have power/state callbacks to nova,
# the reset of the power_state can be removed.
task.node.power_state = states.POWER_ON
task.node.save()
with driver_utils.power_off_and_on(task):
# NOTE(TheJulia): Revealing that the power is off at any time can
# cause external power sync to decide that the node must be off.
# This may result in a post-rescued instance being turned off
# unexpectedly after rescue has started.
# TODO(TheJulia): Once we have power/state callbacks to nova,
# the reset of the power_state can be removed.
task.node.power_state = states.POWER_ON
task.node.save()
task.driver.boot.clean_up_instance(task)
with manager_utils.power_state_for_network_configuration(task):
task.driver.network.unconfigure_tenant_networks(task)
task.driver.network.add_rescuing_network(task)
if CONF.agent.manage_agent_boot:
# prepare_ramdisk will set the boot device
deploy_utils.prepare_agent_boot(task)
manager_utils.node_power_action(task, states.POWER_ON)
task.driver.boot.clean_up_instance(task)
with manager_utils.power_state_for_network_configuration(task):
task.driver.network.unconfigure_tenant_networks(task)
task.driver.network.add_rescuing_network(task)
if CONF.agent.manage_agent_boot:
# prepare_ramdisk will set the boot device
deploy_utils.prepare_agent_boot(task)
return states.RESCUEWAIT
@ -1171,22 +1174,20 @@ class AgentRescue(base.RescueInterface):
:raises: any boot interface's prepare_instance exceptions.
:returns: Returns states.ACTIVE
"""
manager_utils.node_power_action(task, states.POWER_OFF)
with driver_utils.power_off_and_on(task):
# NOTE(TheJulia): Revealing that the power is off at any time can
# cause external power sync to decide that the node must be off.
# This may result in a post-rescued instance being turned off
# unexpectedly after unrescue.
# TODO(TheJulia): Once we have power/state callbacks to nova,
# the reset of the power_state can be removed.
task.node.power_state = states.POWER_ON
task.node.save()
# NOTE(TheJulia): Revealing that the power is off at any time can
# cause external power sync to decide that the node must be off.
# This may result in a post-rescued instance being turned off
# unexpectedly after unrescue.
# TODO(TheJulia): Once we have power/state callbacks to nova,
# the reset of the power_state can be removed.
task.node.power_state = states.POWER_ON
task.node.save()
self.clean_up(task)
with manager_utils.power_state_for_network_configuration(task):
task.driver.network.configure_tenant_networks(task)
task.driver.boot.prepare_instance(task)
manager_utils.node_power_action(task, states.POWER_ON)
self.clean_up(task)
with manager_utils.power_state_for_network_configuration(task):
task.driver.network.configure_tenant_networks(task)
task.driver.boot.prepare_instance(task)
return states.ACTIVE

View File

@ -755,7 +755,8 @@ class AgentBaseMixin(object):
:raises: other exceptions by the node's power driver if something
wrong occurred during the power action.
"""
manager_utils.node_power_action(task, states.POWER_OFF)
if not task.node.disable_power_off:
manager_utils.node_power_action(task, states.POWER_OFF)
task.driver.storage.detach_volumes(task)
deploy_utils.tear_down_storage_configuration(task)
with manager_utils.power_state_for_network_configuration(task):
@ -779,6 +780,11 @@ class AgentBaseMixin(object):
task.driver.boot.clean_up_instance(task)
provider = dhcp_factory.DHCPFactory()
provider.clean_dhcp(task)
# NOTE(dtantsur): in a regular case, this is handled by tear_down, but
# we cannot power off the node there, so making sure it no longer runs
# the instance image by rebooting after boot.clean_up_instance.
if task.node.disable_power_off:
manager_utils.node_power_action(task, states.REBOOT)
def take_over(self, task):
"""Take over management of this node from a dead conductor.
@ -841,8 +847,14 @@ class AgentBaseMixin(object):
:raises: NodeServiceFailure, NetworkError if the servicing ports
cannot be removed
"""
deploy_utils.tear_down_inband_service(
task)
if task.node.disable_power_off:
LOG.debug('Locking down the agent on node %s before exiting '
'servicing', task.node.uuid)
agent_client.get_client(task).lockdown(
# NOTE(dtantsur): we cannot assume the agent is still running
# hence fail_if_unavailable=False.
task.node, fail_if_unavailable=False)
deploy_utils.tear_down_inband_service(task)
@METRICS.timer('AgentBaseMixin.get_clean_steps')
def get_clean_steps(self, task):
@ -1178,7 +1190,10 @@ class AgentOobStepsMixin(object):
can_power_on = (states.POWER_ON in
task.driver.power.get_supported_power_states(task))
try:
if can_power_on:
if task.node.disable_power_off:
# We haven't powered off the node yet - reset it now.
manager_utils.node_power_action(task, states.REBOOT)
elif can_power_on:
manager_utils.node_power_action(task, states.POWER_ON)
else:
LOG.debug('Not trying to power on node %s that does not '

View File

@ -15,10 +15,12 @@
from http import client as http_client
import os
import ssl
import time
from ironic_lib import metrics_utils
from oslo_log import log
from oslo_serialization import jsonutils
from oslo_utils import excutils
from oslo_utils import strutils
import requests
import tenacity
@ -839,3 +841,75 @@ class AgentClient(object):
return self._command(node=node,
method='rescue.finalize_rescue',
params=params)
@METRICS.timer('AgentClient.lockdown')
def lockdown(self, node, fail_if_unavailable=True):
"""Lock down the agent so that it's not usable any more.
:param node: A Node object.
:param fail_if_unavailable: Whether to fail this call if the agent is
already unavailable.
:raises: IronicException when failed to issue the request or there was
a malformed response from the agent.
:raises: AgentAPIError when agent failed to execute specified command.
:raises: AgentInProgress when the command fails to execute as the agent
is presently executing the prior command.
"""
if not fail_if_unavailable:
try:
self.get_commands_status(node, expect_errors=True)
except exception.AgentConnectionFailed:
LOG.debug('Agent is already down when trying to lock down '
'node %s', node.uuid)
return
except Exception:
LOG.exception('Unexpected exception when checking agent '
'status on node %s, proceeding with lockdown',
node.uuid)
wait = CONF.agent.post_deploy_get_power_state_retry_interval
attempts = CONF.agent.post_deploy_get_power_state_retries + 1
@tenacity.retry(stop=tenacity.stop_after_attempt(attempts),
retry=tenacity.retry_unless_exception_type(
exception.AgentConnectionFailed),
wait=tenacity.wait_fixed(wait),
sleep=time.sleep, # for unit testing
reraise=True)
def _wait_until_locked_down(node):
self.get_commands_status(node, expect_errors=True)
LOG.debug('Agent is still available on node %s, waiting for the '
'lockdown command to take effect', node.uuid)
self.sync(node)
try:
self._command(node=node, method='system.lockdown', params={})
except Exception as e:
with excutils.save_and_raise_exception():
LOG.error('Failed to lock down node %(node_uuid)s. '
'%(cls)s: %(error)s',
{'node_uuid': node.uuid,
'cls': e.__class__.__name__, 'error': e},
exc_info=not isinstance(
e, exception.IronicException))
try:
_wait_until_locked_down(node)
except exception.AgentConnectionFailed:
pass # expected
except tenacity.RetryError:
LOG.error('Failed to lock down node %(node_uuid)s in at least '
'%(timeout)d seconds: agent is still available',
{'node_uuid': node.uuid,
'timeout': (wait * (attempts - 1))})
raise exception.AgentCommandTimeout(command='system.lockdown',
node=node.uuid)
except Exception as e:
with excutils.save_and_reraise_exception():
LOG.error('Failed to lock down node %(node_uuid)s. '
'%(cls)s: %(error)s',
{'node_uuid': node.uuid,
'cls': e.__class__.__name__, 'error': e},
exc_info=not isinstance(
e, exception.IronicException))

View File

@ -772,7 +772,7 @@ def tear_down_inband_cleaning(task, manage_boot=True):
node = task.node
cleaning_failure = (node.fault == faults.CLEAN_FAILURE)
if not (fast_track or cleaning_failure):
if not (fast_track or cleaning_failure or node.disable_power_off):
manager_utils.node_power_action(task, states.POWER_OFF)
if manage_boot:
@ -781,8 +781,11 @@ def tear_down_inband_cleaning(task, manage_boot=True):
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
task.driver.network.remove_cleaning_network(task)
if not (fast_track or cleaning_failure):
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
if node.disable_power_off:
manager_utils.node_power_action(task, states.REBOOT)
else:
manager_utils.restore_power_state_if_needed(
task, power_state_to_restore)
def prepare_inband_service(task):
@ -797,24 +800,23 @@ def prepare_inband_service(task):
:raises: any boot interface's prepare_ramdisk exceptions.
:returns: Returns states.SERVICEWAIT
"""
manager_utils.node_power_action(task, states.POWER_OFF)
# NOTE(TheJulia): Revealing that the power is off at any time can
# cause external power sync to decide that the node must be off.
# This may result in a post-rescued instance being turned off
# unexpectedly after rescue has started.
# TODO(TheJulia): Once we have power/state callbacks to nova,
# the reset of the power_state can be removed.
task.node.power_state = states.POWER_ON
task.node.save()
with driver_utils.power_off_and_on(task):
# NOTE(TheJulia): Revealing that the power is off at any time can
# cause external power sync to decide that the node must be off.
# This may result in a post-rescued instance being turned off
# unexpectedly after rescue has started.
# TODO(TheJulia): Once we have power/state callbacks to nova,
# the reset of the power_state can be removed.
task.node.power_state = states.POWER_ON
task.node.save()
task.driver.boot.clean_up_instance(task)
with manager_utils.power_state_for_network_configuration(task):
task.driver.network.unconfigure_tenant_networks(task)
task.driver.network.add_servicing_network(task)
if CONF.agent.manage_agent_boot:
# prepare_ramdisk will set the boot device
prepare_agent_boot(task)
manager_utils.node_power_action(task, states.POWER_ON)
task.driver.boot.clean_up_instance(task)
with manager_utils.power_state_for_network_configuration(task):
task.driver.network.unconfigure_tenant_networks(task)
task.driver.network.add_servicing_network(task)
if CONF.agent.manage_agent_boot:
# prepare_ramdisk will set the boot device
prepare_agent_boot(task)
return states.SERVICEWAIT
@ -838,7 +840,7 @@ def tear_down_inband_service(task):
node = task.node
service_failure = (node.fault == faults.SERVICE_FAILURE)
if not service_failure:
if not service_failure and not node.disable_power_off:
manager_utils.node_power_action(task, states.POWER_OFF)
task.driver.boot.clean_up_ramdisk(task)
@ -851,7 +853,9 @@ def tear_down_inband_service(task):
task.driver.boot.prepare_instance(task)
# prepare_instance does not power on the node, the deploy interface is
# normally responsible for that.
manager_utils.node_power_action(task, states.POWER_ON)
next_state = (states.REBOOT if task.node.disable_power_off
else states.POWER_ON)
manager_utils.node_power_action(task, next_state)
def get_image_instance_info(node):
@ -1652,7 +1656,8 @@ def reboot_to_finish_step(task, timeout=None):
disable_ramdisk = task.node.driver_internal_info.get(
'cleaning_disable_ramdisk')
if not disable_ramdisk:
if manager_utils.is_fast_track(task):
if (manager_utils.is_fast_track(task)
and not task.node.disable_power_off):
LOG.debug('Forcing power off on node %s for a clean reboot into '
'the agent image', task.node)
manager_utils.node_power_action(task, states.POWER_OFF)

View File

@ -12,6 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
import contextlib
import os
import tempfile
@ -504,3 +505,18 @@ def need_prepare_ramdisk(node):
states.INSPECTWAIT,
states.SERVICING,
states.SERVICEWAIT)
@contextlib.contextmanager
def power_off_and_on(task):
"""A context manager that handles a reboot.
If disable_power_off is False, the node is powered off before yielding and
powered back on afterwards. Otherwise, one reboot is issued in the end.
"""
if not task.node.disable_power_off:
utils.node_power_action(task, states.POWER_OFF)
yield
next_state = (states.REBOOT if task.node.disable_power_off
else states.POWER_ON)
utils.node_power_action(task, next_state)

View File

@ -342,6 +342,29 @@ class CommonTestsMixin:
dhcp_factor_mock.assert_called_once_with()
destroy_images_mock.assert_called_once_with(task.node)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(deploy_utils, 'destroy_http_instance_images',
autospec=True)
@mock.patch('ironic.common.dhcp_factory.DHCPFactory', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'clean_up_instance', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True)
def test_clean_up_disable_power_off(self, pxe_clean_up_ramdisk_mock,
pxe_clean_up_instance_mock,
dhcp_factor_mock,
destroy_images_mock,
node_power_action_mock):
with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task:
task.node.disable_power_off = True
self.driver.clean_up(task)
pxe_clean_up_ramdisk_mock.assert_called_once_with(
task.driver.boot, task)
pxe_clean_up_instance_mock.assert_called_once_with(
task.driver.boot, task)
dhcp_factor_mock.assert_called_once_with()
destroy_images_mock.assert_called_once_with(task.node)
node_power_action_mock.assert_called_once_with(task, states.REBOOT)
class TestCustomAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
def setUp(self):
@ -388,6 +411,7 @@ class TestCustomAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
show_mock.assert_not_called()
validate_http_mock.assert_not_called()
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
@mock.patch.object(noop_storage.NoopStorage, 'attach_volumes',
autospec=True)
@mock.patch.object(deploy_utils, 'populate_storage_driver_internal_info',
@ -408,7 +432,7 @@ class TestCustomAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
unconfigure_tenant_net_mock, add_provisioning_net_mock,
build_instance_info_mock, build_options_mock,
pxe_prepare_ramdisk_mock, storage_driver_info_mock,
storage_attach_volumes_mock):
storage_attach_volumes_mock, node_power_action_mock):
node = self.node
node.network_interface = 'flat'
node.save()
@ -427,7 +451,10 @@ class TestCustomAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
build_options_mock.assert_called_once_with(task.node)
pxe_prepare_ramdisk_mock.assert_called_once_with(
task.driver.boot, task, {'a': 'b'})
node_power_action_mock.assert_called_once_with(
task, states.POWER_OFF)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
@mock.patch.object(noop_storage.NoopStorage, 'attach_volumes',
autospec=True)
@ -449,7 +476,8 @@ class TestCustomAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
unconfigure_tenant_net_mock, add_provisioning_net_mock,
build_instance_info_mock, build_options_mock,
pxe_prepare_ramdisk_mock, storage_driver_info_mock,
storage_attach_volumes_mock, is_fast_track_mock):
storage_attach_volumes_mock, is_fast_track_mock,
node_power_action_mock):
# TODO(TheJulia): We should revisit this test. Smartnic
# support didn't wire in tightly on testing for power in
# these tests, and largely fast_track impacts power operations.
@ -474,6 +502,50 @@ class TestCustomAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
# to work, reboots as part of deployment would need the ramdisk
# present and ready.
self.assertFalse(build_options_mock.called)
node_power_action_mock.assert_not_called()
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
@mock.patch.object(noop_storage.NoopStorage, 'attach_volumes',
autospec=True)
@mock.patch.object(deploy_utils, 'populate_storage_driver_internal_info',
autospec=True)
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', autospec=True)
@mock.patch.object(deploy_utils, 'build_agent_options', autospec=True)
@mock.patch.object(deploy_utils, 'build_instance_info_for_deploy',
autospec=True)
@mock.patch.object(flat_network.FlatNetwork, 'add_provisioning_network',
spec_set=True, autospec=True)
@mock.patch.object(flat_network.FlatNetwork,
'unconfigure_tenant_networks',
spec_set=True, autospec=True)
@mock.patch.object(flat_network.FlatNetwork, 'validate',
spec_set=True, autospec=True)
def test_prepare_disable_power_off(
self, validate_net_mock,
unconfigure_tenant_net_mock, add_provisioning_net_mock,
build_instance_info_mock, build_options_mock,
pxe_prepare_ramdisk_mock, storage_driver_info_mock,
storage_attach_volumes_mock, node_power_action_mock):
node = self.node
node.network_interface = 'flat'
node.disable_power_off = True
node.save()
with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task:
task.node.provision_state = states.DEPLOYING
build_options_mock.return_value = {'a': 'b'}
self.driver.prepare(task)
storage_driver_info_mock.assert_called_once_with(task)
validate_net_mock.assert_called_once_with(mock.ANY, task)
add_provisioning_net_mock.assert_called_once_with(mock.ANY, task)
unconfigure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
storage_attach_volumes_mock.assert_called_once_with(
task.driver.storage, task)
build_instance_info_mock.assert_not_called()
build_options_mock.assert_called_once_with(task.node)
pxe_prepare_ramdisk_mock.assert_called_once_with(
task.driver.boot, task, {'a': 'b'})
node_power_action_mock.assert_not_called()
class TestAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
@ -758,6 +830,41 @@ class TestAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
self.context, self.node['uuid'], shared=False) as task:
self.assertEqual(0, len(task.volume_targets))
@mock.patch.object(noop_storage.NoopStorage, 'detach_volumes',
autospec=True)
@mock.patch.object(flat_network.FlatNetwork,
'remove_provisioning_network',
spec_set=True, autospec=True)
@mock.patch.object(flat_network.FlatNetwork,
'unconfigure_tenant_networks',
spec_set=True, autospec=True)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
def test_tear_down_disable_power_off(self, power_mock,
unconfigure_tenant_nets_mock,
remove_provisioning_net_mock,
storage_detach_volumes_mock):
object_utils.create_test_volume_target(
self.context, node_id=self.node.id)
node = self.node
node.network_interface = 'flat'
node.disable_power_off = True
node.save()
with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task:
driver_return = self.driver.tear_down(task)
power_mock.assert_not_called()
self.assertEqual(driver_return, states.DELETED)
unconfigure_tenant_nets_mock.assert_called_once_with(mock.ANY,
task)
remove_provisioning_net_mock.assert_called_once_with(mock.ANY,
task)
storage_detach_volumes_mock.assert_called_once_with(
task.driver.storage, task)
# Verify no volumes exist for new task instances.
with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task:
self.assertEqual(0, len(task.volume_targets))
@mock.patch('ironic.drivers.modules.agent.check_image_size',
autospec=True)
@mock.patch.object(noop_storage.NoopStorage, 'attach_volumes',
@ -1193,8 +1300,19 @@ class TestAgentDeploy(CommonTestsMixin, db_base.DbTestCase):
def test_tear_down_service(self, tear_down_service_mock):
with task_manager.acquire(self.context, self.node.uuid) as task:
self.driver.tear_down_service(task)
tear_down_service_mock.assert_called_once_with(
task)
tear_down_service_mock.assert_called_once_with(task)
@mock.patch.object(agent_client.AgentClient, 'lockdown', autospec=True)
@mock.patch.object(deploy_utils, 'tear_down_inband_service',
autospec=True)
def test_tear_down_service_disable_power_off(self, tear_down_service_mock,
lockdown_mock):
with task_manager.acquire(self.context, self.node.uuid) as task:
task.node.disable_power_off = True
self.driver.tear_down_service(task)
tear_down_service_mock.assert_called_once_with(task)
lockdown_mock.assert_called_once_with(mock.ANY, task.node,
fail_if_unavailable=False)
@mock.patch.object(deploy_utils, 'tear_down_inband_cleaning',
autospec=True)
@ -2080,6 +2198,32 @@ class AgentRescueTestCase(db_base.DbTestCase):
self.assertFalse(mock_prepare_ramdisk.called)
self.assertEqual(states.RESCUEWAIT, result)
@mock.patch.object(flat_network.FlatNetwork, 'add_rescuing_network',
spec_set=True, autospec=True)
@mock.patch.object(flat_network.FlatNetwork, 'unconfigure_tenant_networks',
spec_set=True, autospec=True)
@mock.patch.object(fake.FakeBoot, 'prepare_ramdisk', autospec=True)
@mock.patch.object(fake.FakeBoot, 'clean_up_instance', autospec=True)
@mock.patch.object(deploy_utils, 'build_agent_options', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
def test_agent_rescue_disable_power_off(
self, mock_node_power_action, mock_build_agent_opts,
mock_clean_up_instance, mock_prepare_ramdisk,
mock_unconf_tenant_net, mock_add_rescue_net):
self.config(manage_agent_boot=True, group='agent')
mock_build_agent_opts.return_value = {'ipa-api-url': 'fake-api'}
with task_manager.acquire(self.context, self.node.uuid) as task:
task.node.disable_power_off = True
result = task.driver.rescue.rescue(task)
mock_node_power_action.assert_called_once_with(task, states.REBOOT)
mock_clean_up_instance.assert_called_once_with(mock.ANY, task)
mock_unconf_tenant_net.assert_called_once_with(mock.ANY, task)
mock_add_rescue_net.assert_called_once_with(mock.ANY, task)
mock_build_agent_opts.assert_called_once_with(task.node)
mock_prepare_ramdisk.assert_called_once_with(
mock.ANY, task, {'ipa-api-url': 'fake-api'})
self.assertEqual(states.RESCUEWAIT, result)
@mock.patch.object(flat_network.FlatNetwork, 'remove_rescuing_network',
spec_set=True, autospec=True)
@mock.patch.object(flat_network.FlatNetwork, 'configure_tenant_networks',
@ -2128,6 +2272,30 @@ class AgentRescueTestCase(db_base.DbTestCase):
mock_prepare_instance.assert_called_once_with(mock.ANY, task)
self.assertEqual(states.ACTIVE, result)
@mock.patch.object(flat_network.FlatNetwork, 'remove_rescuing_network',
spec_set=True, autospec=True)
@mock.patch.object(flat_network.FlatNetwork, 'configure_tenant_networks',
spec_set=True, autospec=True)
@mock.patch.object(fake.FakeBoot, 'prepare_instance', autospec=True)
@mock.patch.object(fake.FakeBoot, 'clean_up_ramdisk', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
def test_agent_unrescue_disable_power_off(
self, mock_node_power_action, mock_clean_ramdisk,
mock_prepare_instance, mock_conf_tenant_net,
mock_remove_rescue_net):
"""Test unrescue in case where boot driver prepares instance reboot."""
self.config(manage_agent_boot=True, group='agent')
with task_manager.acquire(self.context, self.node.uuid) as task:
task.node.disable_power_off = True
result = task.driver.rescue.unrescue(task)
mock_node_power_action.assert_called_once_with(task, states.REBOOT)
mock_clean_ramdisk.assert_called_once_with(
mock.ANY, task)
mock_remove_rescue_net.assert_called_once_with(mock.ANY, task)
mock_conf_tenant_net.assert_called_once_with(mock.ANY, task)
mock_prepare_instance.assert_called_once_with(mock.ANY, task)
self.assertEqual(states.ACTIVE, result)
@mock.patch.object(fake.FakeBoot, 'clean_up_instance', autospec=True)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
def test_agent_rescue_power_on(self, mock_node_power_action,
@ -2343,6 +2511,9 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
def setUp(self):
super().setUp()
self.deploy = agent.CustomAgentDeploy()
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
@mock.patch.object(manager_utils, 'power_on_node_if_needed', autospec=True)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@ -2357,9 +2528,6 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
node_power_action_mock, collect_mock,
power_on_node_if_needed_mock):
cfg.CONF.set_override('deploy_logs_collect', 'always', 'agent')
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
get_power_state_mock.side_effect = [states.POWER_ON,
states.POWER_OFF]
@ -2383,9 +2551,6 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
def test_tear_down_agent_soft_poweroff_doesnt_complete(
self, power_off_mock, get_power_state_mock,
node_power_action_mock, mock_collect):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
get_power_state_mock.return_value = states.POWER_ON
self.deploy.tear_down_agent(task)
@ -2408,9 +2573,6 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
self, power_off_mock, get_power_state_mock, node_power_action_mock,
mock_collect):
power_off_mock.side_effect = RuntimeError("boom")
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
get_power_state_mock.return_value = states.POWER_ON
self.deploy.tear_down_agent(task)
@ -2434,9 +2596,6 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
# Test the situation when soft power off works, but ironic doesn't
# learn about it.
power_off_mock.side_effect = RuntimeError("boom")
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
get_power_state_mock.side_effect = [states.POWER_ON,
states.POWER_OFF]
@ -2458,9 +2617,6 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
def test_tear_down_agent_get_power_state_fails(
self, power_off_mock, get_power_state_mock, node_power_action_mock,
mock_collect, power_on_node_if_needed_mock):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
get_power_state_mock.return_value = RuntimeError("boom")
power_on_node_if_needed_mock.return_value = None
@ -2483,9 +2639,6 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
def test_tear_down_agent_power_off_fails(
self, power_off_mock, get_power_state_mock,
node_power_action_mock, mock_collect):
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
get_power_state_mock.return_value = states.POWER_ON
node_power_action_mock.side_effect = RuntimeError("boom")
@ -2509,10 +2662,8 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
driver_info = self.node.driver_info
driver_info['deploy_forces_oob_reboot'] = True
self.node.driver_info = driver_info
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
self.deploy.tear_down_agent(task)
@ -2534,10 +2685,8 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
driver_info = self.node.driver_info
driver_info['deploy_forces_oob_reboot'] = True
self.node.driver_info = driver_info
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
log_mock.reset_mock()
@ -2568,9 +2717,6 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
self, sync_mock, node_power_action_mock, collect_mock,
power_on_node_if_needed_mock):
cfg.CONF.set_override('deploy_logs_collect', 'always', 'agent')
self.node.provision_state = states.DEPLOYING
self.node.target_provision_state = states.ACTIVE
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
self.deploy.tear_down_agent(task)
node_power_action_mock.assert_called_once_with(task, states.REBOOT)
@ -2581,6 +2727,24 @@ class TearDownAgentTest(test_agent_base.AgentDeployMixinBaseTest):
sync_mock.assert_called_once_with(agent_client.get_client(task),
task.node)
@mock.patch.object(manager_utils, 'power_on_node_if_needed', autospec=True)
@mock.patch.object(time, 'sleep', lambda seconds: None)
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'lockdown', autospec=True)
def test_tear_down_agent_disable_power_off(
self, lockdown_mock, node_power_action_mock,
power_on_node_if_needed_mock):
self.node.disable_power_off = True
self.node.save()
with task_manager.acquire(self.context, self.node.uuid) as task:
power_on_node_if_needed_mock.return_value = None
self.deploy.tear_down_agent(task)
lockdown_mock.assert_called_once_with(mock.ANY, task.node)
node_power_action_mock.assert_not_called()
self.assertEqual(states.DEPLOYING, task.node.provision_state)
self.assertEqual(states.ACTIVE, task.node.target_provision_state)
self.assertIsNone(task.node.last_error)
class SwitchToTenantNetworkTest(test_agent_base.AgentDeployMixinBaseTest):

View File

@ -894,3 +894,78 @@ class TestAgentClientAttempts(base.TestCase):
params={'wait': 'false'},
timeout=60,
verify=True)
@mock.patch('time.sleep', autospec=True)
@mock.patch.object(agent_client.AgentClient, 'get_commands_status',
autospec=True)
@mock.patch.object(agent_client.AgentClient, '_command', autospec=True)
class TestLockDown(base.TestCase):
def setUp(self):
super().setUp()
self.client = agent_client.AgentClient()
self.node = MockNode()
def test_okay(self, mock_command, mock_status, mock_sleep):
mock_status.side_effect = [
{},
{},
exception.AgentConnectionFailed(reason=''),
]
self.client.lockdown(self.node)
mock_command.assert_has_calls([
mock.call(self.client, node=self.node,
method='standby.sync', params={}, wait=True),
mock.call(self.client, node=self.node,
method='system.lockdown', params={}),
])
mock_status.assert_called_with(self.client, self.node,
expect_errors=True)
self.assertEqual(3, mock_status.call_count)
mock_sleep.assert_called_with(5)
def test_okay_with_fail_if_unavailable(self, mock_command, mock_status,
mock_sleep):
mock_status.side_effect = [
{},
{},
exception.AgentConnectionFailed(reason=''),
]
self.client.lockdown(self.node, fail_if_unavailable=False)
mock_command.assert_has_calls([
mock.call(self.client, node=self.node,
method='standby.sync', params={}, wait=True),
mock.call(self.client, node=self.node,
method='system.lockdown', params={}),
])
mock_status.assert_called_with(self.client, self.node,
expect_errors=True)
self.assertEqual(3, mock_status.call_count)
mock_sleep.assert_called_with(5)
def test_agent_already_down(self, mock_command, mock_status, mock_sleep):
mock_status.side_effect = exception.AgentConnectionFailed(reason='')
self.client.lockdown(self.node, fail_if_unavailable=False)
mock_command.assert_not_called()
mock_status.assert_called_once_with(self.client, self.node,
expect_errors=True)
def test_timeout(self, mock_command, mock_status, mock_sleep):
self.assertRaises(exception.AgentCommandTimeout,
self.client.lockdown, self.node)
mock_command.assert_has_calls([
mock.call(self.client, node=self.node,
method='standby.sync', params={}, wait=True),
mock.call(self.client, node=self.node,
method='system.lockdown', params={}),
])
mock_status.assert_called_with(self.client, self.node,
expect_errors=True)
mock_sleep.assert_called_with(5)

View File

@ -1049,17 +1049,22 @@ class AgentMethodsTestCase(db_base.DbTestCase):
def _test_tear_down_inband_cleaning(
self, power_mock, remove_cleaning_network_mock,
clean_up_ramdisk_mock, is_fast_track_mock,
manage_boot=True, fast_track=False, cleaning_error=False):
manage_boot=True, fast_track=False, cleaning_error=False,
disable_power_off=False):
is_fast_track_mock.return_value = fast_track
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
task.node.disable_power_off = disable_power_off
if cleaning_error:
task.node.fault = faults.CLEAN_FAILURE
utils.tear_down_inband_cleaning(task, manage_boot=manage_boot)
if not (fast_track or cleaning_error):
power_mock.assert_called_once_with(task, states.POWER_OFF)
if disable_power_off:
power_mock.assert_called_once_with(task, states.REBOOT)
else:
power_mock.assert_called_once_with(task, states.POWER_OFF)
else:
self.assertFalse(power_mock.called)
power_mock.assert_not_called()
remove_cleaning_network_mock.assert_called_once_with(
task.driver.network, task)
if manage_boot:
@ -1080,6 +1085,13 @@ class AgentMethodsTestCase(db_base.DbTestCase):
def test_tear_down_inband_cleaning_cleaning_error(self):
self._test_tear_down_inband_cleaning(cleaning_error=True)
def test_tear_down_inband_cleaning_disable_power_off(self):
self._test_tear_down_inband_cleaning(disable_power_off=True)
def test_tear_down_inband_cleaning_disable_power_off_and_fast_track(self):
self._test_tear_down_inband_cleaning(disable_power_off=True,
fast_track=True)
@mock.patch.object(pxe.PXEBoot, 'prepare_instance', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True)
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
@ -1123,6 +1135,31 @@ class AgentMethodsTestCase(db_base.DbTestCase):
clean_up_ramdisk_mock.assert_called_once_with(
task.driver.boot, task)
@mock.patch.object(pxe.PXEBoot, 'prepare_instance', autospec=True)
@mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True)
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
'remove_servicing_network', autospec=True)
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
def test_tear_down_inband_service_disable_power_off(
self, power_mock, remove_service_network_mock,
clean_up_ramdisk_mock, prepare_instance_mock):
# NOTE(TheJulia): This should be back to servicing upon a heartbeat
# operation, before we go back to WAIT. We wouldn't know to teardown
# in a wait state anyway.
self.node.provision_state = states.SERVICING
self.node.disable_power_off = True
self.node.save()
with task_manager.acquire(
self.context, self.node.uuid, shared=False) as task:
utils.tear_down_inband_service(task)
power_mock.assert_called_once_with(task, states.REBOOT)
remove_service_network_mock.assert_called_once_with(
task.driver.network, task)
clean_up_ramdisk_mock.assert_called_once_with(
task.driver.boot, task)
prepare_instance_mock.assert_called_once_with(task.driver.boot,
task)
def test_build_agent_options_conf(self):
self.config(endpoint_override='https://api-url',
group='service_catalog')