fast tracked deployment support
Provides a facility to minimize the power state changes of a baremetal node to save critical time during deployment operations. Story: #2004965 Task: #29408 Depends-On: https://review.openstack.org/636778 Change-Id: I7ebbaddb33b38c87246c10165339ac4bac0ac6fc
This commit is contained in:
parent
9625b8aa71
commit
3f734c8728
@ -449,6 +449,8 @@ IRONIC_DEPLOY_LOGS_STORAGE_BACKEND=${IRONIC_DEPLOY_LOGS_STORAGE_BACKEND:-local}
|
||||
# The path to the directory where Ironic should put the logs when IRONIC_DEPLOY_LOGS_STORAGE_BACKEND is set to "local"
|
||||
IRONIC_DEPLOY_LOGS_LOCAL_PATH=${IRONIC_DEPLOY_LOGS_LOCAL_PATH:-$IRONIC_VM_LOG_DIR/deploy_logs}
|
||||
|
||||
# Fast track option
|
||||
IRONIC_DEPLOY_FAST_TRACK=${IRONIC_DEPLOY_FAST_TRACK:-False}
|
||||
|
||||
# Define baremetal min_microversion in tempest config. Default value None is picked from tempest.
|
||||
TEMPEST_BAREMETAL_MIN_MICROVERSION=${TEMPEST_BAREMETAL_MIN_MICROVERSION:-}
|
||||
@ -1115,6 +1117,9 @@ function configure_ironic {
|
||||
iniset $IRONIC_CONF_FILE DEFAULT rpc_transport $IRONIC_RPC_TRANSPORT
|
||||
iniset $IRONIC_CONF_FILE json_rpc port $IRONIC_JSON_RPC_PORT
|
||||
|
||||
# Set fast track options
|
||||
iniset $IRONIC_CONF_FILE deploy fast_track $IRONIC_DEPLOY_FAST_TRACK
|
||||
|
||||
# Configure Ironic conductor, if it was enabled.
|
||||
if is_service_enabled ir-cond; then
|
||||
configure_ironic_conductor
|
||||
@ -2553,6 +2558,8 @@ function ironic_configure_tempest {
|
||||
|
||||
# Enabled features
|
||||
iniset $TEMPEST_CONFIG baremetal_feature_enabled ipxe_enabled $IRONIC_IPXE_ENABLED
|
||||
iniset $TEMPEST_CONFIG baremetal_feature_enabled fast_track_discovery $IRONIC_DEPLOY_FAST_TRACK
|
||||
|
||||
}
|
||||
|
||||
function get_ironic_node_prefix {
|
||||
|
@ -37,10 +37,6 @@ LOG = log.getLogger(__name__)
|
||||
|
||||
_LOOKUP_RETURN_FIELDS = ('uuid', 'properties', 'instance_info',
|
||||
'driver_internal_info')
|
||||
_LOOKUP_ALLOWED_STATES = {states.DEPLOYING, states.DEPLOYWAIT,
|
||||
states.CLEANING, states.CLEANWAIT,
|
||||
states.INSPECTING,
|
||||
states.RESCUING, states.RESCUEWAIT}
|
||||
|
||||
|
||||
def config():
|
||||
@ -83,6 +79,12 @@ class LookupResult(base.APIBase):
|
||||
class LookupController(rest.RestController):
|
||||
"""Controller handling node lookup for a deploy ramdisk."""
|
||||
|
||||
@property
|
||||
def lookup_allowed_states(self):
|
||||
if CONF.deploy.fast_track:
|
||||
return states.FASTTRACK_LOOKUP_ALLOWED_STATES
|
||||
return states.LOOKUP_ALLOWED_STATES
|
||||
|
||||
@expose.expose(LookupResult, types.listtype, types.uuid)
|
||||
def get_all(self, addresses=None, node_uuid=None):
|
||||
"""Look up a node by its MAC addresses and optionally UUID.
|
||||
@ -144,7 +146,7 @@ class LookupController(rest.RestController):
|
||||
raise exception.NotFound()
|
||||
|
||||
if (CONF.api.restrict_lookup
|
||||
and node.provision_state not in _LOOKUP_ALLOWED_STATES):
|
||||
and node.provision_state not in self.lookup_allowed_states):
|
||||
raise exception.NotFound()
|
||||
|
||||
return LookupResult.convert_with_links(node)
|
||||
|
@ -799,7 +799,12 @@ def build_service_pxe_config(task, instance_image_info,
|
||||
# NOTE(pas-ha) if it is takeover of ACTIVE node or node performing
|
||||
# unrescue operation, first ensure that basic PXE configs and links
|
||||
# are in place before switching pxe config
|
||||
if (node.provision_state in [states.ACTIVE, states.UNRESCUING]
|
||||
# NOTE(TheJulia): Also consider deploying a valid state to go ahead
|
||||
# and check things before continuing, as otherwise deployments can
|
||||
# fail if the agent was booted outside the direct actions of the
|
||||
# boot interface.
|
||||
if (node.provision_state in [states.ACTIVE, states.UNRESCUING,
|
||||
states.DEPLOYING]
|
||||
and not os.path.isfile(pxe_config_path)):
|
||||
pxe_options = build_pxe_config_options(task, instance_image_info,
|
||||
service=True,
|
||||
@ -808,6 +813,7 @@ def build_service_pxe_config(task, instance_image_info,
|
||||
create_pxe_config(task, pxe_options, pxe_config_template,
|
||||
ipxe_enabled=ipxe_enabled)
|
||||
iwdi = node.driver_internal_info.get('is_whole_disk_image')
|
||||
|
||||
deploy_utils.switch_pxe_config(
|
||||
pxe_config_path, root_uuid_or_disk_id,
|
||||
boot_mode_utils.get_boot_mode(node),
|
||||
|
@ -243,6 +243,20 @@ If a node gets stuck with one of these states for some reason
|
||||
to fail state.
|
||||
"""
|
||||
|
||||
_LOOKUP_ALLOWED_STATES = (DEPLOYING, DEPLOYWAIT, CLEANING, CLEANWAIT,
|
||||
INSPECTING, RESCUING, RESCUEWAIT)
|
||||
LOOKUP_ALLOWED_STATES = frozenset(_LOOKUP_ALLOWED_STATES)
|
||||
|
||||
"""States when API lookups are normally allowed for nodes."""
|
||||
|
||||
_FASTTRACK_LOOKUP_ALLOWED_STATES = (ENROLL, MANAGEABLE, AVAILABLE,
|
||||
DEPLOYING, DEPLOYWAIT, CLEANING,
|
||||
CLEANWAIT, INSPECTING, RESCUING,
|
||||
RESCUEWAIT)
|
||||
FASTTRACK_LOOKUP_ALLOWED_STATES = frozenset(_FASTTRACK_LOOKUP_ALLOWED_STATES)
|
||||
"""States where API lookups are permitted with fast track enabled."""
|
||||
|
||||
|
||||
##############
|
||||
# Power states
|
||||
##############
|
||||
|
@ -13,6 +13,7 @@
|
||||
# under the License.
|
||||
|
||||
import collections
|
||||
import datetime
|
||||
import time
|
||||
|
||||
from openstack.baremetal import configdrive as os_configdrive
|
||||
@ -21,6 +22,7 @@ from oslo_log import log
|
||||
from oslo_serialization import jsonutils
|
||||
from oslo_service import loopingcall
|
||||
from oslo_utils import excutils
|
||||
from oslo_utils import timeutils
|
||||
import six
|
||||
|
||||
from ironic.common import boot_devices
|
||||
@ -1408,3 +1410,63 @@ def build_configdrive(node, configdrive):
|
||||
LOG.debug('Building a configdrive for node %s', node.uuid)
|
||||
return os_configdrive.build(meta_data, user_data=user_data,
|
||||
network_data=configdrive.get('network_data'))
|
||||
|
||||
|
||||
def fast_track_able(task):
|
||||
"""Checks if the operation can be a streamlined deployment sequence.
|
||||
|
||||
This is mainly focused on ensuring that we are able to quickly sequence
|
||||
through operations if we already have a ramdisk heartbeating through
|
||||
external means.
|
||||
|
||||
:param task: Taskmanager object
|
||||
:returns: True if [deploy]fast_track is set to True, no iSCSI boot
|
||||
configuration is present, and no last_error is present for
|
||||
the node indicating that there was a recent failure.
|
||||
"""
|
||||
return (CONF.deploy.fast_track
|
||||
# TODO(TheJulia): Network model aside, we should be able to
|
||||
# fast-track through initial sequence to complete deployment.
|
||||
# This needs to be validated.
|
||||
# TODO(TheJulia): Do we need a secondary guard? To prevent
|
||||
# driving through this we could query the API endpoint of
|
||||
# the agent with a short timeout such as 10 seconds, which
|
||||
# would help verify if the node is online.
|
||||
# TODO(TheJulia): Should we check the provisioning/deployment
|
||||
# networks match config wise? Do we care? #decisionsdecisions
|
||||
and task.driver.storage.should_write_image(task)
|
||||
and task.node.last_error is None)
|
||||
|
||||
|
||||
def is_fast_track(task):
|
||||
"""Checks a fast track is available.
|
||||
|
||||
This method first ensures that the node and conductor configuration
|
||||
is valid to perform a fast track sequence meaning that we already
|
||||
have a ramdisk running through another means like discovery.
|
||||
If not valid, False is returned.
|
||||
|
||||
The method then checks for the last agent heartbeat, and if it occured
|
||||
within the timeout set by [deploy]fast_track_timeout and the power
|
||||
state for the machine is POWER_ON, then fast track is permitted.
|
||||
|
||||
:param node: A node object.
|
||||
:returns: True if the last heartbeat that was recorded was within
|
||||
the [deploy]fast_track_timeout setting.
|
||||
"""
|
||||
if not fast_track_able(task):
|
||||
return False
|
||||
# use native datetime objects for conversion and compare
|
||||
# slightly odd because py2 compatability :(
|
||||
last = datetime.datetime.strptime(
|
||||
task.node.driver_internal_info.get(
|
||||
'agent_last_heartbeat',
|
||||
'1970-01-01T00:00:00.000000'),
|
||||
"%Y-%m-%dT%H:%M:%S.%f")
|
||||
# If we found nothing, we assume that the time is essentially epoch.
|
||||
time_delta = datetime.timedelta(seconds=CONF.deploy.fast_track_timeout)
|
||||
last_valid = timeutils.utcnow() - time_delta
|
||||
# Checking the power state, because if we find the machine off due to
|
||||
# any action, we can't actually fast track the node. :(
|
||||
return (last_valid <= last
|
||||
and task.driver.power.get_power_state(task) == states.POWER_ON)
|
||||
|
@ -113,6 +113,27 @@ opts = [
|
||||
'images for the direct deploy interface, when local '
|
||||
'HTTP service is incorporated to provide instance image '
|
||||
'instead of swift tempurls.')),
|
||||
cfg.BoolOpt('fast_track',
|
||||
default=False,
|
||||
help=_('Whether to allow deployment agents to perform lookup, '
|
||||
'heartbeat operations during initial states of a '
|
||||
'machine lifecycle and by-pass the normal setup '
|
||||
'procedures for a ramdisk. This feature also enables '
|
||||
'power operations which are part of deployment '
|
||||
'processes to be bypassed if the ramdisk has performed '
|
||||
'a heartbeat operation using the fast_track_timeout '
|
||||
'setting.')),
|
||||
cfg.IntOpt('fast_track_timeout',
|
||||
default=300,
|
||||
min=0,
|
||||
max=300,
|
||||
help=_('Seconds for which the last heartbeat event is to be '
|
||||
'considered valid for the purpose of a fast '
|
||||
'track sequence. This setting should generally be '
|
||||
'less than the number of seconds for "Power-On Self '
|
||||
'Test" and typical ramdisk start-up. This value should '
|
||||
'not exceed the [api]ramdisk_heartbeat_timeout '
|
||||
'setting.')),
|
||||
]
|
||||
|
||||
|
||||
|
@ -455,7 +455,14 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
|
||||
:param task: a TaskManager instance.
|
||||
:returns: status of the deploy. One of ironic.common.states.
|
||||
"""
|
||||
if task.driver.storage.should_write_image(task):
|
||||
if manager_utils.is_fast_track(task):
|
||||
LOG.debug('Performing a fast track deployment for %(node)s.',
|
||||
{'node': task.node.uuid})
|
||||
# Update the database for the API and the task tracking resumes
|
||||
# the state machine state going from DEPLOYWAIT -> DEPLOYING
|
||||
task.process_event('wait')
|
||||
self.continue_deploy(task)
|
||||
elif task.driver.storage.should_write_image(task):
|
||||
manager_utils.node_power_action(task, states.REBOOT)
|
||||
return states.DEPLOYWAIT
|
||||
else:
|
||||
@ -521,6 +528,12 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
|
||||
:raises: exception.InvalidParameterValue if network validation fails.
|
||||
:raises: any boot interface's prepare_ramdisk exceptions.
|
||||
"""
|
||||
|
||||
def _update_instance_info():
|
||||
node.instance_info = (
|
||||
deploy_utils.build_instance_info_for_deploy(task))
|
||||
node.save()
|
||||
|
||||
node = task.node
|
||||
deploy_utils.populate_storage_driver_internal_info(task)
|
||||
if node.provision_state == states.DEPLOYING:
|
||||
@ -547,25 +560,44 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
|
||||
task.driver.network.validate(task)
|
||||
else:
|
||||
ctx.reraise = True
|
||||
|
||||
# Adding the node to provisioning network so that the dhcp
|
||||
# options get added for the provisioning port.
|
||||
# Determine if this is a fast track sequence
|
||||
fast_track_deploy = manager_utils.is_fast_track(task)
|
||||
if fast_track_deploy:
|
||||
# The agent has already recently checked in and we are
|
||||
# configured to take that as an indicator that we can
|
||||
# skip ahead.
|
||||
LOG.debug('The agent for node %(node)s has recently checked '
|
||||
'in, and the node power will remain unmodified.',
|
||||
{'node': task.node.uuid})
|
||||
else:
|
||||
# Powering off node to setup networking for port and
|
||||
# ensure that the state is reset if it is inadvertently
|
||||
# on for any unknown reason.
|
||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
if task.driver.storage.should_write_image(task):
|
||||
# NOTE(vdrok): in case of rebuild, we have tenant network
|
||||
# already configured, unbind tenant ports if present
|
||||
if not fast_track_deploy:
|
||||
power_state_to_restore = (
|
||||
manager_utils.power_on_node_if_needed(task))
|
||||
|
||||
task.driver.network.unconfigure_tenant_networks(task)
|
||||
task.driver.network.add_provisioning_network(task)
|
||||
if not fast_track_deploy:
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
else:
|
||||
# Fast track sequence in progress
|
||||
_update_instance_info()
|
||||
# Signal to storage driver to attach volumes
|
||||
task.driver.storage.attach_volumes(task)
|
||||
if not task.driver.storage.should_write_image(task):
|
||||
if (not task.driver.storage.should_write_image(task)
|
||||
or fast_track_deploy):
|
||||
# We have nothing else to do as this is handled in the
|
||||
# backend storage system, and we can return to the caller
|
||||
# as we do not need to boot the agent to deploy.
|
||||
# Alternatively, we could be in a fast track deployment
|
||||
# and again, we should have nothing to do here.
|
||||
return
|
||||
if node.provision_state in (states.ACTIVE, states.UNRESCUING):
|
||||
# Call is due to conductor takeover
|
||||
@ -573,9 +605,7 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface):
|
||||
elif node.provision_state != states.ADOPTING:
|
||||
if node.provision_state not in (states.RESCUING, states.RESCUEWAIT,
|
||||
states.RESCUE, states.RESCUEFAIL):
|
||||
node.instance_info = (
|
||||
deploy_utils.build_instance_info_for_deploy(task))
|
||||
node.save()
|
||||
_update_instance_info()
|
||||
if CONF.agent.manage_agent_boot:
|
||||
deploy_opts = deploy_utils.build_agent_options(node)
|
||||
task.driver.boot.prepare_ramdisk(task, deploy_opts)
|
||||
|
@ -66,6 +66,19 @@ VENDOR_PROPERTIES = {
|
||||
'older deploy ramdisks. Defaults to False. Optional.')
|
||||
}
|
||||
|
||||
__HEARTBEAT_RECORD_ONLY = (states.ENROLL, states.MANAGEABLE,
|
||||
states.AVAILABLE)
|
||||
_HEARTBEAT_RECORD_ONLY = frozenset(__HEARTBEAT_RECORD_ONLY)
|
||||
|
||||
_HEARTBEAT_ALLOWED = (states.DEPLOYWAIT, states.CLEANWAIT, states.RESCUEWAIT)
|
||||
HEARTBEAT_ALLOWED = frozenset(_HEARTBEAT_ALLOWED)
|
||||
|
||||
_FASTTRACK_HEARTBEAT_ALLOWED = (states.DEPLOYWAIT, states.CLEANWAIT,
|
||||
states.RESCUEWAIT, states.ENROLL,
|
||||
states.MANAGEABLE, states.AVAILABLE,
|
||||
states.DEPLOYING)
|
||||
FASTTRACK_HEARTBEAT_ALLOWED = frozenset(_FASTTRACK_HEARTBEAT_ALLOWED)
|
||||
|
||||
|
||||
def _get_client():
|
||||
client = agent_client.AgentClient()
|
||||
@ -260,7 +273,9 @@ class HeartbeatMixin(object):
|
||||
@property
|
||||
def heartbeat_allowed_states(self):
|
||||
"""Define node states where heartbeating is allowed"""
|
||||
return (states.DEPLOYWAIT, states.CLEANWAIT, states.RESCUEWAIT)
|
||||
if CONF.deploy.fast_track:
|
||||
return FASTTRACK_HEARTBEAT_ALLOWED
|
||||
return HEARTBEAT_ALLOWED
|
||||
|
||||
@METRICS.timer('HeartbeatMixin.heartbeat')
|
||||
def heartbeat(self, task, callback_url, agent_version):
|
||||
@ -271,7 +286,8 @@ class HeartbeatMixin(object):
|
||||
:param agent_version: The version of the agent that is heartbeating
|
||||
"""
|
||||
# NOTE(pas-ha) immediately skip the rest if nothing to do
|
||||
if task.node.provision_state not in self.heartbeat_allowed_states:
|
||||
if (task.node.provision_state not in self.heartbeat_allowed_states
|
||||
and not manager_utils.fast_track_able(task)):
|
||||
LOG.debug('Heartbeat from node %(node)s in unsupported '
|
||||
'provision state %(state)s, not taking any action.',
|
||||
{'node': task.node.uuid,
|
||||
@ -288,13 +304,25 @@ class HeartbeatMixin(object):
|
||||
|
||||
node = task.node
|
||||
LOG.debug('Heartbeat from node %s', node.uuid)
|
||||
|
||||
driver_internal_info = node.driver_internal_info
|
||||
driver_internal_info['agent_url'] = callback_url
|
||||
driver_internal_info['agent_version'] = agent_version
|
||||
# Record the last heartbeat event time in UTC, so we can make
|
||||
# decisions about it later. Can be decoded to datetime object with:
|
||||
# datetime.datetime.strptime(var, "%Y-%m-%d %H:%M:%S.%f")
|
||||
driver_internal_info['agent_last_heartbeat'] = str(
|
||||
timeutils.utcnow().isoformat())
|
||||
node.driver_internal_info = driver_internal_info
|
||||
node.save()
|
||||
|
||||
if node.provision_state in _HEARTBEAT_RECORD_ONLY:
|
||||
# We shouldn't take any additional action. The agent will
|
||||
# silently continue to heartbeat to ironic until user initiated
|
||||
# state change occurs causing it to match a state below.
|
||||
LOG.debug('Heartbeat from %(node)s recorded to identify the '
|
||||
'node as on-line.', {'node': task.node.uuid})
|
||||
return
|
||||
|
||||
# Async call backs don't set error state on their own
|
||||
# TODO(jimrollenhagen) improve error messages here
|
||||
msg = _('Failed checking if deploy is done.')
|
||||
|
@ -899,8 +899,20 @@ def prepare_inband_cleaning(task, manage_boot=True):
|
||||
:raises: InvalidParameterValue if cleaning network UUID config option has
|
||||
an invalid value.
|
||||
"""
|
||||
fast_track = manager_utils.is_fast_track(task)
|
||||
if not fast_track:
|
||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||
|
||||
# WARNING(TheJulia): When fast track is available, trying to plug the
|
||||
# cleaning network is problematic and in practice this may fail if
|
||||
# cleaning/provisioning/discovery all take place on different
|
||||
# networks when..
|
||||
# Translation: Here be a realistically unavoidable footgun
|
||||
# fast track support.
|
||||
# TODO(TheJulia): Lets improve this somehow such that the agent host
|
||||
# gracefully handles these sorts of changes.
|
||||
task.driver.network.add_cleaning_network(task)
|
||||
if not fast_track:
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
|
||||
@ -912,6 +924,7 @@ def prepare_inband_cleaning(task, manage_boot=True):
|
||||
ramdisk_opts = build_agent_options(task.node)
|
||||
task.driver.boot.prepare_ramdisk(task, ramdisk_opts)
|
||||
|
||||
if not fast_track:
|
||||
manager_utils.node_power_action(task, states.REBOOT)
|
||||
|
||||
# Tell the conductor we are waiting for the agent to boot.
|
||||
@ -936,12 +949,16 @@ def tear_down_inband_cleaning(task, manage_boot=True):
|
||||
:raises: NetworkError, NodeCleaningFailure if the cleaning ports cannot be
|
||||
removed.
|
||||
"""
|
||||
fast_track = manager_utils.is_fast_track(task)
|
||||
if not fast_track:
|
||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
|
||||
if manage_boot:
|
||||
task.driver.boot.clean_up_ramdisk(task)
|
||||
|
||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||
task.driver.network.remove_cleaning_network(task)
|
||||
if not fast_track:
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
|
||||
|
@ -408,13 +408,24 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
|
||||
:returns: deploy state DEPLOYWAIT.
|
||||
"""
|
||||
node = task.node
|
||||
if task.driver.storage.should_write_image(task):
|
||||
if manager_utils.is_fast_track(task):
|
||||
LOG.debug('Performing a fast track deployment for %(node)s.',
|
||||
{'node': task.node.uuid})
|
||||
deploy_utils.cache_instance_image(task.context, node)
|
||||
check_image_size(task)
|
||||
# Update the database for the API and the task tracking resumes
|
||||
# the state machine state going from DEPLOYWAIT -> DEPLOYING
|
||||
task.process_event('wait')
|
||||
self.continue_deploy(task)
|
||||
elif task.driver.storage.should_write_image(task):
|
||||
# Standard deploy process
|
||||
deploy_utils.cache_instance_image(task.context, node)
|
||||
check_image_size(task)
|
||||
manager_utils.node_power_action(task, states.REBOOT)
|
||||
|
||||
return states.DEPLOYWAIT
|
||||
else:
|
||||
# Boot to an Storage Volume
|
||||
|
||||
# TODO(TheJulia): At some point, we should de-dupe this code
|
||||
# as it is nearly identical to the agent deploy interface.
|
||||
# This is not being done now as it is expected to be
|
||||
@ -486,23 +497,38 @@ class ISCSIDeploy(AgentDeployMixin, base.DeployInterface):
|
||||
task.driver.boot.prepare_instance(task)
|
||||
else:
|
||||
if node.provision_state == states.DEPLOYING:
|
||||
fast_track_deploy = manager_utils.is_fast_track(task)
|
||||
if fast_track_deploy:
|
||||
# The agent has already recently checked in and we are
|
||||
# configured to take that as an indicator that we can
|
||||
# skip ahead.
|
||||
LOG.debug('The agent for node %(node)s has recently '
|
||||
'checked in, and the node power will remain '
|
||||
'unmodified.',
|
||||
{'node': task.node.uuid})
|
||||
else:
|
||||
# Adding the node to provisioning network so that the dhcp
|
||||
# options get added for the provisioning port.
|
||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||
# NOTE(vdrok): in case of rebuild, we have tenant network
|
||||
# already configured, unbind tenant ports if present
|
||||
if task.driver.storage.should_write_image(task):
|
||||
if not fast_track_deploy:
|
||||
power_state_to_restore = (
|
||||
manager_utils.power_on_node_if_needed(task))
|
||||
task.driver.network.unconfigure_tenant_networks(task)
|
||||
task.driver.network.add_provisioning_network(task)
|
||||
if not fast_track_deploy:
|
||||
manager_utils.restore_power_state_if_needed(
|
||||
task, power_state_to_restore)
|
||||
task.driver.storage.attach_volumes(task)
|
||||
if not task.driver.storage.should_write_image(task):
|
||||
if (not task.driver.storage.should_write_image(task)
|
||||
or fast_track_deploy):
|
||||
# We have nothing else to do as this is handled in the
|
||||
# backend storage system, and we can return to the caller
|
||||
# as we do not need to boot the agent to deploy.
|
||||
# Alternatively, we are in a fast track deployment
|
||||
# and have nothing else to do.
|
||||
return
|
||||
|
||||
deploy_opts = deploy_utils.build_agent_options(node)
|
||||
|
@ -24,6 +24,7 @@ from six.moves import http_client
|
||||
from ironic.api.controllers import base as api_base
|
||||
from ironic.api.controllers import v1 as api_v1
|
||||
from ironic.api.controllers.v1 import ramdisk
|
||||
from ironic.common import states
|
||||
from ironic.conductor import rpcapi
|
||||
from ironic.tests.unit.api import base as test_api_base
|
||||
from ironic.tests.unit.objects import utils as obj_utils
|
||||
@ -160,6 +161,17 @@ class TestLookup(test_api_base.BaseApiTest):
|
||||
set(data['node']))
|
||||
self._check_config(data)
|
||||
|
||||
def test_fast_deploy_lookup(self):
|
||||
CONF.set_override('fast_track', True, 'deploy')
|
||||
for provision_state in [states.ENROLL, states.MANAGEABLE,
|
||||
states.AVAILABLE]:
|
||||
self.node.provision_state = provision_state
|
||||
data = self.get_json(
|
||||
'/lookup?addresses=%s&node_uuid=%s' %
|
||||
(','.join(self.addresses), self.node.uuid),
|
||||
headers={api_base.Version.string: str(api_v1.max_version())})
|
||||
self.assertEqual(self.node.uuid, data['node']['uuid'])
|
||||
|
||||
|
||||
@mock.patch.object(rpcapi.ConductorAPI, 'get_topic_for',
|
||||
lambda *n: 'test-topic')
|
||||
|
@ -13,6 +13,7 @@ import time
|
||||
|
||||
import mock
|
||||
from oslo_config import cfg
|
||||
from oslo_utils import timeutils
|
||||
from oslo_utils import uuidutils
|
||||
|
||||
from ironic.common import boot_devices
|
||||
@ -2537,3 +2538,53 @@ class ValidateDeployTemplatesTestCase(db_base.DbTestCase):
|
||||
self.assertRaises(exception.InvalidParameterValue,
|
||||
conductor_utils.validate_deploy_templates, task)
|
||||
mock_validated.assert_called_once_with(task)
|
||||
|
||||
|
||||
@mock.patch.object(fake.FakePower, 'get_power_state', autospec=True)
|
||||
class FastTrackTestCase(db_base.DbTestCase):
|
||||
|
||||
def setUp(self):
|
||||
super(FastTrackTestCase, self).setUp()
|
||||
self.node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
uuid=uuidutils.generate_uuid(),
|
||||
driver_internal_info={
|
||||
'agent_last_heartbeat': str(timeutils.utcnow().isoformat())})
|
||||
self.config(fast_track=True, group='deploy')
|
||||
|
||||
def test_is_fast_track(self, mock_get_power):
|
||||
mock_get_power.return_value = states.POWER_ON
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.assertTrue(conductor_utils.is_fast_track(task))
|
||||
|
||||
def test_is_fast_track_config_false(self, mock_get_power):
|
||||
self.config(fast_track=False, group='deploy')
|
||||
mock_get_power.return_value = states.POWER_ON
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.assertFalse(conductor_utils.is_fast_track(task))
|
||||
|
||||
def test_is_fast_track_power_off_false(self, mock_get_power):
|
||||
mock_get_power.return_value = states.POWER_OFF
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.assertFalse(conductor_utils.is_fast_track(task))
|
||||
|
||||
def test_is_fast_track_no_heartbeat(self, mock_get_power):
|
||||
mock_get_power.return_value = states.POWER_ON
|
||||
i_info = self.node.driver_internal_info
|
||||
i_info.pop('agent_last_heartbeat')
|
||||
self.node.driver_internal_info = i_info
|
||||
self.node.save()
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.assertFalse(conductor_utils.is_fast_track(task))
|
||||
|
||||
def test_is_fast_track_error_blocks(self, mock_get_power):
|
||||
mock_get_power.return_value = states.POWER_ON
|
||||
self.node.last_error = "bad things happened"
|
||||
self.node.save()
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.assertFalse(conductor_utils.is_fast_track(task))
|
||||
|
@ -337,6 +337,39 @@ class TestAgentDeploy(db_base.DbTestCase):
|
||||
self.assertIsNone(driver_return)
|
||||
self.assertTrue(mock_pxe_instance.called)
|
||||
|
||||
@mock.patch.object(agent_client.AgentClient, 'prepare_image',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
|
||||
@mock.patch.object(pxe.PXEBoot, 'prepare_instance', autospec=True)
|
||||
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
|
||||
def test_deploy_fast_track(self, power_mock, mock_pxe_instance,
|
||||
mock_is_fast_track, prepare_image_mock):
|
||||
mock_is_fast_track.return_value = True
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
test_temp_url = 'http://image'
|
||||
expected_image_info = {
|
||||
'urls': [test_temp_url],
|
||||
'id': 'fake-image',
|
||||
'node_uuid': self.node.uuid,
|
||||
'checksum': 'checksum',
|
||||
'disk_format': 'qcow2',
|
||||
'container_format': 'bare',
|
||||
'stream_raw_images': CONF.agent.stream_raw_images,
|
||||
}
|
||||
self.node.save()
|
||||
with task_manager.acquire(
|
||||
self.context, self.node['uuid'], shared=False) as task:
|
||||
self.driver.deploy(task)
|
||||
self.assertFalse(power_mock.called)
|
||||
self.assertFalse(mock_pxe_instance.called)
|
||||
task.node.refresh()
|
||||
prepare_image_mock.assert_called_with(mock.ANY, task.node,
|
||||
expected_image_info)
|
||||
self.assertEqual(states.DEPLOYWAIT, task.node.provision_state)
|
||||
self.assertEqual(states.ACTIVE,
|
||||
task.node.target_provision_state)
|
||||
|
||||
@mock.patch.object(noop_storage.NoopStorage, 'detach_volumes',
|
||||
autospec=True)
|
||||
@mock.patch.object(flat_network.FlatNetwork,
|
||||
@ -778,6 +811,52 @@ class TestAgentDeploy(db_base.DbTestCase):
|
||||
build_options_mock.assert_not_called()
|
||||
pxe_prepare_ramdisk_mock.assert_not_called()
|
||||
|
||||
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
|
||||
@mock.patch.object(noop_storage.NoopStorage, 'attach_volumes',
|
||||
autospec=True)
|
||||
@mock.patch.object(deploy_utils, 'populate_storage_driver_internal_info')
|
||||
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk')
|
||||
@mock.patch.object(deploy_utils, 'build_agent_options')
|
||||
@mock.patch.object(deploy_utils, 'build_instance_info_for_deploy')
|
||||
@mock.patch.object(flat_network.FlatNetwork, 'add_provisioning_network',
|
||||
spec_set=True, autospec=True)
|
||||
@mock.patch.object(flat_network.FlatNetwork,
|
||||
'unconfigure_tenant_networks',
|
||||
spec_set=True, autospec=True)
|
||||
@mock.patch.object(flat_network.FlatNetwork, 'validate',
|
||||
spec_set=True, autospec=True)
|
||||
def test_prepare_fast_track(
|
||||
self, validate_net_mock,
|
||||
unconfigure_tenant_net_mock, add_provisioning_net_mock,
|
||||
build_instance_info_mock, build_options_mock,
|
||||
pxe_prepare_ramdisk_mock, storage_driver_info_mock,
|
||||
storage_attach_volumes_mock, is_fast_track_mock):
|
||||
# TODO(TheJulia): We should revisit this test. Smartnic
|
||||
# support didn't wire in tightly on testing for power in
|
||||
# these tests, and largely fast_track impacts power operations.
|
||||
node = self.node
|
||||
node.network_interface = 'flat'
|
||||
node.save()
|
||||
is_fast_track_mock.return_value = True
|
||||
with task_manager.acquire(
|
||||
self.context, self.node['uuid'], shared=False) as task:
|
||||
task.node.provision_state = states.DEPLOYING
|
||||
build_options_mock.return_value = {'a': 'b'}
|
||||
self.driver.prepare(task)
|
||||
storage_driver_info_mock.assert_called_once_with(task)
|
||||
validate_net_mock.assert_called_once_with(mock.ANY, task)
|
||||
add_provisioning_net_mock.assert_called_once_with(mock.ANY, task)
|
||||
unconfigure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
|
||||
self.assertTrue(storage_attach_volumes_mock.called)
|
||||
self.assertTrue(build_instance_info_mock.called)
|
||||
# TODO(TheJulia): We should likely consider executing the
|
||||
# next two methods at some point in order to facilitate
|
||||
# continuity. While not explicitly required for this feature
|
||||
# to work, reboots as part of deployment would need the ramdisk
|
||||
# present and ready.
|
||||
self.assertFalse(build_options_mock.called)
|
||||
self.assertFalse(pxe_prepare_ramdisk_mock.called)
|
||||
|
||||
@mock.patch('ironic.common.dhcp_factory.DHCPFactory._set_dhcp_provider')
|
||||
@mock.patch('ironic.common.dhcp_factory.DHCPFactory.clean_dhcp')
|
||||
@mock.patch.object(pxe.PXEBoot, 'clean_up_instance')
|
||||
|
@ -355,8 +355,29 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
|
||||
task.node.driver_internal_info['agent_url'])
|
||||
self.assertEqual('3.2.0',
|
||||
task.node.driver_internal_info['agent_version'])
|
||||
self.assertIsNotNone(
|
||||
task.node.driver_internal_info['agent_last_heartbeat'])
|
||||
mock_touch.assert_called_once_with(mock.ANY)
|
||||
|
||||
def test_heartbeat_records_fast_track(self):
|
||||
self.config(fast_track=True, group='deploy')
|
||||
for provision_state in [states.ENROLL, states.MANAGEABLE,
|
||||
states.AVAILABLE]:
|
||||
self.node.driver_internal_info = {}
|
||||
self.node.provision_state = provision_state
|
||||
self.node.save()
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '3.2.0')
|
||||
self.assertEqual('http://127.0.0.1:8080',
|
||||
task.node.driver_internal_info['agent_url'])
|
||||
self.assertEqual('3.2.0',
|
||||
task.node.driver_internal_info[
|
||||
'agent_version'])
|
||||
self.assertIsNotNone(
|
||||
task.node.driver_internal_info['agent_last_heartbeat'])
|
||||
self.assertEqual(provision_state, task.node.provision_state)
|
||||
|
||||
|
||||
class AgentRescueTests(AgentDeployMixinBaseTest):
|
||||
|
||||
|
@ -1699,6 +1699,7 @@ class AgentMethodsTestCase(db_base.DbTestCase):
|
||||
self.assertEqual(8, task.node.driver_internal_info[
|
||||
'disk_erasure_concurrency'])
|
||||
|
||||
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
|
||||
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', autospec=True)
|
||||
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
|
||||
@mock.patch.object(utils, 'build_agent_options', autospec=True)
|
||||
@ -1707,15 +1708,19 @@ class AgentMethodsTestCase(db_base.DbTestCase):
|
||||
def _test_prepare_inband_cleaning(
|
||||
self, add_cleaning_network_mock,
|
||||
build_options_mock, power_mock, prepare_ramdisk_mock,
|
||||
manage_boot=True):
|
||||
is_fast_track_mock, manage_boot=True, fast_track=False):
|
||||
build_options_mock.return_value = {'a': 'b'}
|
||||
is_fast_track_mock.return_value = fast_track
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
self.assertEqual(
|
||||
states.CLEANWAIT,
|
||||
utils.prepare_inband_cleaning(task, manage_boot=manage_boot))
|
||||
add_cleaning_network_mock.assert_called_once_with(task)
|
||||
if not fast_track:
|
||||
power_mock.assert_called_once_with(task, states.REBOOT)
|
||||
else:
|
||||
self.assertFalse(power_mock.called)
|
||||
self.assertEqual(1, task.node.driver_internal_info[
|
||||
'agent_erase_devices_iterations'])
|
||||
self.assertIs(True, task.node.driver_internal_info[
|
||||
@ -1734,17 +1739,26 @@ class AgentMethodsTestCase(db_base.DbTestCase):
|
||||
def test_prepare_inband_cleaning_manage_boot_false(self):
|
||||
self._test_prepare_inband_cleaning(manage_boot=False)
|
||||
|
||||
def test_prepare_inband_cleaning_fast_track(self):
|
||||
self._test_prepare_inband_cleaning(fast_track=True)
|
||||
|
||||
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
|
||||
@mock.patch.object(pxe.PXEBoot, 'clean_up_ramdisk', autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.'
|
||||
'remove_cleaning_network')
|
||||
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
|
||||
def _test_tear_down_inband_cleaning(
|
||||
self, power_mock, remove_cleaning_network_mock,
|
||||
clean_up_ramdisk_mock, manage_boot=True):
|
||||
clean_up_ramdisk_mock, is_fast_track_mock,
|
||||
manage_boot=True, fast_track=False):
|
||||
is_fast_track_mock.return_value = fast_track
|
||||
with task_manager.acquire(
|
||||
self.context, self.node.uuid, shared=False) as task:
|
||||
utils.tear_down_inband_cleaning(task, manage_boot=manage_boot)
|
||||
if not fast_track:
|
||||
power_mock.assert_called_once_with(task, states.POWER_OFF)
|
||||
else:
|
||||
self.assertFalse(power_mock.called)
|
||||
remove_cleaning_network_mock.assert_called_once_with(task)
|
||||
if manage_boot:
|
||||
clean_up_ramdisk_mock.assert_called_once_with(
|
||||
@ -1758,6 +1772,9 @@ class AgentMethodsTestCase(db_base.DbTestCase):
|
||||
def test_tear_down_inband_cleaning_manage_boot_false(self):
|
||||
self._test_tear_down_inband_cleaning(manage_boot=False)
|
||||
|
||||
def test_tear_down_inband_cleaning_fast_track(self):
|
||||
self._test_tear_down_inband_cleaning(fast_track=True)
|
||||
|
||||
def test_build_agent_options_conf(self):
|
||||
self.config(api_url='https://api-url', group='conductor')
|
||||
options = utils.build_agent_options(self.node)
|
||||
|
@ -734,6 +734,53 @@ class ISCSIDeployTestCase(db_base.DbTestCase):
|
||||
task.driver.storage, task)
|
||||
self.assertEqual(2, storage_should_write_mock.call_count)
|
||||
|
||||
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
|
||||
@mock.patch.object(noop_storage.NoopStorage, 'attach_volumes',
|
||||
autospec=True)
|
||||
@mock.patch.object(deploy_utils, 'populate_storage_driver_internal_info')
|
||||
@mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk')
|
||||
@mock.patch.object(deploy_utils, 'build_agent_options')
|
||||
@mock.patch.object(deploy_utils, 'build_instance_info_for_deploy')
|
||||
@mock.patch.object(flat_network.FlatNetwork, 'add_provisioning_network',
|
||||
spec_set=True, autospec=True)
|
||||
@mock.patch.object(flat_network.FlatNetwork,
|
||||
'unconfigure_tenant_networks',
|
||||
spec_set=True, autospec=True)
|
||||
@mock.patch.object(flat_network.FlatNetwork, 'validate',
|
||||
spec_set=True, autospec=True)
|
||||
def test_prepare_fast_track(
|
||||
self, validate_net_mock,
|
||||
unconfigure_tenant_net_mock, add_provisioning_net_mock,
|
||||
build_instance_info_mock, build_options_mock,
|
||||
pxe_prepare_ramdisk_mock, storage_driver_info_mock,
|
||||
storage_attach_volumes_mock, is_fast_track_mock):
|
||||
# TODO(TheJulia): We should revisit this test. Smartnic
|
||||
# support didn't wire in tightly on testing for power in
|
||||
# these tests, and largely fast_track impacts power operations.
|
||||
node = self.node
|
||||
node.network_interface = 'flat'
|
||||
node.save()
|
||||
is_fast_track_mock.return_value = True
|
||||
with task_manager.acquire(
|
||||
self.context, self.node['uuid'], shared=False) as task:
|
||||
task.node.provision_state = states.DEPLOYING
|
||||
build_options_mock.return_value = {'a': 'b'}
|
||||
task.driver.deploy.prepare(task)
|
||||
storage_driver_info_mock.assert_called_once_with(task)
|
||||
# NOTE: Validate is the primary difference between agent/iscsi
|
||||
self.assertFalse(validate_net_mock.called)
|
||||
add_provisioning_net_mock.assert_called_once_with(mock.ANY, task)
|
||||
unconfigure_tenant_net_mock.assert_called_once_with(mock.ANY, task)
|
||||
self.assertTrue(storage_attach_volumes_mock.called)
|
||||
self.assertFalse(build_instance_info_mock.called)
|
||||
# TODO(TheJulia): We should likely consider executing the
|
||||
# next two methods at some point in order to facilitate
|
||||
# continuity. While not explicitly required for this feature
|
||||
# to work, reboots as part of deployment would need the ramdisk
|
||||
# present and ready.
|
||||
self.assertFalse(build_options_mock.called)
|
||||
self.assertFalse(pxe_prepare_ramdisk_mock.called)
|
||||
|
||||
@mock.patch.object(manager_utils, 'node_power_action', autospec=True)
|
||||
@mock.patch.object(iscsi_deploy, 'check_image_size', autospec=True)
|
||||
@mock.patch.object(deploy_utils, 'cache_instance_image', autospec=True)
|
||||
@ -787,6 +834,36 @@ class ISCSIDeployTestCase(db_base.DbTestCase):
|
||||
self.assertEqual(2, mock_node_power_action.call_count)
|
||||
self.assertEqual(states.DEPLOYING, task.node.provision_state)
|
||||
|
||||
@mock.patch.object(iscsi_deploy, 'check_image_size', autospec=True)
|
||||
@mock.patch.object(deploy_utils, 'cache_instance_image', autospec=True)
|
||||
@mock.patch.object(iscsi_deploy.ISCSIDeploy, 'continue_deploy',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.conductor.utils.is_fast_track', autospec=True)
|
||||
@mock.patch.object(pxe.PXEBoot, 'prepare_instance', autospec=True)
|
||||
@mock.patch('ironic.conductor.utils.node_power_action', autospec=True)
|
||||
def test_deploy_fast_track(self, power_mock, mock_pxe_instance,
|
||||
mock_is_fast_track, continue_deploy_mock,
|
||||
cache_image_mock, check_image_size_mock):
|
||||
mock_is_fast_track.return_value = True
|
||||
self.node.target_provision_state = states.ACTIVE
|
||||
self.node.provision_state = states.DEPLOYING
|
||||
i_info = self.node.driver_internal_info
|
||||
i_info['agent_url'] = 'http://1.2.3.4:1234'
|
||||
self.node.driver_internal_info = i_info
|
||||
self.node.save()
|
||||
with task_manager.acquire(
|
||||
self.context, self.node['uuid'], shared=False) as task:
|
||||
task.driver.deploy.deploy(task)
|
||||
self.assertFalse(power_mock.called)
|
||||
self.assertFalse(mock_pxe_instance.called)
|
||||
task.node.refresh()
|
||||
self.assertEqual(states.DEPLOYWAIT, task.node.provision_state)
|
||||
self.assertEqual(states.ACTIVE,
|
||||
task.node.target_provision_state)
|
||||
cache_image_mock.assert_called_with(mock.ANY, task.node)
|
||||
check_image_size_mock.assert_called_with(task)
|
||||
continue_deploy_mock.assert_called_with(mock.ANY, task)
|
||||
|
||||
@mock.patch.object(noop_storage.NoopStorage, 'detach_volumes',
|
||||
autospec=True)
|
||||
@mock.patch.object(flat_network.FlatNetwork,
|
||||
|
@ -0,0 +1,16 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Adds a new feature called `fast-track` which allows an operator to
|
||||
optionally configure the Bare Metal API Service and the Bare Metal
|
||||
conductor service to permit lookup and heartbeat for nodes that are
|
||||
in the process of being enrolled and created.
|
||||
|
||||
These nodes can be left online, from a process such as discovery.
|
||||
If ironic-python-agent has communicated with the Bare Metal Service
|
||||
API endpoint with-in the last `300` seconds, then setup steps that
|
||||
are normally involved with preparing to launch a ramdisk on the node,
|
||||
are skipped along with power operations to enable a baremetal node to
|
||||
undergo discovery through to deployment with a single power cycle.
|
||||
Fast track functionality may be enabled through the ``[deploy]fast_track``
|
||||
option.
|
@ -466,3 +466,14 @@
|
||||
Run python 3 unit tests with driver dependencies installed.
|
||||
vars:
|
||||
tox_envlist: unit-with-driver-libs-python3
|
||||
|
||||
- job:
|
||||
name: ironic-inspector-tempest-discovery-fast-track
|
||||
description: ironic-inspector-tempest-discovery-fast-track
|
||||
parent: ironic-inspector-tempest-discovery
|
||||
vars:
|
||||
tempest_test_regex: BareMetalFastTrackTest
|
||||
devstack_localrc:
|
||||
IRONIC_INSPECTOR_POWER_OFF: False
|
||||
IRONIC_DEPLOY_FAST_TRACK: True
|
||||
IRONIC_DEPLOY_FAST_TRACK_CLEANING: True
|
||||
|
Loading…
Reference in New Issue
Block a user