Migrate the inspector's /continue API

This change creates all necessary parts to processing inspection data:

* New API /v1/continue_inspection

Depending on the API version, either behaves like the inspector's API
or (new version) adds the lookup functionality on top.

The lookup process is migrated from ironic-inspector with minor changes.
It takes MAC addresses, BMC addresses and (optionally) a node UUID and
tries to find a single node in INSPECTWAIT state that satisfies all
of these. Any failure results in HTTP 404.

To make lookup faster, the resolved BMC addresses are cached in advance.

* New RPC continue_inspection

Essentially, checks the provision state again and delegates to the
inspect interface.

* New inspect interface call continue_inspection

The base version does nothing. Since we don't yet have in-band
inspection in Ironic proper, the only actual implementation is added
to the existing "inspector" interface that works by doing a call
to ironic-inspector.

Story: #2010275
Task: #46208
Change-Id: Ia3f5bb9d1845d6b8fab30232a72b5a360a5a56d2
This commit is contained in:
Dmitry Tantsur 2023-03-01 14:50:30 +01:00
parent 97f7177495
commit 0370f5ac97
21 changed files with 791 additions and 30 deletions

View File

@ -2,8 +2,14 @@
REST API Version History REST API Version History
======================== ========================
1.84 (Bobcat)
-------------
Add callback endpoint for in-band inspection ``/v1/continue_inspection``.
This endpoint is not designed to be used by regular users.
1.83 (Bobcat) 1.83 (Bobcat)
---------------------- -------------
This version adds a concept of child nodes through the use of a This version adds a concept of child nodes through the use of a
``parent_node`` field which can be set on a node. ``parent_node`` field which can be set on a node.

View File

@ -32,6 +32,7 @@ app = {
# IPA ramdisk methods # IPA ramdisk methods
'/v1/lookup', '/v1/lookup',
'/v1/heartbeat/[a-z0-9\\-]+', '/v1/heartbeat/[a-z0-9\\-]+',
'/v1/continue_inspection',
], ],
} }

View File

@ -77,6 +77,11 @@ VERSIONED_CONTROLLERS = {
'events': utils.allow_expose_events, 'events': utils.allow_expose_events,
'deploy_templates': utils.allow_deploy_templates, 'deploy_templates': utils.allow_deploy_templates,
'shards': utils.allow_shards_endpoint, 'shards': utils.allow_shards_endpoint,
# NOTE(dtantsur): continue_inspection is available in 1.1 as a
# compatibility hack to make it usable with IPA without changes.
# Hide this fact from consumers since it was not actually available
# back in Kilo.
'continue_inspection': utils.new_continue_inspection_endpoint,
} }
@ -125,6 +130,7 @@ class Controller(object):
'events': event.EventsController(), 'events': event.EventsController(),
'deploy_templates': deploy_template.DeployTemplatesController(), 'deploy_templates': deploy_template.DeployTemplatesController(),
'shards': shard.ShardController(), 'shards': shard.ShardController(),
'continue_inspection': ramdisk.ContinueInspectionController(),
} }
@method.expose() @method.expose()

View File

@ -21,12 +21,14 @@ from pecan import rest
from ironic import api from ironic import api
from ironic.api.controllers.v1 import node as node_ctl from ironic.api.controllers.v1 import node as node_ctl
from ironic.api.controllers.v1 import utils as api_utils from ironic.api.controllers.v1 import utils as api_utils
from ironic.api.controllers.v1 import versions
from ironic.api import method from ironic.api import method
from ironic.common import args from ironic.common import args
from ironic.common import exception from ironic.common import exception
from ironic.common.i18n import _ from ironic.common.i18n import _
from ironic.common import states from ironic.common import states
from ironic.common import utils from ironic.common import utils
from ironic.drivers.modules import inspect_utils
from ironic import objects from ironic import objects
@ -66,6 +68,31 @@ def convert_with_links(node):
return {'node': node, 'config': config(token)} return {'node': node, 'config': config(token)}
def get_valid_mac_addresses(addresses, node_uuid=None):
if addresses is None:
addresses = []
valid_addresses = []
invalid_addresses = []
for addr in addresses:
try:
mac = utils.validate_and_normalize_mac(addr)
valid_addresses.append(mac)
except exception.InvalidMAC:
invalid_addresses.append(addr)
if invalid_addresses:
node_log = ('' if not node_uuid
else '(Node UUID: %s)' % node_uuid)
LOG.warning('The following MAC addresses "%(addrs)s" are '
'invalid and will be ignored by the lookup '
'request %(node)s',
{'addrs': ', '.join(invalid_addresses),
'node': node_log})
return valid_addresses
class LookupController(rest.RestController): class LookupController(rest.RestController):
"""Controller handling node lookup for a deploy ramdisk.""" """Controller handling node lookup for a deploy ramdisk."""
@ -100,27 +127,7 @@ class LookupController(rest.RestController):
api_utils.check_policy('baremetal:driver:ipa_lookup') api_utils.check_policy('baremetal:driver:ipa_lookup')
# Validate the list of MAC addresses # Validate the list of MAC addresses
if addresses is None: valid_addresses = get_valid_mac_addresses(addresses)
addresses = []
valid_addresses = []
invalid_addresses = []
for addr in addresses:
try:
mac = utils.validate_and_normalize_mac(addr)
valid_addresses.append(mac)
except exception.InvalidMAC:
invalid_addresses.append(addr)
if invalid_addresses:
node_log = ('' if not node_uuid
else '(Node UUID: %s)' % node_uuid)
LOG.warning('The following MAC addresses "%(addrs)s" are '
'invalid and will be ignored by the lookup '
'request %(node)s',
{'addrs': ', '.join(invalid_addresses),
'node': node_log})
if not valid_addresses and not node_uuid: if not valid_addresses and not node_uuid:
raise exception.IncompleteLookup() raise exception.IncompleteLookup()
@ -254,3 +261,99 @@ class HeartbeatController(rest.RestController):
api.request.context, rpc_node.uuid, callback_url, api.request.context, rpc_node.uuid, callback_url,
agent_version, agent_token, agent_verify_ca, agent_status, agent_version, agent_token, agent_verify_ca, agent_status,
agent_status_message, topic=topic) agent_status_message, topic=topic)
DATA_VALIDATOR = args.schema({
'type': 'object',
'properties': {
# This validator defines a minimal acceptable inventory.
'inventory': {
'type': 'object',
'properties': {
'bmc_address': {'type': 'string'},
'bmc_v6address': {'type': 'string'},
'interfaces': {
'type': 'array',
'items': {
'type': 'object',
'properties': {
'mac_address': {'type': 'string'},
},
'required': ['mac_address'],
'additionalProperties': True,
},
'minItems': 1,
},
},
'required': ['interfaces'],
'additionalProperties': True,
},
},
'required': ['inventory'],
'additionalProperties': True,
})
class ContinueInspectionController(rest.RestController):
"""Controller handling inspection data from deploy ramdisk."""
@method.expose(status_code=http_client.ACCEPTED)
@method.body('data')
@args.validate(data=DATA_VALIDATOR, node_uuid=args.uuid)
def post(self, data, node_uuid=None):
"""Process a introspection data from the deploy ramdisk.
:param data: Introspection data.
:param node_uuid: UUID of a node.
:raises: InvalidParameterValue if node_uuid is a valid UUID.
:raises: NoValidHost if RPC topic for node could not be retrieved.
:raises: NotFound if requested API version does not allow this
endpoint or if lookup fails.
"""
if (not api_utils.allow_continue_inspection_endpoint()
# Node UUID support is a new addition
or (node_uuid
and not api_utils.new_continue_inspection_endpoint())):
raise exception.NotFound(
# This is a small lie: 1.1 is accepted as well, but no need
# to really advertise this fact, it's only for compatibility.
_('API version 1.%d or newer is required')
% versions.MINOR_83_CONTINUE_INSPECTION)
api_utils.check_policy('baremetal:node:ipa_continue_inspection')
inventory = data.pop('inventory')
macs = get_valid_mac_addresses(
iface['mac_address'] for iface in inventory['interfaces'])
bmc_addresses = list(
filter(None, (inventory.get('bmc_address'),
inventory.get('bmc_v6address')))
)
if not macs and not bmc_addresses and not node_uuid:
raise exception.BadRequest(_('No lookup information provided'))
rpc_node = inspect_utils.lookup_node(
api.request.context, macs, bmc_addresses, node_uuid=node_uuid)
try:
topic = api.request.rpcapi.get_topic_for(rpc_node)
except exception.NoValidHost as e:
e.code = http_client.BAD_REQUEST
raise
if api_utils.new_continue_inspection_endpoint():
# This has to happen before continue_inspection since processing
# the data may take significant time, and creating a token required
# a lock on the node.
rpc_node = api.request.rpcapi.get_node_with_token(
api.request.context, rpc_node.uuid, topic=topic)
api.request.rpcapi.continue_inspection(
api.request.context, rpc_node.uuid, inventory=inventory,
plugin_data=data, topic=topic)
if api_utils.new_continue_inspection_endpoint():
return convert_with_links(rpc_node)
else:
# Compatibility with ironic-inspector
return {'uuid': rpc_node.uuid}

View File

@ -1986,3 +1986,18 @@ def check_allow_clean_disable_ramdisk(target, disable_ramdisk):
def allow_shards_endpoint(): def allow_shards_endpoint():
"""Check if shards endpoint is available.""" """Check if shards endpoint is available."""
return api.request.version.minor >= versions.MINOR_82_NODE_SHARD return api.request.version.minor >= versions.MINOR_82_NODE_SHARD
def new_continue_inspection_endpoint():
"""Check if /v1/continue_inspection endpoint is explicitly requested."""
return api.request.version.minor >= versions.MINOR_84_CONTINUE_INSPECTION
def allow_continue_inspection_endpoint():
"""Check if /v1/continue_inspection endpoint is available.
As a special exception, we allow it in the base version so that the API
can be used as a drop-in replacement for the Inspector's API.
"""
return (new_continue_inspection_endpoint()
or api.request.version.minor == versions.MINOR_1_INITIAL_VERSION)

View File

@ -121,6 +121,7 @@ BASE_VERSION = 1
# v1.81: Add node inventory # v1.81: Add node inventory
# v1.82: Add node sharding capability # v1.82: Add node sharding capability
# v1.83: Add child node modeling # v1.83: Add child node modeling
# v1.84: Add ramdisk callback to continue inspection.
MINOR_0_JUNO = 0 MINOR_0_JUNO = 0
MINOR_1_INITIAL_VERSION = 1 MINOR_1_INITIAL_VERSION = 1
MINOR_2_AVAILABLE_STATE = 2 MINOR_2_AVAILABLE_STATE = 2
@ -205,6 +206,7 @@ MINOR_80_PROJECT_CREATE_DELETE_NODE = 80
MINOR_81_NODE_INVENTORY = 81 MINOR_81_NODE_INVENTORY = 81
MINOR_82_NODE_SHARD = 82 MINOR_82_NODE_SHARD = 82
MINOR_83_PARENT_CHILD_NODES = 83 MINOR_83_PARENT_CHILD_NODES = 83
MINOR_84_CONTINUE_INSPECTION = 84
# When adding another version, update: # When adding another version, update:
# - MINOR_MAX_VERSION # - MINOR_MAX_VERSION
@ -212,7 +214,7 @@ MINOR_83_PARENT_CHILD_NODES = 83
# explanation of what changed in the new version # explanation of what changed in the new version
# - common/release_mappings.py, RELEASE_MAPPING['master']['api'] # - common/release_mappings.py, RELEASE_MAPPING['master']['api']
MINOR_MAX_VERSION = MINOR_83_PARENT_CHILD_NODES MINOR_MAX_VERSION = MINOR_84_CONTINUE_INSPECTION
# String representations of the minor and maximum versions # String representations of the minor and maximum versions
_MIN_VERSION_STRING = '{}.{}'.format(BASE_VERSION, MINOR_1_INITIAL_VERSION) _MIN_VERSION_STRING = '{}.{}'.format(BASE_VERSION, MINOR_1_INITIAL_VERSION)

View File

@ -1443,6 +1443,12 @@ utility_policies = [
operations=[{'path': '/lookup', 'method': 'GET'}], operations=[{'path': '/lookup', 'method': 'GET'}],
deprecated_rule=deprecated_ipa_lookup deprecated_rule=deprecated_ipa_lookup
), ),
policy.DocumentedRuleDefault(
name='baremetal:driver:ipa_continue_inspection',
check_str='',
description='Receive inspection data from the ramdisk',
operations=[{'path': '/continue_inspection', 'method': 'POST'}],
),
] ]

View File

@ -574,8 +574,8 @@ RELEASE_MAPPING = {
} }
}, },
'master': { 'master': {
'api': '1.83', 'api': '1.84',
'rpc': '1.55', 'rpc': '1.56',
'objects': { 'objects': {
'Allocation': ['1.1'], 'Allocation': ['1.1'],
'BIOSSetting': ['1.1'], 'BIOSSetting': ['1.1'],

View File

@ -482,6 +482,9 @@ machine.add_transition(INSPECTWAIT, INSPECTFAIL, 'fail')
# Inspection is aborted. # Inspection is aborted.
machine.add_transition(INSPECTWAIT, INSPECTFAIL, 'abort') machine.add_transition(INSPECTWAIT, INSPECTFAIL, 'abort')
# Inspection is continued.
machine.add_transition(INSPECTWAIT, INSPECTING, 'resume')
# Move the node to manageable state for any other # Move the node to manageable state for any other
# action. # action.
machine.add_transition(INSPECTFAIL, MANAGEABLE, 'manage') machine.add_transition(INSPECTFAIL, MANAGEABLE, 'manage')

View File

@ -20,6 +20,7 @@ from ironic.common.i18n import _
from ironic.common import states from ironic.common import states
from ironic.conductor import task_manager from ironic.conductor import task_manager
from ironic.conductor import utils from ironic.conductor import utils
from ironic.drivers.modules import inspect_utils
LOG = log.getLogger(__name__) LOG = log.getLogger(__name__)
@ -106,3 +107,45 @@ def abort_inspection(task):
task.process_event('abort') task.process_event('abort')
LOG.info('Successfully aborted inspection of node %(node)s', LOG.info('Successfully aborted inspection of node %(node)s',
{'node': node.uuid}) {'node': node.uuid})
@task_manager.require_exclusive_lock
def continue_inspection(task, inventory, plugin_data):
"""Continue inspection for the node."""
node = task.node
LOG.debug('Inventory for node %(node)s: %(data)s',
{'node': node.uuid, 'data': inventory})
try:
result = task.driver.inspect.continue_inspection(
task, inventory, plugin_data)
if result == states.INSPECTWAIT:
if task.node.provision_state == states.INSPECTING:
task.process_event('wait')
LOG.debug('Waiting for inspection data to be processed '
'asynchronously for node %s', node.uuid)
return
# NOTE(dtantsur): this is done *after* processing to allow
# modifications, especially to plugin_data.
inspect_utils.store_inspection_data(
node, inventory, plugin_data, context=task.context)
except exception.UnsupportedDriverExtension:
with excutils.save_and_reraise_exception():
intf_name = task.driver.inspect.__class__.__name__
LOG.error('Inspect interface %(intf)s does not '
'support processing inspection data for node %(node)s',
{'intf': intf_name, 'node': node.uuid})
except Exception as e:
with excutils.save_and_reraise_exception():
LOG.exception('Error when processing inspection data for '
'node %(node)s', {'node': node.uuid})
error = _('Failed to finish inspection: %s') % e
utils.node_history_record(task.node, event=error,
event_type=states.INTROSPECTION,
error=True)
task.process_event('fail')
task.process_event('done')
LOG.info('Successfully finished inspection of node %s', node.uuid)

View File

@ -93,7 +93,7 @@ class ConductorManager(base_manager.BaseConductorManager):
# NOTE(rloo): This must be in sync with rpcapi.ConductorAPI's. # NOTE(rloo): This must be in sync with rpcapi.ConductorAPI's.
# NOTE(pas-ha): This also must be in sync with # NOTE(pas-ha): This also must be in sync with
# ironic.common.release_mappings.RELEASE_MAPPING['master'] # ironic.common.release_mappings.RELEASE_MAPPING['master']
RPC_API_VERSION = '1.55' RPC_API_VERSION = '1.56'
target = messaging.Target(version=RPC_API_VERSION) target = messaging.Target(version=RPC_API_VERSION)
@ -3652,6 +3652,41 @@ class ConductorManager(base_manager.BaseConductorManager):
raise exception.ConcurrentActionLimit( raise exception.ConcurrentActionLimit(
task_type=action) task_type=action)
@METRICS.timer('ConductorManager.continue_inspection')
@messaging.expected_exceptions(exception.NodeLocked,
exception.NotFound,
exception.Invalid)
def continue_inspection(self, context, node_id, inventory,
plugin_data=None):
"""Continue in-band inspection.
:param context: request context.
:param node_id: node ID or UUID.
:param inventory: hardware inventory from the node.
:param plugin_data: optional plugin-specific data.
:raises: NodeLocked if node is locked by another conductor.
:raises: NotFound if node is in invalid state.
"""
LOG.debug("RPC continue_inspection called for the node %(node_id)s",
{'node_id': node_id})
with task_manager.acquire(context, node_id,
purpose='continue inspection',
shared=False) as task:
# TODO(dtantsur): support active state (re-)inspection
if task.node.provision_state != states.INSPECTWAIT:
LOG.error('Refusing to process inspection data for node '
'%(node)s in invalid state %(state)s',
{'node': task.node.uuid,
'state': task.node.provision_state})
raise exception.NotFound()
task.process_event(
'resume',
callback=self._spawn_worker,
call_args=(inspection.continue_inspection,
task, inventory, plugin_data),
err_handler=utils.provisioning_error_handler)
@METRICS.timer('get_vendor_passthru_metadata') @METRICS.timer('get_vendor_passthru_metadata')
def get_vendor_passthru_metadata(route_dict): def get_vendor_passthru_metadata(route_dict):

View File

@ -152,12 +152,13 @@ class ConductorAPI(object):
| 1.54 - Added optional agent_status and agent_status_message to | 1.54 - Added optional agent_status and agent_status_message to
heartbeat heartbeat
| 1.55 - Added change_node_boot_mode | 1.55 - Added change_node_boot_mode
| 1.56 - Added continue_inspection
""" """
# NOTE(rloo): This must be in sync with manager.ConductorManager's. # NOTE(rloo): This must be in sync with manager.ConductorManager's.
# NOTE(pas-ha): This also must be in sync with # NOTE(pas-ha): This also must be in sync with
# ironic.common.release_mappings.RELEASE_MAPPING['master'] # ironic.common.release_mappings.RELEASE_MAPPING['master']
RPC_API_VERSION = '1.55' RPC_API_VERSION = '1.56'
def __init__(self, topic=None): def __init__(self, topic=None):
super(ConductorAPI, self).__init__() super(ConductorAPI, self).__init__()
@ -1393,3 +1394,18 @@ class ConductorAPI(object):
""" """
cctxt = self._prepare_call(topic=topic, version='1.49') cctxt = self._prepare_call(topic=topic, version='1.49')
return cctxt.call(context, 'get_node_with_token', node_id=node_id) return cctxt.call(context, 'get_node_with_token', node_id=node_id)
def continue_inspection(self, context, node_id, inventory,
plugin_data=None, topic=None):
"""Continue in-band inspection.
:param context: request context.
:param node_id: node ID or UUID.
:param inventory: hardware inventory from the node.
:param plugin_data: optional plugin-specific data.
:param topic: RPC topic. Defaults to self.topic.
:raises: NodeLocked if node is locked by another conductor.
"""
cctxt = self._prepare_call(topic=topic, version='1.56')
return cctxt.call(context, 'continue_inspection', node_id=node_id,
inventory=inventory, plugin_data=plugin_data)

View File

@ -411,7 +411,12 @@ def provisioning_error_handler(e, node, provision_state,
node.provision_state = provision_state node.provision_state = provision_state
node.target_provision_state = target_provision_state node.target_provision_state = target_provision_state
error = (_("No free conductor workers available")) error = (_("No free conductor workers available"))
node_history_record(node, event=error, event_type=states.PROVISIONING, if provision_state in (states.INSPECTING, states.INSPECTWAIT,
states.INSPECTFAIL):
event_type = states.INTROSPECTION
else:
event_type = states.PROVISIONING
node_history_record(node, event=error, event_type=event_type,
error=True) error=True)
node.save() node.save()
LOG.warning("No free conductor workers available to perform " LOG.warning("No free conductor workers available to perform "

View File

@ -1257,6 +1257,20 @@ class InspectInterface(BaseInterface):
raise exception.UnsupportedDriverExtension( raise exception.UnsupportedDriverExtension(
driver=task.node.driver, extension='abort') driver=task.node.driver, extension='abort')
def continue_inspection(self, task, inventory, plugin_data=None):
"""Continue in-band hardware inspection.
Should not be implemented for purely out-of-band implementations.
:param task: a task from TaskManager.
:param inventory: hardware inventory from the node.
:param plugin_data: optional plugin-specific data.
:raises: UnsupportedDriverExtension, if the method is not implemented
by specific inspect interface.
"""
raise exception.UnsupportedDriverExtension(
driver=task.node.driver, extension='abort')
def cache_bios_settings(func): def cache_bios_settings(func):
"""A decorator to cache bios settings after running the function. """A decorator to cache bios settings after running the function.

View File

@ -13,13 +13,19 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import socket
import urllib
from oslo_log import log as logging from oslo_log import log as logging
from oslo_utils import netutils from oslo_utils import netutils
import swiftclient.exceptions import swiftclient.exceptions
from ironic.common import exception from ironic.common import exception
from ironic.common import states
from ironic.common import swift from ironic.common import swift
from ironic.common import utils
from ironic.conf import CONF from ironic.conf import CONF
from ironic.drivers.modules import ipmitool
from ironic import objects from ironic import objects
from ironic.objects import node_inventory from ironic.objects import node_inventory
@ -212,3 +218,168 @@ def _get_inspection_data_from_swift(node_uuid):
container=container, container=container,
operation='get') operation='get')
return {"inventory": inventory_data, "plugin_data": plugin_data} return {"inventory": inventory_data, "plugin_data": plugin_data}
LOOKUP_CACHE_FIELD = 'lookup_bmc_addresses'
def lookup_node(context, mac_addresses, bmc_addresses, node_uuid=None):
"""Do a node lookup by the information from the inventory.
:param context: Request context
:param mac_addresses: List of MAC addresses.
:param bmc_addresses: List of BMC (realistically, IPMI) addresses.
:param node_uuid: Node UUID (if known).
:raises: NotFound with a generic message for all failures to avoid
disclosing any information.
"""
node = None
if node_uuid:
try:
node = objects.Node.get_by_uuid(context, node_uuid)
except exception.NotFound:
# NOTE(dtantsur): we are reraising the same exception to make sure
# we don't disclose the difference between nodes that are not found
# at all and nodes in a wrong state by different error messages.
raise exception.NotFound()
if mac_addresses:
try:
node = objects.Node.get_by_port_addresses(context, mac_addresses)
except exception.NotFound as exc:
# The exception has enough context already, just log it and move on
LOG.debug("Lookup for inspection: %s", exc)
else:
if node_uuid and node.uuid != node_uuid:
LOG.error('Conflict on inspection lookup: node %(node1)s '
'does not match MAC addresses (%(macs)s), which '
'belong to node %(node2)s. This may be a sign of '
'incorrectly created ports.',
{'node1': node_uuid,
'node2': node.uuid,
'macs': ', '.join(mac_addresses)})
raise exception.NotFound()
# TODO(dtantsur): support active state inspection
if node and node.provision_state != states.INSPECTWAIT:
LOG.error('Node %(node)s was found during inspection lookup '
'with MAC addresses %(macs)s, but it is in '
'provision state %(state)s',
{'node': node.uuid,
'macs': ', '.join(mac_addresses),
'state': node.provision_state})
raise exception.NotFound()
if bmc_addresses:
for candidate in objects.Node.list(
context,
filters={'provision_state': states.INSPECTWAIT},
fields=['uuid', 'driver_internal_info']):
# This field has to be populated on inspection start
for addr in candidate.driver_internal_info.get(
LOOKUP_CACHE_FIELD) or ():
if addr in bmc_addresses:
break
else:
continue
if node and candidate.uuid != node.uuid:
LOG.error('Conflict on inspection lookup: nodes %(node1)s '
'and %(node2)s both satisfy MAC addresses '
'(%(macs)s) and BMC address(s) (%(bmc)s). The cause '
'may be ports attached to a wrong node.',
{'node1': node.uuid,
'node2': candidate.uuid,
'macs': ', '.join(mac_addresses),
'bmc': ', '.join(bmc_addresses)})
raise exception.NotFound()
try:
# Fetch the complete object now.
node = objects.Node.get_by_uuid(context, candidate.uuid)
except exception.NotFound:
pass # Deleted in-between?
if not node:
LOG.error('No nodes satisfy MAC addresses (%(macs)s) and BMC '
'address(s) (%(bmc)s) during inspection lookup',
{'macs': ', '.join(mac_addresses),
'bmc': ', '.join(bmc_addresses)})
raise exception.NotFound()
LOG.debug('Inspection lookup succeeded for node %(node)s using MAC '
'addresses %(mac)s and BMC addresses %(bmc)s',
{'node': node.uuid, 'mac': mac_addresses, 'bmc': bmc_addresses})
return node
def _get_bmc_addresses(node):
"""Get the BMC address defined in the node's driver_info.
All valid hosts are returned along with their v4 and v6 IP addresses.
:param node: Node object with defined driver_info dictionary
:return: a set with suitable addresses
"""
result = set()
# FIXME(dtantsur): this extremely lame process is adapted from
# ironic-inspector. Now that it's in Ironic proper, we need to replace it
# with something using information from hardware types.
for name, address in node.driver_info.items():
if not name.endswith('_address'):
continue
# NOTE(sambetts): IPMI address is useless to us if bridging is enabled
# so just ignore it.
if (name.startswith('ipmi_')
and ipmitool.is_bridging_enabled(node)):
LOG.debug('Will not used %(field)s %(addr)s for lookup since '
'IPMI bridging is enabled for node %(node)s',
{'addr': address, 'field': name, 'node': node.uuid})
continue
if '//' in address:
address = urllib.parse.urlparse(address).hostname
try:
addrinfo = socket.getaddrinfo(address, None, proto=socket.SOL_TCP)
except socket.gaierror as exc:
LOG.warning('Failed to resolve the hostname (%(addr)s) in '
'%(field)s of node %(node)s: %(exc)s',
{'addr': address, 'field': name, 'node': node.uuid,
'exc': exc})
continue
for *other, sockaddr in addrinfo:
ip = sockaddr[0]
if utils.is_loopback(ip):
LOG.warning('Ignoring loopback %(field)s %(addr)s '
'for node %(node)s',
{'addr': ip, 'field': name, 'node': node.uuid})
else:
result.add(ip)
if not utils.is_loopback(address):
result.add(address)
return result
def cache_lookup_addresses(node):
"""Cache lookup addresses for a quick access."""
addresses = _get_bmc_addresses(node)
if addresses:
LOG.debug('Will use the following BMC addresses for inspection lookup '
'of node %(node)s: %(addr)s',
{'node': node.uuid, 'addr': addresses})
node.set_driver_internal_info(LOOKUP_CACHE_FIELD, list(addresses))
else:
LOG.debug('No BMC addresses to use for inspection lookup of node %s',
node.uuid)
clear_lookup_addresses(node)
def clear_lookup_addresses(node):
"""Remove lookup addresses cached on the node."""
return node.del_driver_internal_info(LOOKUP_CACHE_FIELD)

View File

@ -213,6 +213,10 @@ class Inspector(base.InspectInterface):
utils.set_node_nested_field(task.node, 'driver_internal_info', utils.set_node_nested_field(task.node, 'driver_internal_info',
_IRONIC_MANAGES_BOOT, _IRONIC_MANAGES_BOOT,
ironic_manages_boot) ironic_manages_boot)
# Make this interface work with the Ironic own /continue_inspection
# endpoint to simplify migration to the new in-band inspection
# implementation.
inspect_utils.cache_lookup_addresses(task.node)
task.node.save() task.node.save()
LOG.debug('Starting inspection for node %(uuid)s using ' LOG.debug('Starting inspection for node %(uuid)s using '
@ -239,6 +243,8 @@ class Inspector(base.InspectInterface):
LOG.debug('Aborting inspection for node %(uuid)s using ' LOG.debug('Aborting inspection for node %(uuid)s using '
'ironic-inspector', {'uuid': node_uuid}) 'ironic-inspector', {'uuid': node_uuid})
client.get_client(task.context).abort_introspection(node_uuid) client.get_client(task.context).abort_introspection(node_uuid)
if inspect_utils.clear_lookup_addresses(task.node):
task.node.save()
@periodics.node_periodic( @periodics.node_periodic(
purpose='checking hardware inspection status', purpose='checking hardware inspection status',
@ -249,6 +255,24 @@ class Inspector(base.InspectInterface):
"""Periodic task checking results of inspection.""" """Periodic task checking results of inspection."""
_check_status(task) _check_status(task)
def continue_inspection(self, task, inventory, plugin_data=None):
"""Continue in-band hardware inspection.
This implementation simply defers to ironic-inspector. It only exists
to simplify the transition to Ironic-native in-band inspection.
:param task: a task from TaskManager.
:param inventory: hardware inventory from the node.
:param plugin_data: optional plugin-specific data.
"""
cli = client.get_client(task.context)
endpoint = _get_callback_endpoint(cli)
data = dict(plugin_data, inventory=inventory) # older format
task.process_event('wait')
task.downgrade_lock()
cli.post(endpoint, json=data)
return states.INSPECTWAIT
def _start_inspection(node_uuid, context): def _start_inspection(node_uuid, context):
"""Call to inspector to start inspection.""" """Call to inspector to start inspection."""
@ -300,6 +324,8 @@ def _check_status(task):
task.upgrade_lock() task.upgrade_lock()
node = task.node node = task.node
inspect_utils.clear_lookup_addresses(node)
if status.error: if status.error:
LOG.error('Inspection failed for node %(uuid)s with error: %(err)s', LOG.error('Inspection failed for node %(uuid)s with error: %(err)s',
{'uuid': node.uuid, 'err': status.error}) {'uuid': node.uuid, 'err': status.error})

View File

@ -313,6 +313,14 @@ def _make_password_file(password):
f.close() f.close()
def is_bridging_enabled(node):
"""Check if IPMI bridging is enabled.
This call is used in the inspector lookup.
"""
return node.driver_info.get("ipmi_bridging", "no") != "no"
def _parse_driver_info(node): def _parse_driver_info(node):
"""Gets the parameters required for ipmitool to access the node. """Gets the parameters required for ipmitool to access the node.

View File

@ -113,6 +113,12 @@ class TestV1Routing(api_base.BaseApiTest):
{'href': 'http://localhost/v1/conductors/', 'rel': 'self'}, {'href': 'http://localhost/v1/conductors/', 'rel': 'self'},
{'href': 'http://localhost/conductors/', 'rel': 'bookmark'} {'href': 'http://localhost/conductors/', 'rel': 'bookmark'}
], ],
'continue_inspection': [
{'href': 'http://localhost/v1/continue_inspection/',
'rel': 'self'},
{'href': 'http://localhost/continue_inspection/',
'rel': 'bookmark'}
],
'deploy_templates': [ 'deploy_templates': [
{'href': 'http://localhost/v1/deploy_templates/', {'href': 'http://localhost/v1/deploy_templates/',
'rel': 'self'}, 'rel': 'self'},

View File

@ -1,3 +1,4 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may # Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain # not use this file except in compliance with the License. You may obtain
# a copy of the License at # a copy of the License at
@ -16,6 +17,7 @@ from ironic.common import exception
from ironic.common import states from ironic.common import states
from ironic.conductor import inspection from ironic.conductor import inspection
from ironic.conductor import task_manager from ironic.conductor import task_manager
from ironic.drivers.modules import inspect_utils
from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import base as db_base
from ironic.tests.unit.objects import utils as obj_utils from ironic.tests.unit.objects import utils as obj_utils
@ -116,3 +118,56 @@ class TestInspectHardware(db_base.DbTestCase):
self.assertEqual('Unexpected exception of type RuntimeError: x', self.assertEqual('Unexpected exception of type RuntimeError: x',
node.last_error) node.last_error)
self.assertTrue(mock_inspect.called) self.assertTrue(mock_inspect.called)
@mock.patch('ironic.drivers.modules.fake.FakeInspect.continue_inspection',
autospec=True)
class TestContinueInspection(db_base.DbTestCase):
def setUp(self):
super().setUp()
self.node = obj_utils.create_test_node(
self.context, provision_state=states.INSPECTING)
self.inventory = {"test": "inventory"}
self.plugin_data = {"plugin": "data"}
@mock.patch.object(inspect_utils, 'store_inspection_data', autospec=True)
def test_ok(self, mock_store, mock_continue):
with task_manager.acquire(self.context, self.node.id) as task:
inspection.continue_inspection(task, self.inventory,
self.plugin_data)
mock_continue.assert_called_once_with(task.driver.inspect,
task, self.inventory,
self.plugin_data)
mock_store.assert_called_once_with(task.node, self.inventory,
self.plugin_data, self.context)
self.node.refresh()
self.assertEqual(states.MANAGEABLE, self.node.provision_state)
@mock.patch.object(inspect_utils, 'store_inspection_data', autospec=True)
def test_ok_asynchronous(self, mock_store, mock_continue):
mock_continue.return_value = states.INSPECTWAIT
with task_manager.acquire(self.context, self.node.id) as task:
inspection.continue_inspection(task, self.inventory,
self.plugin_data)
mock_continue.assert_called_once_with(task.driver.inspect,
task, self.inventory,
self.plugin_data)
mock_store.assert_not_called()
self.assertEqual(states.INSPECTWAIT, task.node.provision_state)
@mock.patch.object(inspect_utils, 'store_inspection_data', autospec=True)
def test_failure(self, mock_store, mock_continue):
mock_continue.side_effect = exception.HardwareInspectionFailure("boom")
with task_manager.acquire(self.context, self.node.id) as task:
self.assertRaises(exception.HardwareInspectionFailure,
inspection.continue_inspection,
task, self.inventory, self.plugin_data)
mock_continue.assert_called_once_with(task.driver.inspect,
task, self.inventory,
self.plugin_data)
mock_store.assert_not_called()
self.node.refresh()
self.assertEqual(states.INSPECTFAIL, self.node.provision_state)
self.assertIn("boom", self.node.last_error)

View File

@ -8439,3 +8439,34 @@ class ConcurrentActionLimitTestCase(mgr_utils.ServiceSetUpMixin,
CONF.set_override('max_concurrent_clean', 4, group='conductor') CONF.set_override('max_concurrent_clean', 4, group='conductor')
self.service._concurrent_action_limit('cleaning') self.service._concurrent_action_limit('cleaning')
self.service._concurrent_action_limit('unprovisioning') self.service._concurrent_action_limit('unprovisioning')
@mgr_utils.mock_record_keepalive
class ContinueInspectionTestCase(mgr_utils.ServiceSetUpMixin,
db_base.DbTestCase):
@mock.patch.object(manager.ConductorManager, '_spawn_worker',
autospec=True)
def test_continue_ok(self, mock_spawn):
node = obj_utils.create_test_node(self.context,
provision_state=states.INSPECTWAIT)
self.service.continue_inspection(self.context, node.id,
{"test": "inventory"},
["plugin data"])
node.refresh()
self.assertEqual(states.INSPECTING, node.provision_state)
mock_spawn.assert_called_once_with(
self.service, inspection.continue_inspection, mock.ANY,
{"test": "inventory"}, ["plugin data"])
def test_wrong_state(self):
node = obj_utils.create_test_node(self.context,
provision_state=states.AVAILABLE)
exc = self.assertRaises(messaging.rpc.ExpectedException,
self.service.continue_inspection,
self.context, node.id,
{"test": "inventory"},
["plugin data"])
self.assertEqual(exception.NotFound, exc.exc_info[0])
node.refresh()
self.assertEqual(states.AVAILABLE, node.provision_state)

View File

@ -13,14 +13,16 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import socket
from unittest import mock from unittest import mock
from oslo_utils import importutils from oslo_utils import importutils
from oslo_utils import uuidutils
import swiftclient.exceptions import swiftclient.exceptions
from ironic.common import context as ironic_context from ironic.common import context as ironic_context
from ironic.common import exception from ironic.common import exception
from ironic.common import states
from ironic.common import swift from ironic.common import swift
from ironic.conductor import task_manager from ironic.conductor import task_manager
from ironic.conf import CONF from ironic.conf import CONF
@ -293,3 +295,210 @@ class IntrospectionDataStorageFunctionsTestCase(db_base.DbTestCase):
self.assertRaises(exception.SwiftObjectNotFoundError, self.assertRaises(exception.SwiftObjectNotFoundError,
utils._get_inspection_data_from_swift, utils._get_inspection_data_from_swift,
self.node.uuid) self.node.uuid)
class LookupNodeTestCase(db_base.DbTestCase):
def setUp(self):
super().setUp()
self.bmc = '192.0.2.1'
self.node = obj_utils.create_test_node(
self.context,
driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc]},
provision_state=states.INSPECTWAIT)
self.macs = ['11:22:33:44:55:66', '12:34:56:78:90:ab']
self.unknown_mac = '66:55:44:33:22:11'
self.ports = [
obj_utils.create_test_port(self.context,
uuid=uuidutils.generate_uuid(),
node_id=self.node.id,
address=addr)
for addr in self.macs
]
self.bmc2 = '1.2.1.2'
self.mac2 = '00:11:00:11:00:11'
self.node2 = obj_utils.create_test_node(
self.context,
uuid=uuidutils.generate_uuid(),
driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc2]},
provision_state=states.INSPECTWAIT)
obj_utils.create_test_port(self.context,
node_id=self.node2.id,
address=self.mac2)
def test_no_input(self):
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, [], [], None)
def test_by_macs(self):
result = utils.lookup_node(self.context, self.macs[::-1], [], None)
self.assertEqual(self.node.uuid, result.uuid)
def test_by_macs_partial(self):
macs = [self.macs[1], self.unknown_mac]
result = utils.lookup_node(self.context, macs, [], None)
self.assertEqual(self.node.uuid, result.uuid)
def test_by_mac_not_found(self):
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, [self.unknown_mac], [], None)
def test_by_mac_wrong_state(self):
self.node.provision_state = states.AVAILABLE
self.node.save()
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, self.macs, [], None)
def test_conflicting_macs(self):
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, [self.macs[0], self.mac2], [], None)
def test_by_bmc(self):
result = utils.lookup_node(self.context, [], ['192.0.2.1'], None)
self.assertEqual(self.node.uuid, result.uuid)
def test_by_bmc_and_mac(self):
result = utils.lookup_node(
self.context, [self.macs[0]], [self.bmc], None)
self.assertEqual(self.node.uuid, result.uuid)
def test_by_unknown_bmc_and_mac(self):
result = utils.lookup_node(
self.context, [self.unknown_mac], [self.bmc], None)
self.assertEqual(self.node.uuid, result.uuid)
def test_by_bmc_and_mac_and_uuid(self):
result = utils.lookup_node(
self.context, [self.macs[0]], [self.bmc], self.node.uuid)
self.assertEqual(self.node.uuid, result.uuid)
def test_by_bmc_not_found(self):
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, [], ['192.168.1.1'], None)
def test_by_bmc_wrong_state(self):
self.node.provision_state = states.AVAILABLE
self.node.save()
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, [], [self.bmc], None)
def test_conflicting_macs_and_bmc(self):
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, self.macs, [self.bmc2], None)
def test_by_uuid(self):
result = utils.lookup_node(self.context, [], [], self.node.uuid)
self.assertEqual(self.node.uuid, result.uuid)
def test_by_uuid_and_unknown_macs(self):
result = utils.lookup_node(
self.context, [self.unknown_mac], [], self.node.uuid)
self.assertEqual(self.node.uuid, result.uuid)
def test_by_uuid_not_found(self):
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, [], [], uuidutils.generate_uuid())
def test_by_uuid_wrong_state(self):
self.node.provision_state = states.AVAILABLE
self.node.save()
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, [], [], self.node.uuid)
def test_conflicting_macs_and_uuid(self):
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, self.macs, [], self.node2.uuid)
def test_conflicting_bmc_and_uuid(self):
self.assertRaises(exception.NotFound, utils.lookup_node,
self.context, self.macs, [self.bmc], self.node2.uuid)
class GetBMCAddressesTestCase(db_base.DbTestCase):
def test_localhost_ignored(self):
node = obj_utils.create_test_node(
self.context,
driver_info={'ipmi_address': '127.0.0.1'})
self.assertEqual(set(), utils._get_bmc_addresses(node))
def test_localhost_as_url_ignored(self):
node = obj_utils.create_test_node(
self.context,
driver_info={'redfish_address': 'https://localhost/redfish'})
self.assertEqual(set(), utils._get_bmc_addresses(node))
def test_normal_ip(self):
node = obj_utils.create_test_node(
self.context,
driver_info={'ipmi_address': '192.0.2.1'})
self.assertEqual({'192.0.2.1'}, utils._get_bmc_addresses(node))
def test_normal_ip_as_url(self):
node = obj_utils.create_test_node(
self.context,
driver_info={'redfish_address': 'https://192.0.2.1/redfish'})
self.assertEqual({'192.0.2.1'}, utils._get_bmc_addresses(node))
@mock.patch.object(socket, 'getaddrinfo', autospec=True)
def test_resolved_host(self, mock_getaddrinfo):
mock_getaddrinfo.return_value = [
(socket.AF_INET6, socket.SOCK_STREAM, socket.SOL_TCP,
'', ('2001:db8::42', None)),
(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP,
'', ('192.0.2.1', None)),
]
node = obj_utils.create_test_node(
self.context,
driver_info={'ipmi_address': 'example.com'})
self.assertEqual({'example.com', '192.0.2.1', '2001:db8::42'},
utils._get_bmc_addresses(node))
mock_getaddrinfo.assert_called_once_with(
'example.com', None, proto=socket.SOL_TCP)
@mock.patch.object(socket, 'getaddrinfo', autospec=True)
def test_resolved_host_in_url(self, mock_getaddrinfo):
mock_getaddrinfo.return_value = [
(socket.AF_INET6, socket.SOCK_STREAM, socket.SOL_TCP,
'', ('2001:db8::42', None)),
(socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP,
'', ('192.0.2.1', None)),
]
node = obj_utils.create_test_node(
self.context,
driver_info={'redfish_address': 'https://example.com:8080/v1'})
self.assertEqual({'example.com', '192.0.2.1', '2001:db8::42'},
utils._get_bmc_addresses(node))
mock_getaddrinfo.assert_called_once_with(
'example.com', None, proto=socket.SOL_TCP)
class LookupCacheTestCase(db_base.DbTestCase):
def setUp(self):
super().setUp()
self.bmc = '192.0.2.1'
self.node = obj_utils.create_test_node(
self.context,
driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc]},
provision_state=states.INSPECTWAIT)
def test_clear(self):
result = utils.clear_lookup_addresses(self.node)
self.assertEqual([self.bmc], result)
self.assertEqual({}, self.node.driver_internal_info)
@mock.patch.object(utils, '_get_bmc_addresses', autospec=True)
def test_new_value(self, mock_get_addr):
mock_get_addr.return_value = {'192.0.2.42'}
utils.cache_lookup_addresses(self.node)
self.assertEqual({utils.LOOKUP_CACHE_FIELD: ['192.0.2.42']},
self.node.driver_internal_info)
@mock.patch.object(utils, '_get_bmc_addresses', autospec=True)
def test_replace_with_empty(self, mock_get_addr):
mock_get_addr.return_value = set()
utils.cache_lookup_addresses(self.node)
self.assertEqual({}, self.node.driver_internal_info)