From 0370f5ac97dfb0d467398fd778fa4b83387710e4 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Wed, 1 Mar 2023 14:50:30 +0100 Subject: [PATCH] Migrate the inspector's /continue API This change creates all necessary parts to processing inspection data: * New API /v1/continue_inspection Depending on the API version, either behaves like the inspector's API or (new version) adds the lookup functionality on top. The lookup process is migrated from ironic-inspector with minor changes. It takes MAC addresses, BMC addresses and (optionally) a node UUID and tries to find a single node in INSPECTWAIT state that satisfies all of these. Any failure results in HTTP 404. To make lookup faster, the resolved BMC addresses are cached in advance. * New RPC continue_inspection Essentially, checks the provision state again and delegates to the inspect interface. * New inspect interface call continue_inspection The base version does nothing. Since we don't yet have in-band inspection in Ironic proper, the only actual implementation is added to the existing "inspector" interface that works by doing a call to ironic-inspector. Story: #2010275 Task: #46208 Change-Id: Ia3f5bb9d1845d6b8fab30232a72b5a360a5a56d2 --- .../contributor/webapi-version-history.rst | 8 +- ironic/api/config.py | 1 + ironic/api/controllers/v1/__init__.py | 6 + ironic/api/controllers/v1/ramdisk.py | 145 ++++++++++-- ironic/api/controllers/v1/utils.py | 15 ++ ironic/api/controllers/v1/versions.py | 4 +- ironic/common/policy.py | 8 +- ironic/common/release_mappings.py | 4 +- ironic/common/states.py | 3 + ironic/conductor/inspection.py | 43 ++++ ironic/conductor/manager.py | 37 ++- ironic/conductor/rpcapi.py | 18 +- ironic/conductor/utils.py | 7 +- ironic/drivers/base.py | 14 ++ ironic/drivers/modules/inspect_utils.py | 171 ++++++++++++++ ironic/drivers/modules/inspector/interface.py | 26 +++ ironic/drivers/modules/ipmitool.py | 8 + .../unit/api/controllers/v1/test_root.py | 6 + .../tests/unit/conductor/test_inspection.py | 55 +++++ ironic/tests/unit/conductor/test_manager.py | 31 +++ .../drivers/modules/test_inspect_utils.py | 211 +++++++++++++++++- 21 files changed, 791 insertions(+), 30 deletions(-) diff --git a/doc/source/contributor/webapi-version-history.rst b/doc/source/contributor/webapi-version-history.rst index 0395cbc767..b9f5f171e2 100644 --- a/doc/source/contributor/webapi-version-history.rst +++ b/doc/source/contributor/webapi-version-history.rst @@ -2,8 +2,14 @@ REST API Version History ======================== +1.84 (Bobcat) +------------- + +Add callback endpoint for in-band inspection ``/v1/continue_inspection``. +This endpoint is not designed to be used by regular users. + 1.83 (Bobcat) ----------------------- +------------- This version adds a concept of child nodes through the use of a ``parent_node`` field which can be set on a node. diff --git a/ironic/api/config.py b/ironic/api/config.py index b053ea1707..5026747631 100644 --- a/ironic/api/config.py +++ b/ironic/api/config.py @@ -32,6 +32,7 @@ app = { # IPA ramdisk methods '/v1/lookup', '/v1/heartbeat/[a-z0-9\\-]+', + '/v1/continue_inspection', ], } diff --git a/ironic/api/controllers/v1/__init__.py b/ironic/api/controllers/v1/__init__.py index c352761c76..e0b2200f7f 100644 --- a/ironic/api/controllers/v1/__init__.py +++ b/ironic/api/controllers/v1/__init__.py @@ -77,6 +77,11 @@ VERSIONED_CONTROLLERS = { 'events': utils.allow_expose_events, 'deploy_templates': utils.allow_deploy_templates, 'shards': utils.allow_shards_endpoint, + # NOTE(dtantsur): continue_inspection is available in 1.1 as a + # compatibility hack to make it usable with IPA without changes. + # Hide this fact from consumers since it was not actually available + # back in Kilo. + 'continue_inspection': utils.new_continue_inspection_endpoint, } @@ -125,6 +130,7 @@ class Controller(object): 'events': event.EventsController(), 'deploy_templates': deploy_template.DeployTemplatesController(), 'shards': shard.ShardController(), + 'continue_inspection': ramdisk.ContinueInspectionController(), } @method.expose() diff --git a/ironic/api/controllers/v1/ramdisk.py b/ironic/api/controllers/v1/ramdisk.py index b98eb7dc21..0840a13731 100644 --- a/ironic/api/controllers/v1/ramdisk.py +++ b/ironic/api/controllers/v1/ramdisk.py @@ -21,12 +21,14 @@ from pecan import rest from ironic import api from ironic.api.controllers.v1 import node as node_ctl from ironic.api.controllers.v1 import utils as api_utils +from ironic.api.controllers.v1 import versions from ironic.api import method from ironic.common import args from ironic.common import exception from ironic.common.i18n import _ from ironic.common import states from ironic.common import utils +from ironic.drivers.modules import inspect_utils from ironic import objects @@ -66,6 +68,31 @@ def convert_with_links(node): return {'node': node, 'config': config(token)} +def get_valid_mac_addresses(addresses, node_uuid=None): + if addresses is None: + addresses = [] + + valid_addresses = [] + invalid_addresses = [] + for addr in addresses: + try: + mac = utils.validate_and_normalize_mac(addr) + valid_addresses.append(mac) + except exception.InvalidMAC: + invalid_addresses.append(addr) + + if invalid_addresses: + node_log = ('' if not node_uuid + else '(Node UUID: %s)' % node_uuid) + LOG.warning('The following MAC addresses "%(addrs)s" are ' + 'invalid and will be ignored by the lookup ' + 'request %(node)s', + {'addrs': ', '.join(invalid_addresses), + 'node': node_log}) + + return valid_addresses + + class LookupController(rest.RestController): """Controller handling node lookup for a deploy ramdisk.""" @@ -100,27 +127,7 @@ class LookupController(rest.RestController): api_utils.check_policy('baremetal:driver:ipa_lookup') # Validate the list of MAC addresses - if addresses is None: - addresses = [] - - valid_addresses = [] - invalid_addresses = [] - for addr in addresses: - try: - mac = utils.validate_and_normalize_mac(addr) - valid_addresses.append(mac) - except exception.InvalidMAC: - invalid_addresses.append(addr) - - if invalid_addresses: - node_log = ('' if not node_uuid - else '(Node UUID: %s)' % node_uuid) - LOG.warning('The following MAC addresses "%(addrs)s" are ' - 'invalid and will be ignored by the lookup ' - 'request %(node)s', - {'addrs': ', '.join(invalid_addresses), - 'node': node_log}) - + valid_addresses = get_valid_mac_addresses(addresses) if not valid_addresses and not node_uuid: raise exception.IncompleteLookup() @@ -254,3 +261,99 @@ class HeartbeatController(rest.RestController): api.request.context, rpc_node.uuid, callback_url, agent_version, agent_token, agent_verify_ca, agent_status, agent_status_message, topic=topic) + + +DATA_VALIDATOR = args.schema({ + 'type': 'object', + 'properties': { + # This validator defines a minimal acceptable inventory. + 'inventory': { + 'type': 'object', + 'properties': { + 'bmc_address': {'type': 'string'}, + 'bmc_v6address': {'type': 'string'}, + 'interfaces': { + 'type': 'array', + 'items': { + 'type': 'object', + 'properties': { + 'mac_address': {'type': 'string'}, + }, + 'required': ['mac_address'], + 'additionalProperties': True, + }, + 'minItems': 1, + }, + }, + 'required': ['interfaces'], + 'additionalProperties': True, + }, + }, + 'required': ['inventory'], + 'additionalProperties': True, +}) + + +class ContinueInspectionController(rest.RestController): + """Controller handling inspection data from deploy ramdisk.""" + + @method.expose(status_code=http_client.ACCEPTED) + @method.body('data') + @args.validate(data=DATA_VALIDATOR, node_uuid=args.uuid) + def post(self, data, node_uuid=None): + """Process a introspection data from the deploy ramdisk. + + :param data: Introspection data. + :param node_uuid: UUID of a node. + :raises: InvalidParameterValue if node_uuid is a valid UUID. + :raises: NoValidHost if RPC topic for node could not be retrieved. + :raises: NotFound if requested API version does not allow this + endpoint or if lookup fails. + """ + if (not api_utils.allow_continue_inspection_endpoint() + # Node UUID support is a new addition + or (node_uuid + and not api_utils.new_continue_inspection_endpoint())): + raise exception.NotFound( + # This is a small lie: 1.1 is accepted as well, but no need + # to really advertise this fact, it's only for compatibility. + _('API version 1.%d or newer is required') + % versions.MINOR_83_CONTINUE_INSPECTION) + + api_utils.check_policy('baremetal:node:ipa_continue_inspection') + + inventory = data.pop('inventory') + macs = get_valid_mac_addresses( + iface['mac_address'] for iface in inventory['interfaces']) + bmc_addresses = list( + filter(None, (inventory.get('bmc_address'), + inventory.get('bmc_v6address'))) + ) + if not macs and not bmc_addresses and not node_uuid: + raise exception.BadRequest(_('No lookup information provided')) + + rpc_node = inspect_utils.lookup_node( + api.request.context, macs, bmc_addresses, node_uuid=node_uuid) + + try: + topic = api.request.rpcapi.get_topic_for(rpc_node) + except exception.NoValidHost as e: + e.code = http_client.BAD_REQUEST + raise + + if api_utils.new_continue_inspection_endpoint(): + # This has to happen before continue_inspection since processing + # the data may take significant time, and creating a token required + # a lock on the node. + rpc_node = api.request.rpcapi.get_node_with_token( + api.request.context, rpc_node.uuid, topic=topic) + + api.request.rpcapi.continue_inspection( + api.request.context, rpc_node.uuid, inventory=inventory, + plugin_data=data, topic=topic) + + if api_utils.new_continue_inspection_endpoint(): + return convert_with_links(rpc_node) + else: + # Compatibility with ironic-inspector + return {'uuid': rpc_node.uuid} diff --git a/ironic/api/controllers/v1/utils.py b/ironic/api/controllers/v1/utils.py index 48ca79ab6d..feaefbdb78 100644 --- a/ironic/api/controllers/v1/utils.py +++ b/ironic/api/controllers/v1/utils.py @@ -1986,3 +1986,18 @@ def check_allow_clean_disable_ramdisk(target, disable_ramdisk): def allow_shards_endpoint(): """Check if shards endpoint is available.""" return api.request.version.minor >= versions.MINOR_82_NODE_SHARD + + +def new_continue_inspection_endpoint(): + """Check if /v1/continue_inspection endpoint is explicitly requested.""" + return api.request.version.minor >= versions.MINOR_84_CONTINUE_INSPECTION + + +def allow_continue_inspection_endpoint(): + """Check if /v1/continue_inspection endpoint is available. + + As a special exception, we allow it in the base version so that the API + can be used as a drop-in replacement for the Inspector's API. + """ + return (new_continue_inspection_endpoint() + or api.request.version.minor == versions.MINOR_1_INITIAL_VERSION) diff --git a/ironic/api/controllers/v1/versions.py b/ironic/api/controllers/v1/versions.py index 8b33c9b306..827f549a63 100644 --- a/ironic/api/controllers/v1/versions.py +++ b/ironic/api/controllers/v1/versions.py @@ -121,6 +121,7 @@ BASE_VERSION = 1 # v1.81: Add node inventory # v1.82: Add node sharding capability # v1.83: Add child node modeling +# v1.84: Add ramdisk callback to continue inspection. MINOR_0_JUNO = 0 MINOR_1_INITIAL_VERSION = 1 MINOR_2_AVAILABLE_STATE = 2 @@ -205,6 +206,7 @@ MINOR_80_PROJECT_CREATE_DELETE_NODE = 80 MINOR_81_NODE_INVENTORY = 81 MINOR_82_NODE_SHARD = 82 MINOR_83_PARENT_CHILD_NODES = 83 +MINOR_84_CONTINUE_INSPECTION = 84 # When adding another version, update: # - MINOR_MAX_VERSION @@ -212,7 +214,7 @@ MINOR_83_PARENT_CHILD_NODES = 83 # explanation of what changed in the new version # - common/release_mappings.py, RELEASE_MAPPING['master']['api'] -MINOR_MAX_VERSION = MINOR_83_PARENT_CHILD_NODES +MINOR_MAX_VERSION = MINOR_84_CONTINUE_INSPECTION # String representations of the minor and maximum versions _MIN_VERSION_STRING = '{}.{}'.format(BASE_VERSION, MINOR_1_INITIAL_VERSION) diff --git a/ironic/common/policy.py b/ironic/common/policy.py index 2d1a549c8e..ac25de2696 100644 --- a/ironic/common/policy.py +++ b/ironic/common/policy.py @@ -1443,6 +1443,12 @@ utility_policies = [ operations=[{'path': '/lookup', 'method': 'GET'}], deprecated_rule=deprecated_ipa_lookup ), + policy.DocumentedRuleDefault( + name='baremetal:driver:ipa_continue_inspection', + check_str='', + description='Receive inspection data from the ramdisk', + operations=[{'path': '/continue_inspection', 'method': 'POST'}], + ), ] @@ -1720,7 +1726,7 @@ allocation_policies = [ # The latter is more for projects and services using admin project # rights. Specific checking because of the expanded rights of # this functionality. - check_str=('(rule:is_member and role:baremetal_admin) or (is_admin_project:True and role:admin)'), # noqa + check_str=('(rule:is_member and role:baremetal_admin) or (is_admin_project:True and role:admin)'), # noqa scope_types=['project'], description=('Logical restrictor to prevent legacy allocation rule ' 'missuse - Requires blank allocations to originate from ' diff --git a/ironic/common/release_mappings.py b/ironic/common/release_mappings.py index eb2ddd313f..ab8d3aa464 100644 --- a/ironic/common/release_mappings.py +++ b/ironic/common/release_mappings.py @@ -574,8 +574,8 @@ RELEASE_MAPPING = { } }, 'master': { - 'api': '1.83', - 'rpc': '1.55', + 'api': '1.84', + 'rpc': '1.56', 'objects': { 'Allocation': ['1.1'], 'BIOSSetting': ['1.1'], diff --git a/ironic/common/states.py b/ironic/common/states.py index f2238b41b5..ae87b32668 100644 --- a/ironic/common/states.py +++ b/ironic/common/states.py @@ -482,6 +482,9 @@ machine.add_transition(INSPECTWAIT, INSPECTFAIL, 'fail') # Inspection is aborted. machine.add_transition(INSPECTWAIT, INSPECTFAIL, 'abort') +# Inspection is continued. +machine.add_transition(INSPECTWAIT, INSPECTING, 'resume') + # Move the node to manageable state for any other # action. machine.add_transition(INSPECTFAIL, MANAGEABLE, 'manage') diff --git a/ironic/conductor/inspection.py b/ironic/conductor/inspection.py index 53c76e99dd..05a91af195 100644 --- a/ironic/conductor/inspection.py +++ b/ironic/conductor/inspection.py @@ -20,6 +20,7 @@ from ironic.common.i18n import _ from ironic.common import states from ironic.conductor import task_manager from ironic.conductor import utils +from ironic.drivers.modules import inspect_utils LOG = log.getLogger(__name__) @@ -106,3 +107,45 @@ def abort_inspection(task): task.process_event('abort') LOG.info('Successfully aborted inspection of node %(node)s', {'node': node.uuid}) + + +@task_manager.require_exclusive_lock +def continue_inspection(task, inventory, plugin_data): + """Continue inspection for the node.""" + node = task.node + LOG.debug('Inventory for node %(node)s: %(data)s', + {'node': node.uuid, 'data': inventory}) + + try: + result = task.driver.inspect.continue_inspection( + task, inventory, plugin_data) + + if result == states.INSPECTWAIT: + if task.node.provision_state == states.INSPECTING: + task.process_event('wait') + LOG.debug('Waiting for inspection data to be processed ' + 'asynchronously for node %s', node.uuid) + return + + # NOTE(dtantsur): this is done *after* processing to allow + # modifications, especially to plugin_data. + inspect_utils.store_inspection_data( + node, inventory, plugin_data, context=task.context) + except exception.UnsupportedDriverExtension: + with excutils.save_and_reraise_exception(): + intf_name = task.driver.inspect.__class__.__name__ + LOG.error('Inspect interface %(intf)s does not ' + 'support processing inspection data for node %(node)s', + {'intf': intf_name, 'node': node.uuid}) + except Exception as e: + with excutils.save_and_reraise_exception(): + LOG.exception('Error when processing inspection data for ' + 'node %(node)s', {'node': node.uuid}) + error = _('Failed to finish inspection: %s') % e + utils.node_history_record(task.node, event=error, + event_type=states.INTROSPECTION, + error=True) + task.process_event('fail') + + task.process_event('done') + LOG.info('Successfully finished inspection of node %s', node.uuid) diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index ef850753dc..076fea2441 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -93,7 +93,7 @@ class ConductorManager(base_manager.BaseConductorManager): # NOTE(rloo): This must be in sync with rpcapi.ConductorAPI's. # NOTE(pas-ha): This also must be in sync with # ironic.common.release_mappings.RELEASE_MAPPING['master'] - RPC_API_VERSION = '1.55' + RPC_API_VERSION = '1.56' target = messaging.Target(version=RPC_API_VERSION) @@ -3652,6 +3652,41 @@ class ConductorManager(base_manager.BaseConductorManager): raise exception.ConcurrentActionLimit( task_type=action) + @METRICS.timer('ConductorManager.continue_inspection') + @messaging.expected_exceptions(exception.NodeLocked, + exception.NotFound, + exception.Invalid) + def continue_inspection(self, context, node_id, inventory, + plugin_data=None): + """Continue in-band inspection. + + :param context: request context. + :param node_id: node ID or UUID. + :param inventory: hardware inventory from the node. + :param plugin_data: optional plugin-specific data. + :raises: NodeLocked if node is locked by another conductor. + :raises: NotFound if node is in invalid state. + """ + LOG.debug("RPC continue_inspection called for the node %(node_id)s", + {'node_id': node_id}) + with task_manager.acquire(context, node_id, + purpose='continue inspection', + shared=False) as task: + # TODO(dtantsur): support active state (re-)inspection + if task.node.provision_state != states.INSPECTWAIT: + LOG.error('Refusing to process inspection data for node ' + '%(node)s in invalid state %(state)s', + {'node': task.node.uuid, + 'state': task.node.provision_state}) + raise exception.NotFound() + + task.process_event( + 'resume', + callback=self._spawn_worker, + call_args=(inspection.continue_inspection, + task, inventory, plugin_data), + err_handler=utils.provisioning_error_handler) + @METRICS.timer('get_vendor_passthru_metadata') def get_vendor_passthru_metadata(route_dict): diff --git a/ironic/conductor/rpcapi.py b/ironic/conductor/rpcapi.py index 47ef4c6c96..5a8582f888 100644 --- a/ironic/conductor/rpcapi.py +++ b/ironic/conductor/rpcapi.py @@ -152,12 +152,13 @@ class ConductorAPI(object): | 1.54 - Added optional agent_status and agent_status_message to heartbeat | 1.55 - Added change_node_boot_mode + | 1.56 - Added continue_inspection """ # NOTE(rloo): This must be in sync with manager.ConductorManager's. # NOTE(pas-ha): This also must be in sync with # ironic.common.release_mappings.RELEASE_MAPPING['master'] - RPC_API_VERSION = '1.55' + RPC_API_VERSION = '1.56' def __init__(self, topic=None): super(ConductorAPI, self).__init__() @@ -1393,3 +1394,18 @@ class ConductorAPI(object): """ cctxt = self._prepare_call(topic=topic, version='1.49') return cctxt.call(context, 'get_node_with_token', node_id=node_id) + + def continue_inspection(self, context, node_id, inventory, + plugin_data=None, topic=None): + """Continue in-band inspection. + + :param context: request context. + :param node_id: node ID or UUID. + :param inventory: hardware inventory from the node. + :param plugin_data: optional plugin-specific data. + :param topic: RPC topic. Defaults to self.topic. + :raises: NodeLocked if node is locked by another conductor. + """ + cctxt = self._prepare_call(topic=topic, version='1.56') + return cctxt.call(context, 'continue_inspection', node_id=node_id, + inventory=inventory, plugin_data=plugin_data) diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 39886bfd87..0f7d20fdb0 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -411,7 +411,12 @@ def provisioning_error_handler(e, node, provision_state, node.provision_state = provision_state node.target_provision_state = target_provision_state error = (_("No free conductor workers available")) - node_history_record(node, event=error, event_type=states.PROVISIONING, + if provision_state in (states.INSPECTING, states.INSPECTWAIT, + states.INSPECTFAIL): + event_type = states.INTROSPECTION + else: + event_type = states.PROVISIONING + node_history_record(node, event=error, event_type=event_type, error=True) node.save() LOG.warning("No free conductor workers available to perform " diff --git a/ironic/drivers/base.py b/ironic/drivers/base.py index 953ac056e5..d89ce249ec 100644 --- a/ironic/drivers/base.py +++ b/ironic/drivers/base.py @@ -1257,6 +1257,20 @@ class InspectInterface(BaseInterface): raise exception.UnsupportedDriverExtension( driver=task.node.driver, extension='abort') + def continue_inspection(self, task, inventory, plugin_data=None): + """Continue in-band hardware inspection. + + Should not be implemented for purely out-of-band implementations. + + :param task: a task from TaskManager. + :param inventory: hardware inventory from the node. + :param plugin_data: optional plugin-specific data. + :raises: UnsupportedDriverExtension, if the method is not implemented + by specific inspect interface. + """ + raise exception.UnsupportedDriverExtension( + driver=task.node.driver, extension='abort') + def cache_bios_settings(func): """A decorator to cache bios settings after running the function. diff --git a/ironic/drivers/modules/inspect_utils.py b/ironic/drivers/modules/inspect_utils.py index f87fcc82b1..2a66edce9e 100644 --- a/ironic/drivers/modules/inspect_utils.py +++ b/ironic/drivers/modules/inspect_utils.py @@ -13,13 +13,19 @@ # License for the specific language governing permissions and limitations # under the License. +import socket +import urllib + from oslo_log import log as logging from oslo_utils import netutils import swiftclient.exceptions from ironic.common import exception +from ironic.common import states from ironic.common import swift +from ironic.common import utils from ironic.conf import CONF +from ironic.drivers.modules import ipmitool from ironic import objects from ironic.objects import node_inventory @@ -212,3 +218,168 @@ def _get_inspection_data_from_swift(node_uuid): container=container, operation='get') return {"inventory": inventory_data, "plugin_data": plugin_data} + + +LOOKUP_CACHE_FIELD = 'lookup_bmc_addresses' + + +def lookup_node(context, mac_addresses, bmc_addresses, node_uuid=None): + """Do a node lookup by the information from the inventory. + + :param context: Request context + :param mac_addresses: List of MAC addresses. + :param bmc_addresses: List of BMC (realistically, IPMI) addresses. + :param node_uuid: Node UUID (if known). + :raises: NotFound with a generic message for all failures to avoid + disclosing any information. + """ + node = None + if node_uuid: + try: + node = objects.Node.get_by_uuid(context, node_uuid) + except exception.NotFound: + # NOTE(dtantsur): we are reraising the same exception to make sure + # we don't disclose the difference between nodes that are not found + # at all and nodes in a wrong state by different error messages. + raise exception.NotFound() + + if mac_addresses: + try: + node = objects.Node.get_by_port_addresses(context, mac_addresses) + except exception.NotFound as exc: + # The exception has enough context already, just log it and move on + LOG.debug("Lookup for inspection: %s", exc) + else: + if node_uuid and node.uuid != node_uuid: + LOG.error('Conflict on inspection lookup: node %(node1)s ' + 'does not match MAC addresses (%(macs)s), which ' + 'belong to node %(node2)s. This may be a sign of ' + 'incorrectly created ports.', + {'node1': node_uuid, + 'node2': node.uuid, + 'macs': ', '.join(mac_addresses)}) + raise exception.NotFound() + + # TODO(dtantsur): support active state inspection + if node and node.provision_state != states.INSPECTWAIT: + LOG.error('Node %(node)s was found during inspection lookup ' + 'with MAC addresses %(macs)s, but it is in ' + 'provision state %(state)s', + {'node': node.uuid, + 'macs': ', '.join(mac_addresses), + 'state': node.provision_state}) + raise exception.NotFound() + + if bmc_addresses: + for candidate in objects.Node.list( + context, + filters={'provision_state': states.INSPECTWAIT}, + fields=['uuid', 'driver_internal_info']): + # This field has to be populated on inspection start + for addr in candidate.driver_internal_info.get( + LOOKUP_CACHE_FIELD) or (): + if addr in bmc_addresses: + break + else: + continue + + if node and candidate.uuid != node.uuid: + LOG.error('Conflict on inspection lookup: nodes %(node1)s ' + 'and %(node2)s both satisfy MAC addresses ' + '(%(macs)s) and BMC address(s) (%(bmc)s). The cause ' + 'may be ports attached to a wrong node.', + {'node1': node.uuid, + 'node2': candidate.uuid, + 'macs': ', '.join(mac_addresses), + 'bmc': ', '.join(bmc_addresses)}) + raise exception.NotFound() + + try: + # Fetch the complete object now. + node = objects.Node.get_by_uuid(context, candidate.uuid) + except exception.NotFound: + pass # Deleted in-between? + + if not node: + LOG.error('No nodes satisfy MAC addresses (%(macs)s) and BMC ' + 'address(s) (%(bmc)s) during inspection lookup', + {'macs': ', '.join(mac_addresses), + 'bmc': ', '.join(bmc_addresses)}) + raise exception.NotFound() + + LOG.debug('Inspection lookup succeeded for node %(node)s using MAC ' + 'addresses %(mac)s and BMC addresses %(bmc)s', + {'node': node.uuid, 'mac': mac_addresses, 'bmc': bmc_addresses}) + return node + + +def _get_bmc_addresses(node): + """Get the BMC address defined in the node's driver_info. + + All valid hosts are returned along with their v4 and v6 IP addresses. + + :param node: Node object with defined driver_info dictionary + :return: a set with suitable addresses + """ + result = set() + + # FIXME(dtantsur): this extremely lame process is adapted from + # ironic-inspector. Now that it's in Ironic proper, we need to replace it + # with something using information from hardware types. + for name, address in node.driver_info.items(): + if not name.endswith('_address'): + continue + + # NOTE(sambetts): IPMI address is useless to us if bridging is enabled + # so just ignore it. + if (name.startswith('ipmi_') + and ipmitool.is_bridging_enabled(node)): + LOG.debug('Will not used %(field)s %(addr)s for lookup since ' + 'IPMI bridging is enabled for node %(node)s', + {'addr': address, 'field': name, 'node': node.uuid}) + continue + + if '//' in address: + address = urllib.parse.urlparse(address).hostname + + try: + addrinfo = socket.getaddrinfo(address, None, proto=socket.SOL_TCP) + except socket.gaierror as exc: + LOG.warning('Failed to resolve the hostname (%(addr)s) in ' + '%(field)s of node %(node)s: %(exc)s', + {'addr': address, 'field': name, 'node': node.uuid, + 'exc': exc}) + continue + + for *other, sockaddr in addrinfo: + ip = sockaddr[0] + if utils.is_loopback(ip): + LOG.warning('Ignoring loopback %(field)s %(addr)s ' + 'for node %(node)s', + {'addr': ip, 'field': name, 'node': node.uuid}) + else: + result.add(ip) + + if not utils.is_loopback(address): + result.add(address) + + return result + + +def cache_lookup_addresses(node): + """Cache lookup addresses for a quick access.""" + addresses = _get_bmc_addresses(node) + if addresses: + LOG.debug('Will use the following BMC addresses for inspection lookup ' + 'of node %(node)s: %(addr)s', + {'node': node.uuid, 'addr': addresses}) + node.set_driver_internal_info(LOOKUP_CACHE_FIELD, list(addresses)) + else: + LOG.debug('No BMC addresses to use for inspection lookup of node %s', + node.uuid) + clear_lookup_addresses(node) + + +def clear_lookup_addresses(node): + """Remove lookup addresses cached on the node.""" + return node.del_driver_internal_info(LOOKUP_CACHE_FIELD) diff --git a/ironic/drivers/modules/inspector/interface.py b/ironic/drivers/modules/inspector/interface.py index 75d51f9711..d86c55c6da 100644 --- a/ironic/drivers/modules/inspector/interface.py +++ b/ironic/drivers/modules/inspector/interface.py @@ -213,6 +213,10 @@ class Inspector(base.InspectInterface): utils.set_node_nested_field(task.node, 'driver_internal_info', _IRONIC_MANAGES_BOOT, ironic_manages_boot) + # Make this interface work with the Ironic own /continue_inspection + # endpoint to simplify migration to the new in-band inspection + # implementation. + inspect_utils.cache_lookup_addresses(task.node) task.node.save() LOG.debug('Starting inspection for node %(uuid)s using ' @@ -239,6 +243,8 @@ class Inspector(base.InspectInterface): LOG.debug('Aborting inspection for node %(uuid)s using ' 'ironic-inspector', {'uuid': node_uuid}) client.get_client(task.context).abort_introspection(node_uuid) + if inspect_utils.clear_lookup_addresses(task.node): + task.node.save() @periodics.node_periodic( purpose='checking hardware inspection status', @@ -249,6 +255,24 @@ class Inspector(base.InspectInterface): """Periodic task checking results of inspection.""" _check_status(task) + def continue_inspection(self, task, inventory, plugin_data=None): + """Continue in-band hardware inspection. + + This implementation simply defers to ironic-inspector. It only exists + to simplify the transition to Ironic-native in-band inspection. + + :param task: a task from TaskManager. + :param inventory: hardware inventory from the node. + :param plugin_data: optional plugin-specific data. + """ + cli = client.get_client(task.context) + endpoint = _get_callback_endpoint(cli) + data = dict(plugin_data, inventory=inventory) # older format + task.process_event('wait') + task.downgrade_lock() + cli.post(endpoint, json=data) + return states.INSPECTWAIT + def _start_inspection(node_uuid, context): """Call to inspector to start inspection.""" @@ -300,6 +324,8 @@ def _check_status(task): task.upgrade_lock() node = task.node + inspect_utils.clear_lookup_addresses(node) + if status.error: LOG.error('Inspection failed for node %(uuid)s with error: %(err)s', {'uuid': node.uuid, 'err': status.error}) diff --git a/ironic/drivers/modules/ipmitool.py b/ironic/drivers/modules/ipmitool.py index 218f90a5c0..0085cf2f3e 100644 --- a/ironic/drivers/modules/ipmitool.py +++ b/ironic/drivers/modules/ipmitool.py @@ -313,6 +313,14 @@ def _make_password_file(password): f.close() +def is_bridging_enabled(node): + """Check if IPMI bridging is enabled. + + This call is used in the inspector lookup. + """ + return node.driver_info.get("ipmi_bridging", "no") != "no" + + def _parse_driver_info(node): """Gets the parameters required for ipmitool to access the node. diff --git a/ironic/tests/unit/api/controllers/v1/test_root.py b/ironic/tests/unit/api/controllers/v1/test_root.py index 229e88622f..903f8d8549 100644 --- a/ironic/tests/unit/api/controllers/v1/test_root.py +++ b/ironic/tests/unit/api/controllers/v1/test_root.py @@ -113,6 +113,12 @@ class TestV1Routing(api_base.BaseApiTest): {'href': 'http://localhost/v1/conductors/', 'rel': 'self'}, {'href': 'http://localhost/conductors/', 'rel': 'bookmark'} ], + 'continue_inspection': [ + {'href': 'http://localhost/v1/continue_inspection/', + 'rel': 'self'}, + {'href': 'http://localhost/continue_inspection/', + 'rel': 'bookmark'} + ], 'deploy_templates': [ {'href': 'http://localhost/v1/deploy_templates/', 'rel': 'self'}, diff --git a/ironic/tests/unit/conductor/test_inspection.py b/ironic/tests/unit/conductor/test_inspection.py index c64b883e4c..4108b5e1bc 100644 --- a/ironic/tests/unit/conductor/test_inspection.py +++ b/ironic/tests/unit/conductor/test_inspection.py @@ -1,3 +1,4 @@ + # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at @@ -16,6 +17,7 @@ from ironic.common import exception from ironic.common import states from ironic.conductor import inspection from ironic.conductor import task_manager +from ironic.drivers.modules import inspect_utils from ironic.tests.unit.db import base as db_base from ironic.tests.unit.objects import utils as obj_utils @@ -116,3 +118,56 @@ class TestInspectHardware(db_base.DbTestCase): self.assertEqual('Unexpected exception of type RuntimeError: x', node.last_error) self.assertTrue(mock_inspect.called) + + +@mock.patch('ironic.drivers.modules.fake.FakeInspect.continue_inspection', + autospec=True) +class TestContinueInspection(db_base.DbTestCase): + + def setUp(self): + super().setUp() + self.node = obj_utils.create_test_node( + self.context, provision_state=states.INSPECTING) + self.inventory = {"test": "inventory"} + self.plugin_data = {"plugin": "data"} + + @mock.patch.object(inspect_utils, 'store_inspection_data', autospec=True) + def test_ok(self, mock_store, mock_continue): + with task_manager.acquire(self.context, self.node.id) as task: + inspection.continue_inspection(task, self.inventory, + self.plugin_data) + mock_continue.assert_called_once_with(task.driver.inspect, + task, self.inventory, + self.plugin_data) + mock_store.assert_called_once_with(task.node, self.inventory, + self.plugin_data, self.context) + self.node.refresh() + self.assertEqual(states.MANAGEABLE, self.node.provision_state) + + @mock.patch.object(inspect_utils, 'store_inspection_data', autospec=True) + def test_ok_asynchronous(self, mock_store, mock_continue): + mock_continue.return_value = states.INSPECTWAIT + with task_manager.acquire(self.context, self.node.id) as task: + inspection.continue_inspection(task, self.inventory, + self.plugin_data) + mock_continue.assert_called_once_with(task.driver.inspect, + task, self.inventory, + self.plugin_data) + mock_store.assert_not_called() + self.assertEqual(states.INSPECTWAIT, task.node.provision_state) + + @mock.patch.object(inspect_utils, 'store_inspection_data', autospec=True) + def test_failure(self, mock_store, mock_continue): + mock_continue.side_effect = exception.HardwareInspectionFailure("boom") + with task_manager.acquire(self.context, self.node.id) as task: + self.assertRaises(exception.HardwareInspectionFailure, + inspection.continue_inspection, + task, self.inventory, self.plugin_data) + mock_continue.assert_called_once_with(task.driver.inspect, + task, self.inventory, + self.plugin_data) + + mock_store.assert_not_called() + self.node.refresh() + self.assertEqual(states.INSPECTFAIL, self.node.provision_state) + self.assertIn("boom", self.node.last_error) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index f0fb075b73..4e0be81c93 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -8439,3 +8439,34 @@ class ConcurrentActionLimitTestCase(mgr_utils.ServiceSetUpMixin, CONF.set_override('max_concurrent_clean', 4, group='conductor') self.service._concurrent_action_limit('cleaning') self.service._concurrent_action_limit('unprovisioning') + + +@mgr_utils.mock_record_keepalive +class ContinueInspectionTestCase(mgr_utils.ServiceSetUpMixin, + db_base.DbTestCase): + + @mock.patch.object(manager.ConductorManager, '_spawn_worker', + autospec=True) + def test_continue_ok(self, mock_spawn): + node = obj_utils.create_test_node(self.context, + provision_state=states.INSPECTWAIT) + self.service.continue_inspection(self.context, node.id, + {"test": "inventory"}, + ["plugin data"]) + node.refresh() + self.assertEqual(states.INSPECTING, node.provision_state) + mock_spawn.assert_called_once_with( + self.service, inspection.continue_inspection, mock.ANY, + {"test": "inventory"}, ["plugin data"]) + + def test_wrong_state(self): + node = obj_utils.create_test_node(self.context, + provision_state=states.AVAILABLE) + exc = self.assertRaises(messaging.rpc.ExpectedException, + self.service.continue_inspection, + self.context, node.id, + {"test": "inventory"}, + ["plugin data"]) + self.assertEqual(exception.NotFound, exc.exc_info[0]) + node.refresh() + self.assertEqual(states.AVAILABLE, node.provision_state) diff --git a/ironic/tests/unit/drivers/modules/test_inspect_utils.py b/ironic/tests/unit/drivers/modules/test_inspect_utils.py index 57980d5a2b..f6f5ab0190 100644 --- a/ironic/tests/unit/drivers/modules/test_inspect_utils.py +++ b/ironic/tests/unit/drivers/modules/test_inspect_utils.py @@ -13,14 +13,16 @@ # License for the specific language governing permissions and limitations # under the License. - +import socket from unittest import mock from oslo_utils import importutils +from oslo_utils import uuidutils import swiftclient.exceptions from ironic.common import context as ironic_context from ironic.common import exception +from ironic.common import states from ironic.common import swift from ironic.conductor import task_manager from ironic.conf import CONF @@ -293,3 +295,210 @@ class IntrospectionDataStorageFunctionsTestCase(db_base.DbTestCase): self.assertRaises(exception.SwiftObjectNotFoundError, utils._get_inspection_data_from_swift, self.node.uuid) + + +class LookupNodeTestCase(db_base.DbTestCase): + + def setUp(self): + super().setUp() + self.bmc = '192.0.2.1' + self.node = obj_utils.create_test_node( + self.context, + driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc]}, + provision_state=states.INSPECTWAIT) + + self.macs = ['11:22:33:44:55:66', '12:34:56:78:90:ab'] + self.unknown_mac = '66:55:44:33:22:11' + self.ports = [ + obj_utils.create_test_port(self.context, + uuid=uuidutils.generate_uuid(), + node_id=self.node.id, + address=addr) + for addr in self.macs + ] + + self.bmc2 = '1.2.1.2' + self.mac2 = '00:11:00:11:00:11' + self.node2 = obj_utils.create_test_node( + self.context, + uuid=uuidutils.generate_uuid(), + driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc2]}, + provision_state=states.INSPECTWAIT) + obj_utils.create_test_port(self.context, + node_id=self.node2.id, + address=self.mac2) + + def test_no_input(self): + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, [], [], None) + + def test_by_macs(self): + result = utils.lookup_node(self.context, self.macs[::-1], [], None) + self.assertEqual(self.node.uuid, result.uuid) + + def test_by_macs_partial(self): + macs = [self.macs[1], self.unknown_mac] + result = utils.lookup_node(self.context, macs, [], None) + self.assertEqual(self.node.uuid, result.uuid) + + def test_by_mac_not_found(self): + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, [self.unknown_mac], [], None) + + def test_by_mac_wrong_state(self): + self.node.provision_state = states.AVAILABLE + self.node.save() + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, self.macs, [], None) + + def test_conflicting_macs(self): + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, [self.macs[0], self.mac2], [], None) + + def test_by_bmc(self): + result = utils.lookup_node(self.context, [], ['192.0.2.1'], None) + self.assertEqual(self.node.uuid, result.uuid) + + def test_by_bmc_and_mac(self): + result = utils.lookup_node( + self.context, [self.macs[0]], [self.bmc], None) + self.assertEqual(self.node.uuid, result.uuid) + + def test_by_unknown_bmc_and_mac(self): + result = utils.lookup_node( + self.context, [self.unknown_mac], [self.bmc], None) + self.assertEqual(self.node.uuid, result.uuid) + + def test_by_bmc_and_mac_and_uuid(self): + result = utils.lookup_node( + self.context, [self.macs[0]], [self.bmc], self.node.uuid) + self.assertEqual(self.node.uuid, result.uuid) + + def test_by_bmc_not_found(self): + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, [], ['192.168.1.1'], None) + + def test_by_bmc_wrong_state(self): + self.node.provision_state = states.AVAILABLE + self.node.save() + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, [], [self.bmc], None) + + def test_conflicting_macs_and_bmc(self): + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, self.macs, [self.bmc2], None) + + def test_by_uuid(self): + result = utils.lookup_node(self.context, [], [], self.node.uuid) + self.assertEqual(self.node.uuid, result.uuid) + + def test_by_uuid_and_unknown_macs(self): + result = utils.lookup_node( + self.context, [self.unknown_mac], [], self.node.uuid) + self.assertEqual(self.node.uuid, result.uuid) + + def test_by_uuid_not_found(self): + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, [], [], uuidutils.generate_uuid()) + + def test_by_uuid_wrong_state(self): + self.node.provision_state = states.AVAILABLE + self.node.save() + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, [], [], self.node.uuid) + + def test_conflicting_macs_and_uuid(self): + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, self.macs, [], self.node2.uuid) + + def test_conflicting_bmc_and_uuid(self): + self.assertRaises(exception.NotFound, utils.lookup_node, + self.context, self.macs, [self.bmc], self.node2.uuid) + + +class GetBMCAddressesTestCase(db_base.DbTestCase): + + def test_localhost_ignored(self): + node = obj_utils.create_test_node( + self.context, + driver_info={'ipmi_address': '127.0.0.1'}) + self.assertEqual(set(), utils._get_bmc_addresses(node)) + + def test_localhost_as_url_ignored(self): + node = obj_utils.create_test_node( + self.context, + driver_info={'redfish_address': 'https://localhost/redfish'}) + self.assertEqual(set(), utils._get_bmc_addresses(node)) + + def test_normal_ip(self): + node = obj_utils.create_test_node( + self.context, + driver_info={'ipmi_address': '192.0.2.1'}) + self.assertEqual({'192.0.2.1'}, utils._get_bmc_addresses(node)) + + def test_normal_ip_as_url(self): + node = obj_utils.create_test_node( + self.context, + driver_info={'redfish_address': 'https://192.0.2.1/redfish'}) + self.assertEqual({'192.0.2.1'}, utils._get_bmc_addresses(node)) + + @mock.patch.object(socket, 'getaddrinfo', autospec=True) + def test_resolved_host(self, mock_getaddrinfo): + mock_getaddrinfo.return_value = [ + (socket.AF_INET6, socket.SOCK_STREAM, socket.SOL_TCP, + '', ('2001:db8::42', None)), + (socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP, + '', ('192.0.2.1', None)), + ] + node = obj_utils.create_test_node( + self.context, + driver_info={'ipmi_address': 'example.com'}) + self.assertEqual({'example.com', '192.0.2.1', '2001:db8::42'}, + utils._get_bmc_addresses(node)) + mock_getaddrinfo.assert_called_once_with( + 'example.com', None, proto=socket.SOL_TCP) + + @mock.patch.object(socket, 'getaddrinfo', autospec=True) + def test_resolved_host_in_url(self, mock_getaddrinfo): + mock_getaddrinfo.return_value = [ + (socket.AF_INET6, socket.SOCK_STREAM, socket.SOL_TCP, + '', ('2001:db8::42', None)), + (socket.AF_INET, socket.SOCK_STREAM, socket.SOL_TCP, + '', ('192.0.2.1', None)), + ] + node = obj_utils.create_test_node( + self.context, + driver_info={'redfish_address': 'https://example.com:8080/v1'}) + self.assertEqual({'example.com', '192.0.2.1', '2001:db8::42'}, + utils._get_bmc_addresses(node)) + mock_getaddrinfo.assert_called_once_with( + 'example.com', None, proto=socket.SOL_TCP) + + +class LookupCacheTestCase(db_base.DbTestCase): + + def setUp(self): + super().setUp() + self.bmc = '192.0.2.1' + self.node = obj_utils.create_test_node( + self.context, + driver_internal_info={utils.LOOKUP_CACHE_FIELD: [self.bmc]}, + provision_state=states.INSPECTWAIT) + + def test_clear(self): + result = utils.clear_lookup_addresses(self.node) + self.assertEqual([self.bmc], result) + self.assertEqual({}, self.node.driver_internal_info) + + @mock.patch.object(utils, '_get_bmc_addresses', autospec=True) + def test_new_value(self, mock_get_addr): + mock_get_addr.return_value = {'192.0.2.42'} + utils.cache_lookup_addresses(self.node) + self.assertEqual({utils.LOOKUP_CACHE_FIELD: ['192.0.2.42']}, + self.node.driver_internal_info) + + @mock.patch.object(utils, '_get_bmc_addresses', autospec=True) + def test_replace_with_empty(self, mock_get_addr): + mock_get_addr.return_value = set() + utils.cache_lookup_addresses(self.node) + self.assertEqual({}, self.node.driver_internal_info)