From d3c3d4dabe907ac9b6fc9eb95a6015564ed55d72 Mon Sep 17 00:00:00 2001 From: Julia Kreger Date: Wed, 19 Aug 2020 18:44:39 -0700 Subject: [PATCH] Update the cache if we don't have a root device hint Or at least try to. Some deployments just don't use root device hints, and this is okay. However, other deployments need root device hints, and with fast track mode in ramdisks, we created a situation where the node cache could be updated by a human or software between the time the agent was started, and the deployment was requested. As a result, the agent has been updated to check if we have a hint and if we don't, update the cache from the node lookup endpoint. This is not needed when the inband deploy steps are executed, as the process of updating the steps does force the node cache to be updated. Change-Id: I27201319f31cdc01605a3c5ae9ef4b4218e4a3f6 Story: 2008039 Task: 40701 --- ironic_python_agent/agent.py | 4 ++ ironic_python_agent/extensions/iscsi.py | 3 +- ironic_python_agent/extensions/standby.py | 6 +- ironic_python_agent/hardware.py | 47 ++++++++++++- .../tests/unit/extensions/test_iscsi.py | 12 ++-- .../tests/unit/extensions/test_standby.py | 30 +++++--- .../tests/unit/test_hardware.py | 70 +++++++++++++++++++ ...asttrack-stale-cache-fd93b56a955c7ab1.yaml | 9 +++ 8 files changed, 161 insertions(+), 20 deletions(-) create mode 100644 releasenotes/notes/fasttrack-stale-cache-fd93b56a955c7ab1.yaml diff --git a/ironic_python_agent/agent.py b/ironic_python_agent/agent.py index ec520cd63..1cfe2e202 100644 --- a/ironic_python_agent/agent.py +++ b/ironic_python_agent/agent.py @@ -472,6 +472,10 @@ class IronicPythonAgent(base.ExecuteCommandMixin): node_uuid=uuid) LOG.debug('Received lookup results: %s', content) self.process_lookup_data(content) + # Save the API url in case we need it later. + hardware.save_api_client( + self.api_client, self.lookup_timeout, + self.lookup_interval) elif cfg.CONF.inspection_callback_url: LOG.info('No ipa-api-url configured, Heartbeat and lookup ' diff --git a/ironic_python_agent/extensions/iscsi.py b/ironic_python_agent/extensions/iscsi.py index 1b3b308cb..c519ab32c 100644 --- a/ironic_python_agent/extensions/iscsi.py +++ b/ironic_python_agent/extensions/iscsi.py @@ -185,7 +185,8 @@ class ISCSIExtension(base.BaseAgentExtension): if iqn is None: iqn = 'iqn.2008-10.org.openstack:%s' % uuidutils.generate_uuid() - device = hardware.dispatch_to_managers('get_os_install_device') + device = hardware.dispatch_to_managers('get_os_install_device', + permit_refresh=True) if wipe_disk_metadata: disk_utils.destroy_disk_metadata( diff --git a/ironic_python_agent/extensions/standby.py b/ironic_python_agent/extensions/standby.py index e687d10ef..0d42c9292 100644 --- a/ironic_python_agent/extensions/standby.py +++ b/ironic_python_agent/extensions/standby.py @@ -627,7 +627,8 @@ class StandbyExtension(base.BaseAgentExtension): :raises: ImageWriteError if writing the image fails. """ LOG.debug('Caching image %s', image_info['id']) - device = hardware.dispatch_to_managers('get_os_install_device') + device = hardware.dispatch_to_managers('get_os_install_device', + permit_refresh=True) msg = 'image ({}) already present on device {} ' @@ -669,7 +670,8 @@ class StandbyExtension(base.BaseAgentExtension): large to store on the given device. """ LOG.debug('Preparing image %s', image_info['id']) - device = hardware.dispatch_to_managers('get_os_install_device') + device = hardware.dispatch_to_managers('get_os_install_device', + permit_refresh=True) disk_format = image_info.get('disk_format') stream_raw_images = image_info.get('stream_raw_images', False) diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index 5aed30670..399d70609 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -53,7 +53,9 @@ UNIT_CONVERTER.define('bytes = []') UNIT_CONVERTER.define('MB = 1048576 bytes') _MEMORY_ID_RE = re.compile(r'^memory(:\d+)?$') NODE = None - +API_CLIENT = None +API_LOOKUP_TIMEOUT = None +API_LOOKUP_INTERVAL = None SUPPORTED_SOFTWARE_RAID_LEVELS = frozenset(['0', '1', '1+0', '5', '6']) RAID_APPLY_CONFIGURATION_ARGSINFO = { @@ -470,6 +472,40 @@ def list_all_block_devices(block_type='disk', return devices +def save_api_client(client=None, timeout=None, interval=None): + """Preserves access to the API client for potential later re-use.""" + global API_CLIENT, API_LOOKUP_TIMEOUT, API_LOOKUP_INTERVAL + + if client and timeout and interval and not API_CLIENT: + API_CLIENT = client + API_LOOKUP_TIMEOUT = timeout + API_LOOKUP_INTERVAL = interval + + +def update_cached_node(): + """Attmepts to update the node cache via the API""" + cached_node = get_cached_node() + if API_CLIENT: + LOG.info('Agent is requesting to perform an explicit node cache ' + 'update. This is to pickup any chanages in the cache ' + 'before deployment.') + try: + if cached_node is None: + uuid = None + else: + uuid = cached_node['uuid'] + content = API_CLIENT.lookup_node( + hardware_info=list_hardware_info(use_cache=True), + timeout=API_LOOKUP_TIMEOUT, + starting_interval=API_LOOKUP_INTERVAL, + uuid=uuid) + cache_node(content['node']) + return content['node'] + except Exception as exc: + LOG.warning('Failed to update node cache. Error %s', exc) + return cached_node + + class HardwareSupport(object): """Example priorities for hardware managers. @@ -597,7 +633,7 @@ class HardwareManager(object, metaclass=abc.ABCMeta): def get_memory(self): raise errors.IncompatibleHardwareMethodError - def get_os_install_device(self): + def get_os_install_device(self, permit_refresh=False): raise errors.IncompatibleHardwareMethodError def get_bmc_address(self): @@ -1052,13 +1088,18 @@ class GenericHardwareManager(HardwareManager): ) return block_devices - def get_os_install_device(self): + def get_os_install_device(self, permit_refresh=False): cached_node = get_cached_node() root_device_hints = None if cached_node is not None: root_device_hints = ( cached_node['instance_info'].get('root_device') or cached_node['properties'].get('root_device')) + if permit_refresh and not root_device_hints: + cached_node = update_cached_node() + root_device_hints = ( + cached_node['instance_info'].get('root_device') + or cached_node['properties'].get('root_device')) LOG.debug('Looking for a device matching root hints %s', root_device_hints) diff --git a/ironic_python_agent/tests/unit/extensions/test_iscsi.py b/ironic_python_agent/tests/unit/extensions/test_iscsi.py index 131ad0751..a4678a98e 100644 --- a/ironic_python_agent/tests/unit/extensions/test_iscsi.py +++ b/ironic_python_agent/tests/unit/extensions/test_iscsi.py @@ -69,7 +69,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest): '--op', 'bind', '--tid', '1', '--initiator-address', 'ALL')] mock_execute.assert_has_calls(expected) - mock_dispatch.assert_called_once_with('get_os_install_device') + mock_dispatch.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual({'iscsi_target_iqn': self.fake_iqn}, result.command_result) self.assertFalse(mock_destroy.called) @@ -98,7 +99,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest): '--op', 'bind', '--tid', '1', '--initiator-address', 'ALL')] mock_execute.assert_has_calls(expected) - mock_dispatch.assert_called_once_with('get_os_install_device') + mock_dispatch.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual({'iscsi_target_iqn': self.fake_iqn}, result.command_result) @@ -119,7 +121,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest): '--op', 'show', attempts=10)] mock_execute.assert_has_calls(expected) - mock_dispatch.assert_called_once_with('get_os_install_device') + mock_dispatch.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertFalse(mock_destroy.called) @mock.patch.object(iscsi, '_wait_for_tgtd', autospec=True) @@ -138,7 +141,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest): 'target', '--op', 'new', '--tid', '1', '--targetname', self.fake_iqn)] mock_execute.assert_has_calls(expected) - mock_dispatch.assert_called_once_with('get_os_install_device') + mock_dispatch.assert_called_once_with('get_os_install_device', + permit_refresh=True) def test_start_iscsi_target_fail_command_not_exist(self, mock_execute, mock_dispatch, diff --git a/ironic_python_agent/tests/unit/extensions/test_standby.py b/ironic_python_agent/tests/unit/extensions/test_standby.py index 5d27f6d73..9ca0f244e 100644 --- a/ironic_python_agent/tests/unit/extensions/test_standby.py +++ b/ironic_python_agent/tests/unit/extensions/test_standby.py @@ -569,7 +569,8 @@ class TestStandbyExtension(base.IronicAgentTest): async_result.join() download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual(image_info['id'], self.agent_extension.cached_image_id) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -595,7 +596,8 @@ class TestStandbyExtension(base.IronicAgentTest): async_result.join() download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual(image_info['id'], self.agent_extension.cached_image_id) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -626,7 +628,8 @@ class TestStandbyExtension(base.IronicAgentTest): async_result.join() download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual(image_info['id'], self.agent_extension.cached_image_id) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -654,7 +657,8 @@ class TestStandbyExtension(base.IronicAgentTest): async_result.join() self.assertFalse(download_mock.called) self.assertFalse(write_mock.called) - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual(image_info['id'], self.agent_extension.cached_image_id) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -699,7 +703,8 @@ class TestStandbyExtension(base.IronicAgentTest): download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) configdrive_copy_mock.assert_called_once_with(image_info['node_uuid'], 'manager', 'configdrive_data') @@ -749,7 +754,8 @@ class TestStandbyExtension(base.IronicAgentTest): download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertFalse(configdrive_copy_mock.called) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -821,7 +827,8 @@ class TestStandbyExtension(base.IronicAgentTest): download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual(0, configdrive_copy_mock.call_count) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -871,7 +878,8 @@ class TestStandbyExtension(base.IronicAgentTest): download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertFalse(configdrive_copy_mock.called) self.assertEqual('FAILED', async_result.command_status) @@ -913,7 +921,8 @@ class TestStandbyExtension(base.IronicAgentTest): download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) configdrive_copy_mock.assert_called_once_with(image_info['node_uuid'], 'manager', 'configdrive_data') @@ -967,7 +976,8 @@ class TestStandbyExtension(base.IronicAgentTest): ) async_result.join() - dispatch_mock.assert_any_call('get_os_install_device') + dispatch_mock.assert_any_call('get_os_install_device', + permit_refresh=True) self.assertFalse(configdrive_copy_mock.called) # Assert we've streamed the image or not diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 813c87696..0ab04b673 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -1467,6 +1467,46 @@ class TestGenericHardwareManager(base.IronicAgentTest): mock_cached_node.assert_called_once_with() mock_dev.assert_called_once_with() + @mock.patch.object(hardware, 'update_cached_node', autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(hardware, 'get_cached_node', autospec=True) + def test_get_os_install_device_no_root_device(self, mock_cached_node, + mock_dev, + mock_update): + mock_cached_node.return_value = {'properties': {}, + 'uuid': 'node1', + 'instance_info': {}} + mock_dev.return_value = [ + hardware.BlockDevice(name='/dev/sda', + model='TinyUSB Drive', + size=3116853504, + rotational=False, + vendor='Super Vendor', + wwn='wwn0', + wwn_with_extension='wwn0ven0', + wwn_vendor_extension='ven0', + serial='serial0'), + hardware.BlockDevice(name='/dev/sdb', + model='magical disk', + size=10737418240, + rotational=True, + vendor='fake-vendor', + wwn='fake-wwn', + wwn_with_extension='fake-wwnven0', + wwn_vendor_extension='ven0', + serial='fake-serial', + by_path='/dev/disk/by-path/1:0:0:0'), + ] + mock_update.return_value = {'properties': {'root_device': + {'name': '/dev/sda'}}, + 'uuid': 'node1', + 'instance_info': {'magic': 'value'}} + self.assertEqual('/dev/sda', + self.hardware.get_os_install_device( + permit_refresh=True)) + self.assertEqual(1, mock_cached_node.call_count) + mock_dev.assert_called_once_with() + def test__get_device_info(self): fileobj = mock.mock_open(read_data='fake-vendor') with mock.patch( @@ -4468,3 +4508,33 @@ class TestListHardwareInfo(base.IronicAgentTest): self.assertEqual(fake_info, hardware.list_hardware_info()) mock_dispatch.assert_called_with('list_hardware_info') self.assertEqual(2, mock_dispatch.call_count) + + +class TestAPIClientSaveAndUse(base.IronicAgentTest): + + def test_save_api_client(self): + hardware.API_CLIENT = None + mock_api_client = mock.Mock() + hardware.save_api_client(mock_api_client, 1, 2) + self.assertEqual(mock_api_client, hardware.API_CLIENT) + + @mock.patch('ironic_python_agent.hardware.dispatch_to_managers', + autospec=True) + @mock.patch.object(hardware, 'get_cached_node', autospec=True) + def test_update_node_cache(self, mock_cached_node, mock_dispatch): + mock_cached_node.return_value = {'uuid': 'node1'} + updated_node = {'uuid': 'node1', 'other': 'key'} + hardware.API_CLIENT = None + mock_api_client = mock.Mock() + hardware.save_api_client(mock_api_client, 1, 2) + mock_api_client.lookup_node.return_value = {'node': updated_node} + self.assertEqual(updated_node, hardware.update_cached_node()) + mock_api_client.lookup_node.assert_called_with( + hardware_info=mock.ANY, + timeout=1, + starting_interval=2, + uuid='node1') + self.assertEqual(updated_node, hardware.NODE) + calls = [mock.call('list_hardware_info'), + mock.call('wait_for_disks')] + mock_dispatch.assert_has_calls(calls) diff --git a/releasenotes/notes/fasttrack-stale-cache-fd93b56a955c7ab1.yaml b/releasenotes/notes/fasttrack-stale-cache-fd93b56a955c7ab1.yaml new file mode 100644 index 000000000..6738b7fe3 --- /dev/null +++ b/releasenotes/notes/fasttrack-stale-cache-fd93b56a955c7ab1.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixes an issue with nodes undergoing fast-track from introspection to + deployment where the agent internal cache of the node may be stale. + In particular, this can be observed if node does not honor a root device + hint which is saved to Ironic's API *after* the agent was started. + More information can be found in `story 2008039 + `_.