diff --git a/ironic_python_agent/agent.py b/ironic_python_agent/agent.py index ec520cd63..1cfe2e202 100644 --- a/ironic_python_agent/agent.py +++ b/ironic_python_agent/agent.py @@ -472,6 +472,10 @@ class IronicPythonAgent(base.ExecuteCommandMixin): node_uuid=uuid) LOG.debug('Received lookup results: %s', content) self.process_lookup_data(content) + # Save the API url in case we need it later. + hardware.save_api_client( + self.api_client, self.lookup_timeout, + self.lookup_interval) elif cfg.CONF.inspection_callback_url: LOG.info('No ipa-api-url configured, Heartbeat and lookup ' diff --git a/ironic_python_agent/extensions/iscsi.py b/ironic_python_agent/extensions/iscsi.py index 1b3b308cb..c519ab32c 100644 --- a/ironic_python_agent/extensions/iscsi.py +++ b/ironic_python_agent/extensions/iscsi.py @@ -185,7 +185,8 @@ class ISCSIExtension(base.BaseAgentExtension): if iqn is None: iqn = 'iqn.2008-10.org.openstack:%s' % uuidutils.generate_uuid() - device = hardware.dispatch_to_managers('get_os_install_device') + device = hardware.dispatch_to_managers('get_os_install_device', + permit_refresh=True) if wipe_disk_metadata: disk_utils.destroy_disk_metadata( diff --git a/ironic_python_agent/extensions/standby.py b/ironic_python_agent/extensions/standby.py index e687d10ef..0d42c9292 100644 --- a/ironic_python_agent/extensions/standby.py +++ b/ironic_python_agent/extensions/standby.py @@ -627,7 +627,8 @@ class StandbyExtension(base.BaseAgentExtension): :raises: ImageWriteError if writing the image fails. """ LOG.debug('Caching image %s', image_info['id']) - device = hardware.dispatch_to_managers('get_os_install_device') + device = hardware.dispatch_to_managers('get_os_install_device', + permit_refresh=True) msg = 'image ({}) already present on device {} ' @@ -669,7 +670,8 @@ class StandbyExtension(base.BaseAgentExtension): large to store on the given device. """ LOG.debug('Preparing image %s', image_info['id']) - device = hardware.dispatch_to_managers('get_os_install_device') + device = hardware.dispatch_to_managers('get_os_install_device', + permit_refresh=True) disk_format = image_info.get('disk_format') stream_raw_images = image_info.get('stream_raw_images', False) diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index 5aed30670..399d70609 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -53,7 +53,9 @@ UNIT_CONVERTER.define('bytes = []') UNIT_CONVERTER.define('MB = 1048576 bytes') _MEMORY_ID_RE = re.compile(r'^memory(:\d+)?$') NODE = None - +API_CLIENT = None +API_LOOKUP_TIMEOUT = None +API_LOOKUP_INTERVAL = None SUPPORTED_SOFTWARE_RAID_LEVELS = frozenset(['0', '1', '1+0', '5', '6']) RAID_APPLY_CONFIGURATION_ARGSINFO = { @@ -470,6 +472,40 @@ def list_all_block_devices(block_type='disk', return devices +def save_api_client(client=None, timeout=None, interval=None): + """Preserves access to the API client for potential later re-use.""" + global API_CLIENT, API_LOOKUP_TIMEOUT, API_LOOKUP_INTERVAL + + if client and timeout and interval and not API_CLIENT: + API_CLIENT = client + API_LOOKUP_TIMEOUT = timeout + API_LOOKUP_INTERVAL = interval + + +def update_cached_node(): + """Attmepts to update the node cache via the API""" + cached_node = get_cached_node() + if API_CLIENT: + LOG.info('Agent is requesting to perform an explicit node cache ' + 'update. This is to pickup any chanages in the cache ' + 'before deployment.') + try: + if cached_node is None: + uuid = None + else: + uuid = cached_node['uuid'] + content = API_CLIENT.lookup_node( + hardware_info=list_hardware_info(use_cache=True), + timeout=API_LOOKUP_TIMEOUT, + starting_interval=API_LOOKUP_INTERVAL, + uuid=uuid) + cache_node(content['node']) + return content['node'] + except Exception as exc: + LOG.warning('Failed to update node cache. Error %s', exc) + return cached_node + + class HardwareSupport(object): """Example priorities for hardware managers. @@ -597,7 +633,7 @@ class HardwareManager(object, metaclass=abc.ABCMeta): def get_memory(self): raise errors.IncompatibleHardwareMethodError - def get_os_install_device(self): + def get_os_install_device(self, permit_refresh=False): raise errors.IncompatibleHardwareMethodError def get_bmc_address(self): @@ -1052,13 +1088,18 @@ class GenericHardwareManager(HardwareManager): ) return block_devices - def get_os_install_device(self): + def get_os_install_device(self, permit_refresh=False): cached_node = get_cached_node() root_device_hints = None if cached_node is not None: root_device_hints = ( cached_node['instance_info'].get('root_device') or cached_node['properties'].get('root_device')) + if permit_refresh and not root_device_hints: + cached_node = update_cached_node() + root_device_hints = ( + cached_node['instance_info'].get('root_device') + or cached_node['properties'].get('root_device')) LOG.debug('Looking for a device matching root hints %s', root_device_hints) diff --git a/ironic_python_agent/tests/unit/extensions/test_iscsi.py b/ironic_python_agent/tests/unit/extensions/test_iscsi.py index 131ad0751..a4678a98e 100644 --- a/ironic_python_agent/tests/unit/extensions/test_iscsi.py +++ b/ironic_python_agent/tests/unit/extensions/test_iscsi.py @@ -69,7 +69,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest): '--op', 'bind', '--tid', '1', '--initiator-address', 'ALL')] mock_execute.assert_has_calls(expected) - mock_dispatch.assert_called_once_with('get_os_install_device') + mock_dispatch.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual({'iscsi_target_iqn': self.fake_iqn}, result.command_result) self.assertFalse(mock_destroy.called) @@ -98,7 +99,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest): '--op', 'bind', '--tid', '1', '--initiator-address', 'ALL')] mock_execute.assert_has_calls(expected) - mock_dispatch.assert_called_once_with('get_os_install_device') + mock_dispatch.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual({'iscsi_target_iqn': self.fake_iqn}, result.command_result) @@ -119,7 +121,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest): '--op', 'show', attempts=10)] mock_execute.assert_has_calls(expected) - mock_dispatch.assert_called_once_with('get_os_install_device') + mock_dispatch.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertFalse(mock_destroy.called) @mock.patch.object(iscsi, '_wait_for_tgtd', autospec=True) @@ -138,7 +141,8 @@ class TestISCSIExtensionTgt(base.IronicAgentTest): 'target', '--op', 'new', '--tid', '1', '--targetname', self.fake_iqn)] mock_execute.assert_has_calls(expected) - mock_dispatch.assert_called_once_with('get_os_install_device') + mock_dispatch.assert_called_once_with('get_os_install_device', + permit_refresh=True) def test_start_iscsi_target_fail_command_not_exist(self, mock_execute, mock_dispatch, diff --git a/ironic_python_agent/tests/unit/extensions/test_standby.py b/ironic_python_agent/tests/unit/extensions/test_standby.py index 5d27f6d73..9ca0f244e 100644 --- a/ironic_python_agent/tests/unit/extensions/test_standby.py +++ b/ironic_python_agent/tests/unit/extensions/test_standby.py @@ -569,7 +569,8 @@ class TestStandbyExtension(base.IronicAgentTest): async_result.join() download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual(image_info['id'], self.agent_extension.cached_image_id) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -595,7 +596,8 @@ class TestStandbyExtension(base.IronicAgentTest): async_result.join() download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual(image_info['id'], self.agent_extension.cached_image_id) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -626,7 +628,8 @@ class TestStandbyExtension(base.IronicAgentTest): async_result.join() download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual(image_info['id'], self.agent_extension.cached_image_id) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -654,7 +657,8 @@ class TestStandbyExtension(base.IronicAgentTest): async_result.join() self.assertFalse(download_mock.called) self.assertFalse(write_mock.called) - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual(image_info['id'], self.agent_extension.cached_image_id) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -699,7 +703,8 @@ class TestStandbyExtension(base.IronicAgentTest): download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) configdrive_copy_mock.assert_called_once_with(image_info['node_uuid'], 'manager', 'configdrive_data') @@ -749,7 +754,8 @@ class TestStandbyExtension(base.IronicAgentTest): download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertFalse(configdrive_copy_mock.called) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -821,7 +827,8 @@ class TestStandbyExtension(base.IronicAgentTest): download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertEqual(0, configdrive_copy_mock.call_count) self.assertEqual('SUCCEEDED', async_result.command_status) @@ -871,7 +878,8 @@ class TestStandbyExtension(base.IronicAgentTest): download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) self.assertFalse(configdrive_copy_mock.called) self.assertEqual('FAILED', async_result.command_status) @@ -913,7 +921,8 @@ class TestStandbyExtension(base.IronicAgentTest): download_mock.assert_called_once_with(image_info) write_mock.assert_called_once_with(image_info, 'manager') - dispatch_mock.assert_called_once_with('get_os_install_device') + dispatch_mock.assert_called_once_with('get_os_install_device', + permit_refresh=True) configdrive_copy_mock.assert_called_once_with(image_info['node_uuid'], 'manager', 'configdrive_data') @@ -967,7 +976,8 @@ class TestStandbyExtension(base.IronicAgentTest): ) async_result.join() - dispatch_mock.assert_any_call('get_os_install_device') + dispatch_mock.assert_any_call('get_os_install_device', + permit_refresh=True) self.assertFalse(configdrive_copy_mock.called) # Assert we've streamed the image or not diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 813c87696..0ab04b673 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -1467,6 +1467,46 @@ class TestGenericHardwareManager(base.IronicAgentTest): mock_cached_node.assert_called_once_with() mock_dev.assert_called_once_with() + @mock.patch.object(hardware, 'update_cached_node', autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(hardware, 'get_cached_node', autospec=True) + def test_get_os_install_device_no_root_device(self, mock_cached_node, + mock_dev, + mock_update): + mock_cached_node.return_value = {'properties': {}, + 'uuid': 'node1', + 'instance_info': {}} + mock_dev.return_value = [ + hardware.BlockDevice(name='/dev/sda', + model='TinyUSB Drive', + size=3116853504, + rotational=False, + vendor='Super Vendor', + wwn='wwn0', + wwn_with_extension='wwn0ven0', + wwn_vendor_extension='ven0', + serial='serial0'), + hardware.BlockDevice(name='/dev/sdb', + model='magical disk', + size=10737418240, + rotational=True, + vendor='fake-vendor', + wwn='fake-wwn', + wwn_with_extension='fake-wwnven0', + wwn_vendor_extension='ven0', + serial='fake-serial', + by_path='/dev/disk/by-path/1:0:0:0'), + ] + mock_update.return_value = {'properties': {'root_device': + {'name': '/dev/sda'}}, + 'uuid': 'node1', + 'instance_info': {'magic': 'value'}} + self.assertEqual('/dev/sda', + self.hardware.get_os_install_device( + permit_refresh=True)) + self.assertEqual(1, mock_cached_node.call_count) + mock_dev.assert_called_once_with() + def test__get_device_info(self): fileobj = mock.mock_open(read_data='fake-vendor') with mock.patch( @@ -4468,3 +4508,33 @@ class TestListHardwareInfo(base.IronicAgentTest): self.assertEqual(fake_info, hardware.list_hardware_info()) mock_dispatch.assert_called_with('list_hardware_info') self.assertEqual(2, mock_dispatch.call_count) + + +class TestAPIClientSaveAndUse(base.IronicAgentTest): + + def test_save_api_client(self): + hardware.API_CLIENT = None + mock_api_client = mock.Mock() + hardware.save_api_client(mock_api_client, 1, 2) + self.assertEqual(mock_api_client, hardware.API_CLIENT) + + @mock.patch('ironic_python_agent.hardware.dispatch_to_managers', + autospec=True) + @mock.patch.object(hardware, 'get_cached_node', autospec=True) + def test_update_node_cache(self, mock_cached_node, mock_dispatch): + mock_cached_node.return_value = {'uuid': 'node1'} + updated_node = {'uuid': 'node1', 'other': 'key'} + hardware.API_CLIENT = None + mock_api_client = mock.Mock() + hardware.save_api_client(mock_api_client, 1, 2) + mock_api_client.lookup_node.return_value = {'node': updated_node} + self.assertEqual(updated_node, hardware.update_cached_node()) + mock_api_client.lookup_node.assert_called_with( + hardware_info=mock.ANY, + timeout=1, + starting_interval=2, + uuid='node1') + self.assertEqual(updated_node, hardware.NODE) + calls = [mock.call('list_hardware_info'), + mock.call('wait_for_disks')] + mock_dispatch.assert_has_calls(calls) diff --git a/releasenotes/notes/fasttrack-stale-cache-fd93b56a955c7ab1.yaml b/releasenotes/notes/fasttrack-stale-cache-fd93b56a955c7ab1.yaml new file mode 100644 index 000000000..6738b7fe3 --- /dev/null +++ b/releasenotes/notes/fasttrack-stale-cache-fd93b56a955c7ab1.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixes an issue with nodes undergoing fast-track from introspection to + deployment where the agent internal cache of the node may be stale. + In particular, this can be observed if node does not honor a root device + hint which is saved to Ironic's API *after* the agent was started. + More information can be found in `story 2008039 + `_.