From d7d8f8754b36af4b686ce540e699fc5cb945c10c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aija=20Jaunt=C4=93va?= Date: Tue, 28 Dec 2021 07:16:01 -0500 Subject: [PATCH] Fix Redfish RAID for non-immediate controllers Fixes ``redfish`` RAID interface workflow to not create more than 1 logical disks per controller at the same time when controller does not support 'Immediate' application time and system needs rebooting. With this fix virtual disks are grouped by controller and executed in batches when reboot is required. This enables ``redfish`` RAID interface to be used with iDRAC systems directly as although there are many controllers that support 'Immediate' application time, they do it only when system has completed loading. System intermittently completes loading after IPA starts executing clean or deploy steps rendering controllers not supporting 'Immediate' apply time and requiring reboot. This fix handles such scenario instead of failing to create more than 1 virtual disk. Story: 2009863 Task: 44529 Change-Id: Ia2ce34f09695731b0f48798f662006c4904e2223 --- doc/source/admin/drivers/idrac.rst | 9 + ironic/drivers/modules/redfish/raid.py | 282 +++++++++++------- .../unit/drivers/modules/redfish/test_raid.py | 276 +++++++++++++++-- ...aid-onreset-workflow-bfa44de6b0263a1f.yaml | 9 + 4 files changed, 435 insertions(+), 141 deletions(-) create mode 100644 releasenotes/notes/fix-redfish-raid-onreset-workflow-bfa44de6b0263a1f.yaml diff --git a/doc/source/admin/drivers/idrac.rst b/doc/source/admin/drivers/idrac.rst index 494d151a75..b5aa92539e 100644 --- a/doc/source/admin/drivers/idrac.rst +++ b/doc/source/admin/drivers/idrac.rst @@ -466,6 +466,15 @@ RAID Interface See :doc:`/admin/raid` for more information on Ironic RAID support. +RAID interface of ``redfish`` hardware type can be used on iDRAC systems. +Compared to ``redfish`` RAID interface, using ``idrac-redfish`` adds: + +* Waiting for real-time operations to be available on RAID controllers. When + using ``redfish`` this is not guaranteed and reboots might be intermittently + required to complete, +* Converting non-RAID disks to RAID mode if there are any, +* Clearing foreign configuration, if any, after deleting virtual disks. + The following properties are supported by the iDRAC WSMAN and Redfish RAID interface implementation: diff --git a/ironic/drivers/modules/redfish/raid.py b/ironic/drivers/modules/redfish/raid.py index a7bf752597..ab9a3589ed 100644 --- a/ironic/drivers/modules/redfish/raid.py +++ b/ironic/drivers/modules/redfish/raid.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License. +import itertools import math from ironic_lib import metrics_utils @@ -700,32 +701,6 @@ class RedfishRAID(base.RAIDInterface): """ return redfish_utils.COMMON_PROPERTIES.copy() - def _validate_vendor(self, task): - vendor = task.node.properties.get('vendor') - if not vendor: - return - - if 'dell' in vendor.lower().split(): - raise exception.InvalidParameterValue( - _("The %(iface)s raid interface is not suitable for node " - "%(node)s with vendor %(vendor)s, use idrac-redfish instead") - % {'iface': task.node.get_interface('raid'), - 'node': task.node.uuid, 'vendor': vendor}) - - def validate(self, task): - """Validates the RAID Interface. - - This method validates the properties defined by Ironic for RAID - configuration. Driver implementations of this interface can override - this method for doing more validations (such as BMC's credentials). - - :param task: A TaskManager instance. - :raises: InvalidParameterValue, if the RAID configuration is invalid. - :raises: MissingParameterValue, if some parameters are missing. - """ - self._validate_vendor(task) - super(RedfishRAID, self).validate(task) - def validate_raid_config(self, task, raid_config): """Validates the given RAID configuration. @@ -817,31 +792,14 @@ class RedfishRAID(base.RAIDInterface): logical_disks_to_create = self.pre_create_configuration( task, logical_disks_to_create) - reboot_required = False - raid_configs = list() - for logical_disk in logical_disks_to_create: - raid_config = dict() - response = create_virtual_disk( - task, - raid_controller=logical_disk.get('controller'), - physical_disks=logical_disk['physical_disks'], - raid_level=logical_disk['raid_level'], - size_bytes=logical_disk['size_bytes'], - disk_name=logical_disk.get('name'), - span_length=logical_disk.get('span_length'), - span_depth=logical_disk.get('span_depth'), - error_handler=self.volume_create_error_handler) - # only save the async tasks (task_monitors) in raid_config - if (response is not None - and hasattr(response, 'task_monitor_uri')): - raid_config['operation'] = 'create' - raid_config['raid_controller'] = logical_disk.get( - 'controller') - raid_config['task_monitor_uri'] = response.task_monitor_uri - reboot_required = True - raid_configs.append(raid_config) - - node.set_driver_internal_info('raid_configs', raid_configs) + # Group logical disks by controller + def gb_key(x): + return x.get('controller') + gb_list = itertools.groupby( + sorted(logical_disks_to_create, key=gb_key), gb_key) + ld_grouped = {k: list(g) for k, g in gb_list} + raid_configs, reboot_required = self._submit_create_configuration( + task, ld_grouped) return_state = None deploy_utils.set_async_step_flags( @@ -866,53 +824,8 @@ class RedfishRAID(base.RAIDInterface): complete. """ node = task.node - system = redfish_utils.get_system(node) - vols_to_delete = [] - try: - for storage in system.storage.get_members(): - controller = (storage.storage_controllers[0] - if storage.storage_controllers else None) - controller_id = None - if controller: - controller_id = storage.identity - for volume in storage.volumes.get_members(): - if (volume.raid_type or volume.volume_type not in - [None, sushy.VOLUME_TYPE_RAW_DEVICE]): - vols_to_delete.append((storage.volumes, volume, - controller_id)) - except sushy.exceptions.SushyError as exc: - error_msg = _('Cannot get the list of volumes to delete for node ' - '%(node_uuid)s. Reason: %(error)s.' % - {'node_uuid': node.uuid, 'error': exc}) - LOG.error(error_msg) - raise exception.RedfishError(error=exc) - - self.pre_delete_configuration(task, vols_to_delete) - - reboot_required = False - raid_configs = list() - for vol_coll, volume, controller_id in vols_to_delete: - raid_config = dict() - apply_time = None - apply_time_support = vol_coll.operation_apply_time_support - if (apply_time_support - and apply_time_support.mapped_supported_values): - supported_values = apply_time_support.mapped_supported_values - if sushy.APPLY_TIME_IMMEDIATE in supported_values: - apply_time = sushy.APPLY_TIME_IMMEDIATE - elif sushy.APPLY_TIME_ON_RESET in supported_values: - apply_time = sushy.APPLY_TIME_ON_RESET - response = volume.delete(apply_time=apply_time) - # only save the async tasks (task_monitors) in raid_config - if (response is not None - and hasattr(response, 'task_monitor_uri')): - raid_config['operation'] = 'delete' - raid_config['raid_controller'] = controller_id - raid_config['task_monitor_uri'] = response.task_monitor_uri - reboot_required = True - raid_configs.append(raid_config) - - node.set_driver_internal_info('raid_configs', raid_configs) + raid_configs, reboot_required = self._submit_delete_configuration( + task) return_state = None deploy_utils.set_async_step_flags( @@ -1041,14 +954,15 @@ class RedfishRAID(base.RAIDInterface): """Periodic job to check RAID config tasks.""" self._check_node_raid_config(task) - def _raid_config_in_progress(self, task, raid_config): + def _raid_config_in_progress(self, task, task_monitor_uri, operation): """Check if this RAID configuration operation is still in progress. :param task: TaskManager object containing the node. - :param raid_config: RAID configuration operation details. + :param task_monitor_uri: Redfish task monitor URI + :param operation: 'create' or 'delete' operation for given task. + Used in log messages. :returns: True, if still in progress, otherwise False. """ - task_monitor_uri = raid_config['task_monitor_uri'] try: task_monitor = redfish_utils.get_task_monitor(task.node, task_monitor_uri) @@ -1056,14 +970,14 @@ class RedfishRAID(base.RAIDInterface): LOG.info('Unable to get status of RAID %(operation)s task ' '%(task_mon_uri)s to node %(node_uuid)s; assuming task ' 'completed successfully', - {'operation': raid_config['operation'], + {'operation': operation, 'task_mon_uri': task_monitor_uri, 'node_uuid': task.node.uuid}) return False if task_monitor.is_processing: LOG.debug('RAID %(operation)s task %(task_mon_uri)s to node ' '%(node_uuid)s still in progress', - {'operation': raid_config['operation'], + {'operation': operation, 'task_mon_uri': task_monitor.task_monitor_uri, 'node_uuid': task.node.uuid}) return True @@ -1080,13 +994,13 @@ class RedfishRAID(base.RAIDInterface): [sushy.HEALTH_OK, sushy.HEALTH_WARNING]): LOG.info('RAID %(operation)s task %(task_mon_uri)s to node ' '%(node_uuid)s completed.', - {'operation': raid_config['operation'], + {'operation': operation, 'task_mon_uri': task_monitor.task_monitor_uri, 'node_uuid': task.node.uuid}) else: LOG.error('RAID %(operation)s task %(task_mon_uri)s to node ' '%(node_uuid)s failed; messages: %(messages)s', - {'operation': raid_config['operation'], + {'operation': operation, 'task_mon_uri': task_monitor.task_monitor_uri, 'node_uuid': task.node.uuid, 'messages': ", ".join(messages)}) @@ -1094,19 +1008,157 @@ class RedfishRAID(base.RAIDInterface): @METRICS.timer('RedfishRAID._check_node_raid_config') def _check_node_raid_config(self, task): - """Check the progress of running RAID config on a node.""" + """Check the progress of running RAID config on a node. + + :param task: TaskManager object containing the node. + """ node = task.node raid_configs = node.driver_internal_info['raid_configs'] task.upgrade_lock() - raid_configs[:] = [i for i in raid_configs - if self._raid_config_in_progress(task, i)] + raid_configs['task_monitor_uri'] =\ + [i for i in raid_configs.get('task_monitor_uri') + if self._raid_config_in_progress( + task, i, raid_configs.get('operation'))] + node.set_driver_internal_info('raid_configs', raid_configs) - if not raid_configs: - self._clear_raid_configs(node) - LOG.info('RAID configuration completed for node %(node)s', - {'node': node.uuid}) - if task.node.clean_step: - manager_utils.notify_conductor_resume_clean(task) + if not raid_configs['task_monitor_uri']: + if raid_configs.get('pending'): + if raid_configs.get('operation') == 'create': + reboot_required = self._submit_create_configuration( + task, raid_configs.get('pending'))[1] + else: + reboot_required = self._submit_delete_configuration( + task)[1] + if reboot_required: + deploy_utils.reboot_to_finish_step(task) else: - manager_utils.notify_conductor_resume_deploy(task) + self._clear_raid_configs(node) + LOG.info('RAID configuration completed for node %(node)s', + {'node': node.uuid}) + if task.node.clean_step: + manager_utils.notify_conductor_resume_clean(task) + else: + manager_utils.notify_conductor_resume_deploy(task) + + def _submit_create_configuration(self, task, ld_grouped): + """Processes and submits requests for creating RAID configuration. + + :param task: TaskManager object containing the node. + :param ld_grouped: Dictionary of logical disks, grouped by controller. + + :returns: tuple of 1) dictionary containing operation name (create), + pending items, and task monitor URIs, and 2) flag indicating if + reboot is required. + """ + node = task.node + reboot_required = False + raid_configs = {'operation': 'create', 'pending': {}} + for controller, logical_disks in ld_grouped.items(): + iter_logical_disks = iter(logical_disks) + for logical_disk in iter_logical_disks: + response = create_virtual_disk( + task, + raid_controller=logical_disk.get('controller'), + physical_disks=logical_disk['physical_disks'], + raid_level=logical_disk['raid_level'], + size_bytes=logical_disk['size_bytes'], + disk_name=logical_disk.get('name'), + span_length=logical_disk.get('span_length'), + span_depth=logical_disk.get('span_depth'), + error_handler=self.volume_create_error_handler) + if (response is not None + and hasattr(response, 'task_monitor_uri')): + raid_configs.setdefault('task_monitor_uri', []).append( + response.task_monitor_uri) + reboot_required = True + # Don't process any on this controller until these + # created to avoid failures where only 1 request + # per controller can be submitted for non-immediate + break + # Append remaining disks for this controller, if any left + for logical_disk in iter_logical_disks: + raid_configs['pending'].setdefault(controller, []).append( + logical_disk) + + node.set_driver_internal_info('raid_configs', raid_configs) + + return raid_configs, reboot_required + + def _submit_delete_configuration(self, task): + """Processes and submits requests for deleting virtual disks. + + :param task: TaskManager object containing the node. + + :returns: tuple of 1) dictionary containing operation name (delete), + flag to indicate if any disks remaining, and task monitor URIs, + and 2) flag indicating if reboot is required + :raises RedfishError: if fails to get list of virtual disks + """ + node = task.node + system = redfish_utils.get_system(node) + vols_to_delete = {} + any_left = False + try: + for storage in system.storage.get_members(): + controller = (storage.storage_controllers[0] + if storage.storage_controllers else None) + controller_id = None + if controller: + controller_id = storage.identity + iter_volumes = iter(storage.volumes.get_members()) + for volume in iter_volumes: + if (volume.raid_type or volume.volume_type not in + [None, sushy.VOLUME_TYPE_RAW_DEVICE]): + if controller_id not in vols_to_delete: + vols_to_delete[controller_id] = [] + apply_time = self._get_apply_time( + storage.volumes.operation_apply_time_support) + vols_to_delete[controller_id].append(( + apply_time, volume)) + if apply_time == sushy.APPLY_TIME_ON_RESET: + # Don't process any on this controller until these + # deleted to avoid failures where only 1 request + # per controller can be submitted for non-immediate + break + any_left = any(iter_volumes) + except sushy.exceptions.SushyError as exc: + error_msg = _('Cannot get the list of volumes to delete for node ' + '%(node_uuid)s. Reason: %(error)s.' % + {'node_uuid': node.uuid, 'error': exc}) + LOG.error(error_msg) + raise exception.RedfishError(error=exc) + + self.pre_delete_configuration(task, vols_to_delete) + + reboot_required = False + raid_configs = {'operation': 'delete', 'pending': any_left} + for controller, vols_to_delete in vols_to_delete.items(): + for apply_time, volume in vols_to_delete: + response = volume.delete(apply_time=apply_time) + # only save the async tasks (task_monitors) in raid_config + if (response is not None + and hasattr(response, 'task_monitor_uri')): + raid_configs.setdefault('task_monitor_uri', []).append( + response.task_monitor_uri) + reboot_required = True + + node.set_driver_internal_info('raid_configs', raid_configs) + + return raid_configs, reboot_required + + def _get_apply_time(self, apply_time_support): + """Gets apply time for RAID operations + + :param apply_time_support: Supported apply times + :returns: None, if supported apply times not specified. Otherwise + Immediate when available, or OnReset that will require rebooting. + """ + apply_time = None + if apply_time_support and apply_time_support.mapped_supported_values: + supported_values = apply_time_support.mapped_supported_values + if sushy.APPLY_TIME_IMMEDIATE in supported_values: + apply_time = sushy.APPLY_TIME_IMMEDIATE + elif sushy.APPLY_TIME_ON_RESET in supported_values: + apply_time = sushy.APPLY_TIME_ON_RESET + return apply_time diff --git a/ironic/tests/unit/drivers/modules/redfish/test_raid.py b/ironic/tests/unit/drivers/modules/redfish/test_raid.py index 07db6f6f8a..ef3bba45e0 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_raid.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_raid.py @@ -76,7 +76,7 @@ class RedfishRAIDTestCase(db_base.DbTestCase): ) self.node = obj_utils.create_test_node( self.context, driver='redfish', driver_info=INFO_DICT) - self.mock_storage = mock.MagicMock() + self.mock_storage = mock.MagicMock(identity='RAID controller 1') self.drive_id1 = '35D38F11ACEF7BD3' self.drive_id2 = '3F5A8C54207B7233' self.drive_id3 = '32ADF365C6C1B7BD' @@ -422,6 +422,9 @@ class RedfishRAIDTestCase(db_base.DbTestCase): } ] } + resource = mock.MagicMock(spec=['resource_name']) + resource.resource_name = 'volume' + self.mock_storage.volumes.create.return_value = resource mock_get_system.return_value.storage.get_members.return_value = [ self.mock_storage] self.node.target_raid_config = target_raid_config @@ -464,6 +467,88 @@ class RedfishRAIDTestCase(db_base.DbTestCase): expected_payload2, apply_time=None ) + @mock.patch.object(redfish_boot.RedfishVirtualMediaBoot, 'prepare_ramdisk', + spec_set=True, autospec=True) + @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(deploy_utils, 'get_async_step_return_state', + autospec=True) + @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) + def test_create_config_case_2_on_reset( + self, + mock_set_async_step_flags, + mock_get_async_step_return_state, + mock_node_power_action, + mock_build_agent_options, + mock_prepare_ramdisk, + mock_get_system): + + target_raid_config = { + 'logical_disks': [ + { + 'size_gb': 100, + 'raid_level': '5', + 'is_root_volume': True, + 'disk_type': 'ssd' + }, + { + 'size_gb': 500, + 'raid_level': '1', + 'disk_type': 'hdd' + } + ] + } + volumes = mock.MagicMock() + op_apply_time_support = mock.MagicMock() + op_apply_time_support.mapped_supported_values = [ + sushy.APPLY_TIME_ON_RESET] + volumes.operation_apply_time_support = op_apply_time_support + self.mock_storage.volumes = volumes + mock_get_system.return_value.storage.get_members.return_value = [ + self.mock_storage] + task_mon = mock.MagicMock() + task_mon.task_monitor_uri = '/TaskService/123' + volumes.create.return_value = task_mon + self.node.target_raid_config = target_raid_config + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + task.driver.raid.create_configuration(task) + pre = '/redfish/v1/Systems/1/Storage/1/Drives/' + expected_payload = { + 'Encrypted': False, + 'VolumeType': 'Mirrored', + 'RAIDType': 'RAID1', + 'CapacityBytes': 536870912000, + 'Links': { + 'Drives': [ + {'@odata.id': pre + self.drive_id1}, + {'@odata.id': pre + self.drive_id2} + ] + } + } + expected_raid_configs = { + 'operation': 'create', + 'pending': {'RAID controller 1': [ + {'controller': 'RAID controller 1', + 'disk_type': 'ssd', + 'is_root_volume': True, + 'physical_disks': [self.drive_id5, + self.drive_id6, + self.drive_id7], + 'raid_level': '5', + 'size_bytes': 107374182400, + 'span_depth': 1, + 'span_length': 3.0}]}, + 'task_monitor_uri': ['/TaskService/123']} + self.assertEqual( + self.mock_storage.volumes.create.call_count, 1) + self.mock_storage.volumes.create.assert_called_with( + expected_payload, apply_time=sushy.APPLY_TIME_ON_RESET) + self.assertEqual( + expected_raid_configs, + task.node.driver_internal_info.get('raid_configs')) + @mock.patch.object(redfish_boot.RedfishVirtualMediaBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @@ -574,6 +659,9 @@ class RedfishRAIDTestCase(db_base.DbTestCase): } ] } + resource = mock.MagicMock(spec=['resource_name']) + resource.resource_name = 'volume' + self.mock_storage.volumes.create.return_value = resource mock_get_system.return_value.storage.get_members.return_value = [ self.mock_storage] self.node.target_raid_config = target_raid_config @@ -686,6 +774,9 @@ class RedfishRAIDTestCase(db_base.DbTestCase): } ] } + resource = mock.MagicMock(spec=['resource_name']) + resource.resource_name = 'volume' + self.mock_storage.volumes.create.return_value = resource mock_get_system.return_value.storage.get_members.return_value = [ self.mock_storage] self.node.target_raid_config = target_raid_config @@ -797,6 +888,9 @@ class RedfishRAIDTestCase(db_base.DbTestCase): } ] } + resource = mock.MagicMock(spec=['resource_name']) + resource.resource_name = 'volume' + self.mock_storage.volumes.create.return_value = resource mock_get_system.return_value.storage.get_members.return_value = [ self.mock_storage] self.node.target_raid_config = target_raid_config @@ -913,7 +1007,7 @@ class RedfishRAIDTestCase(db_base.DbTestCase): shared=True) as task: task.driver.raid.delete_configuration(task) self.assertEqual(mock_volumes[0].delete.call_count, 1) - self.assertEqual(mock_volumes[1].delete.call_count, 1) + self.assertEqual(mock_volumes[1].delete.call_count, 0) mock_set_async_step_flags.assert_called_once_with( task.node, reboot=True, skip_current_step=True, polling=True) mock_get_async_step_return_state.assert_called_once_with( @@ -921,6 +1015,11 @@ class RedfishRAIDTestCase(db_base.DbTestCase): mock_node_power_action.assert_called_once_with(task, states.REBOOT) mock_build_agent_options.assert_called_once_with(task.node) self.assertEqual(mock_prepare_ramdisk.call_count, 1) + self.assertEqual( + {'operation': 'delete', + 'pending': True, + 'task_monitor_uri': ['/TaskService/123']}, + task.node.driver_internal_info.get('raid_configs')) def test_volume_create_error_handler(self, mock_get_system): volume_collection = self.mock_storage.volumes @@ -956,22 +1055,6 @@ class RedfishRAIDTestCase(db_base.DbTestCase): task, sushy_error, volume_collection, expected_payload ) - def test_validate(self, mock_get_system): - with task_manager.acquire(self.context, self.node.uuid, - shared=True) as task: - task.node.properties['vendor'] = "Supported vendor" - - task.driver.raid.validate(task) - - def test_validate_unsupported_vendor(self, mock_get_system): - with task_manager.acquire(self.context, self.node.uuid, - shared=True) as task: - task.node.properties['vendor'] = "Dell Inc." - - self.assertRaisesRegex(exception.InvalidParameterValue, - "with vendor Dell.Inc.", - task.driver.raid.validate, task) - def test_validate_raid_config(self, mock_get_system): raid_config = { 'logical_disks': [ @@ -1077,8 +1160,7 @@ class RedfishRAIDTestCase(db_base.DbTestCase): raid = redfish_raid.RedfishRAID() result = raid._raid_config_in_progress( - task, {'task_monitor_uri': '/TaskService/123', - 'operation': 'create'}) + task, '/TaskService/123', 'create') self.assertEqual(False, result) mock_info.assert_called_once() @@ -1094,8 +1176,7 @@ class RedfishRAIDTestCase(db_base.DbTestCase): raid = redfish_raid.RedfishRAID() result = raid._raid_config_in_progress( - task, {'task_monitor_uri': '/TaskService/123', - 'operation': 'create'}) + task, '/TaskService/123', 'create') self.assertEqual(False, result) mock_info.assert_called_once() @@ -1112,8 +1193,7 @@ class RedfishRAIDTestCase(db_base.DbTestCase): raid = redfish_raid.RedfishRAID() result = raid._raid_config_in_progress( - task, {'task_monitor_uri': '/TaskService/123', - 'operation': 'create'}) + task, '/TaskService/123', 'create') self.assertEqual(True, result) mock_debug.assert_called_once() @@ -1138,7 +1218,151 @@ class RedfishRAIDTestCase(db_base.DbTestCase): raid = redfish_raid.RedfishRAID() result = raid._raid_config_in_progress( - task, {'task_monitor_uri': '/TaskService/123', - 'operation': 'create'}) + task, '/TaskService/123', 'create') self.assertEqual(False, result) mock_error.assert_called_once() + + @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + autospec=True) + @mock.patch.object(manager_utils, 'notify_conductor_resume_deploy', + autospec=True) + @mock.patch.object(redfish_utils, 'get_task_monitor', + autospec=True) + def test__check_node_raid_config_deploy( + self, mock_get_task_monitor, mock_resume_deploy, + mock_resume_clean, mock_get_system): + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + task.node.deploy_step = {'priority': 100, 'interface': 'raid', + 'step': 'delete_configuration', + 'argsinfo': {}} + info = task.node.driver_internal_info + info['raid_configs'] = {'operation': 'delete', 'pending': {}, + 'task_monitor_uri': ['/TaskService/123']} + task.node.driver_internal_info = info + task.node.save() + + mock_task_monitor = mock_get_task_monitor.return_value + mock_task_monitor.is_processing = False + mock_task_monitor.response.status_code = 200 + + raid = redfish_raid.RedfishRAID() + raid._check_node_raid_config(task) + + mock_resume_deploy.assert_called_with(task) + mock_resume_clean.assert_not_called() + + @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + autospec=True) + @mock.patch.object(manager_utils, 'notify_conductor_resume_deploy', + autospec=True) + @mock.patch.object(redfish_utils, 'get_task_monitor', + autospec=True) + def test__check_node_raid_config_clean( + self, mock_get_task_monitor, mock_resume_deploy, + mock_resume_clean, mock_get_system): + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + task.node.clean_step = {'interface': 'raid', + 'step': 'delete_configuration', + 'argsinfo': {}} + info = task.node.driver_internal_info + info['raid_configs'] = {'operation': 'delete', 'pending': {}, + 'task_monitor_uri': ['/TaskService/123']} + task.node.driver_internal_info = info + task.node.save() + + mock_task_monitor = mock_get_task_monitor.return_value + mock_task_monitor.is_processing = False + mock_task_monitor.response.status_code = 200 + + raid = redfish_raid.RedfishRAID() + raid._check_node_raid_config(task) + + mock_resume_deploy.assert_not_called() + mock_resume_clean.assert_called_with(task) + + @mock.patch.object(redfish_utils, 'get_task_monitor', + autospec=True) + @mock.patch.object(redfish_raid.RedfishRAID, + '_submit_create_configuration', autospec=True) + @mock.patch.object(redfish_raid.RedfishRAID, + '_submit_delete_configuration', autospec=True) + @mock.patch.object(deploy_utils, 'reboot_to_finish_step', autospec=True) + def test__check_node_raid_config_pending_create( + self, mock_reboot, mock_submit_delete, mock_submit_create, + mock_get_task_monitor, mock_get_system): + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + task.node.clean_step = {'interface': 'raid', + 'step': 'create_configuration', + 'argsinfo': {}} + info = task.node.driver_internal_info + raid_configs = { + 'operation': 'create', + 'pending': {'RAID controller 1': [ + {'controller': 'RAID controller 1', + 'disk_type': 'ssd', + 'is_root_volume': True, + 'physical_disks': [self.drive_id5, + self.drive_id6, + self.drive_id7], + 'raid_level': '5', + 'size_bytes': 107374182400, + 'span_depth': 1, + 'span_length': 3.0}]}, + 'task_monitor_uri': ['/TaskService/123']} + info['raid_configs'] = raid_configs + task.node.driver_internal_info = info + task.node.save() + + mock_task_monitor = mock_get_task_monitor.return_value + mock_task_monitor.is_processing = False + mock_task_monitor.response.status_code = 200 + + mock_submit_create.return_value = ({}, True) + + raid = redfish_raid.RedfishRAID() + raid._check_node_raid_config(task) + + mock_submit_create.assert_called_with( + raid, task, raid_configs['pending']) + mock_submit_delete.assert_not_called() + mock_reboot.assert_called_with(task) + + @mock.patch.object(redfish_utils, 'get_task_monitor', + autospec=True) + @mock.patch.object(redfish_raid.RedfishRAID, + '_submit_create_configuration', autospec=True) + @mock.patch.object(redfish_raid.RedfishRAID, + '_submit_delete_configuration', autospec=True) + @mock.patch.object(deploy_utils, 'reboot_to_finish_step', autospec=True) + def test__check_node_raid_config_pending_delete( + self, mock_reboot, mock_submit_delete, mock_submit_create, + mock_get_task_monitor, mock_get_system): + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + task.node.clean_step = {'interface': 'raid', + 'step': 'delete_configuration', + 'argsinfo': {}} + info = task.node.driver_internal_info + raid_configs = { + 'operation': 'delete', + 'pending': True, + 'task_monitor_uri': ['/TaskService/123']} + info['raid_configs'] = raid_configs + task.node.driver_internal_info = info + task.node.save() + + mock_task_monitor = mock_get_task_monitor.return_value + mock_task_monitor.is_processing = False + mock_task_monitor.response.status_code = 200 + + mock_submit_delete.return_value = ({}, False) + + raid = redfish_raid.RedfishRAID() + raid._check_node_raid_config(task) + + mock_submit_create.assert_not_called() + mock_submit_delete.assert_called_with(raid, task) + mock_reboot.assert_not_called() diff --git a/releasenotes/notes/fix-redfish-raid-onreset-workflow-bfa44de6b0263a1f.yaml b/releasenotes/notes/fix-redfish-raid-onreset-workflow-bfa44de6b0263a1f.yaml new file mode 100644 index 0000000000..6cb8bdc1b0 --- /dev/null +++ b/releasenotes/notes/fix-redfish-raid-onreset-workflow-bfa44de6b0263a1f.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + Fixes the ``redfish`` hardware type RAID device creation and deletion when + creating or deleting more than 1 logical disk on RAID controllers that + require rebooting and do not allow more than 1 running task per RAID + controller. Before this fix 2nd logical disk would fail to be created or + deleted. With this change it is now possible to use ``redfish`` ``raid`` + interface on iDRAC systems.