From f0e0ef634e55f46eda4942e9665d35c70d305365 Mon Sep 17 00:00:00 2001 From: Christopher Dearborn Date: Wed, 2 Sep 2020 22:11:04 -0400 Subject: [PATCH] Redfish driver firmware update This patch adds support for performing firmware updates using the redfish and idrac hardware types. Co-Authored-By: Aija Jaunteva Change-Id: Ie8f3f68c4a771121ec0ee13ce9349c7cd2b1e567 Depends-On: https://review.opendev.org/#/c/745950 Story: 2008140 Task: 40872 --- doc/source/admin/drivers/idrac.rst | 11 +- doc/source/admin/drivers/redfish.rst | 130 ++++ ironic/conf/redfish.py | 10 + ironic/drivers/modules/redfish/management.py | 337 +++++++++++ ironic/drivers/modules/redfish/utils.py | 65 +- .../modules/redfish/test_management.py | 564 ++++++++++++++++++ .../drivers/modules/redfish/test_utils.py | 17 + .../drivers/third_party_driver_mock_specs.py | 3 + .../unit/drivers/third_party_driver_mocks.py | 3 + ...fish-firmware-update-a06d0624325a66ca.yaml | 9 + 10 files changed, 1133 insertions(+), 16 deletions(-) create mode 100644 releasenotes/notes/redfish-firmware-update-a06d0624325a66ca.yaml diff --git a/doc/source/admin/drivers/idrac.rst b/doc/source/admin/drivers/idrac.rst index 796b09edb7..35acc3ba59 100644 --- a/doc/source/admin/drivers/idrac.rst +++ b/doc/source/admin/drivers/idrac.rst @@ -17,7 +17,7 @@ hardware types, though with smaller feature sets. Key features of the Dell iDRAC driver include: * Out-of-band node inspection -* Boot device management +* Boot device management and firmware management * Power management * RAID controller management and RAID volume configuration * BIOS settings configuration @@ -29,7 +29,7 @@ The ``idrac`` hardware type supports the following Ironic interfaces: * `BIOS Interface`_: BIOS management * `Inspect Interface`_: Hardware inspection -* Management Interface: Boot device management +* `Management Interface`_: Boot device and firmware management * Power Interface: Power management * `RAID Interface`_: RAID controller and disk management * `Vendor Interface`_: BIOS management @@ -265,6 +265,13 @@ The ``idrac-redfish`` inspect interface does not currently set ``pxe_enabled`` on the ports. The user should ensure that ``pxe_enabled`` is set correctly on the ports following inspection with the ``idrac-redfish`` inspect interface. +Management Interface +==================== + +The management interface for ``idrac-redfish`` supports updating firmware on +nodes using a manual cleaning step. + +See :doc:`/admin/drivers/redfish` for more information on firmware update support. RAID Interface ============== diff --git a/doc/source/admin/drivers/redfish.rst b/doc/source/admin/drivers/redfish.rst index c2740452a1..46f85644d8 100644 --- a/doc/source/admin/drivers/redfish.rst +++ b/doc/source/admin/drivers/redfish.rst @@ -249,6 +249,136 @@ scenario. Make sure to use add the simple-init_ element when building the IPA ramdisk. +Firmware update using manual cleaning step +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``redfish`` hardware type supports updating the firmware on nodes using a +manual cleaning step. + +The firmware update cleaning step allows one or more firmware updates to be +applied to a node. If multiple updates are specified, then they are applied +sequentially in the order given. The server is rebooted once per update. +If a failure occurs, the cleaning step immediately fails which may result +in some updates not being applied. If the node is placed into maintenance +mode while a firmware update cleaning step is running that is performing +multiple firmware updates, the update in progress will complete, and processing +of the remaining updates will pause. When the node is taken out of maintenance +mode, processing of the remaining updates will continue. + +When updating the BMC firmware, the BMC may become unavailable for a period of +time as it resets. In this case, it may be desireable to have the cleaning step +wait after the update has been applied before indicating that the +update was successful. This allows the BMC time to fully reset before further +operations are carried out against it. To cause the cleaning step to wait after +applying an update, an optional ``wait`` argument may be specified in the +firmware image dictionary. The value of this argument indicates the number of +seconds to wait following the update. If the ``wait`` argument is not +specified, then this is equivalent to ``wait 0``, meaning that it will not +wait and immediately proceed with the next firmware update if there is one, +or complete the cleaning step if not. + +The ``update_firmware`` cleaning step accepts JSON in the following format:: + + [{ + "interface": "management", + "step": "update_firmware", + "args": { + "firmware_images":[ + { + "url": "", + "wait": + }, + { + "url": "" + }, + ... + ] + } + }] + +The different attributes of the ``update_firmware`` cleaning step are as follows: + +.. csv-table:: + :header: "Attribute", "Description" + :widths: 30, 120 + + "``interface``", "Interface of the cleaning step. Must be ``management`` for firmware update" + "``step``", "Name of cleaning step. Must be ``update_firmware`` for firmware update" + "``args``", "Keyword-argument entry (: ) being passed to cleaning step" + "``args.firmware_images``", "Ordered list of dictionaries of firmware images to be applied" + +Each firmware image dictionary, is of the form:: + + { + "url": "", + "wait": + } + +The ``url`` argument in the firmware image dictionary is mandatory, while the +``wait`` argument is optional. + + +.. note:: + Only ``http`` and ``https`` URLs are currently supported in the ``url`` + argument. + +.. note:: + At the present time, targets for the firmware update cannot be specified. + In testing, the BMC applied the update to all applicable targets on the + node. It is assumed that the BMC knows what components a given firmware + image is applicable to. + +To perform a firmware update, first download the firmware to a web server that +the BMC has network access to. This could be the ironic conductor web server +or another web server on the BMC network. Using a web browser, curl, or similar +tool on a server that has network access to the BMC, try downloading +the firmware to verify that the URLs are correct and that the web server is +configured properly. + +Next, construct the JSON for the firmware update cleaning step to be executed. +When launching the firmware update, the JSON may be specified on the command +line directly or in a file. The following +example shows one cleaning step that installs two firmware updates. The first +updates the BMC firmware followed by a five minute wait to allow the BMC time +to start back up. The second updates the firmware on all applicable NICs.:: + + [{ + "interface": "management", + "step": "update_firmware", + "args": { + "firmware_images":[ + { + "url": "http://192.0.2.10/BMC_4_22_00_00.EXE", + "wait": 300 + }, + { + "url": "https://192.0.2.10/NIC_19.0.12_A00.EXE" + } + ] + } + }] + +Finally, launch the firmware update cleaning step against the node. The +following example assumes the above JSON is in a file named +``firmware_update.json``:: + + openstack baremetal node clean --clean-steps firmware_update.json + +In the following example, the JSON is specified directly on the command line:: + + openstack baremetal node clean --clean-steps '[{"interface": "management", "step": "update_firmware", "args": {"firmware_images":[{"url": "http://192.0.2.10/BMC_4_22_00_00.EXE", "wait": 300}, {"url": "https://192.0.2.10/NIC_19.0.12_A00.EXE"}]}}]' + +.. note:: + Firmware updates may take some time to complete. If a firmware update + cleaning step consistently times out, then consider performing fewer + firmware updates in the cleaning step or increasing + ``clean_callback_timeout`` in ironic.conf to increase the timeout value. + +.. warning:: + Warning: Removing power from a server while it is in the process of updating + firmware may result in devices in the server, or the server itself becoming + inoperable. + .. _Redfish: http://redfish.dmtf.org/ .. _Sushy: https://opendev.org/openstack/sushy .. _TLS: https://en.wikipedia.org/wiki/Transport_Layer_Security diff --git a/ironic/conf/redfish.py b/ironic/conf/redfish.py index 7c3eef31ae..ad522865eb 100644 --- a/ironic/conf/redfish.py +++ b/ironic/conf/redfish.py @@ -80,6 +80,16 @@ opts = [ 'or as the octal number ``0o644`` in Python. ' 'This setting must be set to the octal number ' 'representation, meaning starting with ``0o``.')), + cfg.IntOpt('firmware_update_status_interval', + min=0, + default=60, + help=_('Number of seconds to wait between checking for ' + 'completed firmware update tasks')), + cfg.IntOpt('firmware_update_fail_interval', + min=0, + default=60, + help=_('Number of seconds to wait between checking for ' + 'failed firmware update tasks')), ] diff --git a/ironic/drivers/modules/redfish/management.py b/ironic/drivers/modules/redfish/management.py index 22ef03b491..e4fec1a2e9 100644 --- a/ironic/drivers/modules/redfish/management.py +++ b/ironic/drivers/modules/redfish/management.py @@ -15,8 +15,11 @@ import collections +from futurist import periodics +from ironic_lib import metrics_utils from oslo_log import log from oslo_utils import importutils +from oslo_utils import timeutils from ironic.common import boot_devices from ironic.common import boot_modes @@ -24,12 +27,17 @@ from ironic.common import components from ironic.common import exception from ironic.common.i18n import _ from ironic.common import indicator_states +from ironic.common import states from ironic.common import utils from ironic.conductor import task_manager +from ironic.conductor import utils as manager_utils +from ironic.conf import CONF from ironic.drivers import base +from ironic.drivers.modules import deploy_utils from ironic.drivers.modules.redfish import utils as redfish_utils LOG = log.getLogger(__name__) +METRICS = metrics_utils.get_metrics_logger(__name__) sushy = importutils.try_import('sushy') @@ -663,3 +671,332 @@ class RedfishManagement(base.ManagementInterface): "node %(uuid)s") % {'indicator': indicator, 'component': component, 'uuid': task.node.uuid}) + + @METRICS.timer('RedfishManagement.update_firmware') + @base.clean_step(priority=0, abortable=False, argsinfo={ + 'firmware_images': { + 'description': ( + 'A list of firmware images to apply.' + ), + 'required': True + }}) + def update_firmware(self, task, firmware_images): + """Updates the firmware on the node. + + :param task: a TaskManager instance containing the node to act on. + :param firmware_images: A list of firmware images are to apply. + :returns: None if it is completed. + :raises: RedfishError on an error from the Sushy library. + """ + node = task.node + + LOG.debug('Updating firmware on node %(node_uuid)s with firmware ' + '%(firmware_images)s', + {'node_uuid': node.uuid, + 'firmware_images': firmware_images}) + + update_service = redfish_utils.get_update_service(task.node) + + # The cleaning infrastructure has an exclusive lock on the node, so + # there is no need to get one here. + self._apply_firmware_update(node, update_service, firmware_images) + + # set_async_step_flags calls node.save() + deploy_utils.set_async_step_flags( + node, + reboot=True, + skip_current_step=True, + polling=True) + + manager_utils.node_power_action(task, states.REBOOT) + + return deploy_utils.get_async_step_return_state(task.node) + + def _apply_firmware_update(self, node, update_service, firmware_updates): + """Applies the next firmware update to the node + + Applies the first firmware update in the firmware_updates list to + the node. + + Note that the caller must have an exclusive lock on the node and + the caller must ensure node.save() is called after making this + call. + + :param node: the node to apply the next update to + :param update_service: the sushy firmware update service + :param firmware_updates: the remaining firmware updates to apply + """ + + firmware_update = firmware_updates[0] + firmware_url = firmware_update['url'] + + LOG.debug('Applying firmware %(firmware_image)s to node ' + '%(node_uuid)s', + {'firmware_image': firmware_url, + 'node_uuid': node.uuid}) + + task_monitor = update_service.simple_update(firmware_url) + + driver_internal_info = node.driver_internal_info + firmware_update['task_monitor'] = task_monitor.task_monitor + driver_internal_info['firmware_updates'] = firmware_updates + node.driver_internal_info = driver_internal_info + + def _continue_firmware_updates(self, task, update_service, + firmware_updates): + """Continues processing the firmware updates + + Continues to process the firmware updates on the node. + + Note that the caller must have an exclusive lock on the node. + + :param task: a TaskManager instance containing the node to act on. + :param update_service: the sushy firmware update service + :param firmware_updates: the remaining firmware updates to apply + """ + + node = task.node + firmware_update = firmware_updates[0] + wait_interval = firmware_update.get('wait') + if wait_interval: + time_now = str(timeutils.utcnow().isoformat()) + firmware_update['wait_start_time'] = time_now + + LOG.debug('Waiting at %(time)s for %(seconds)s seconds after ' + 'firmware update %(firmware_image)s on node %(node)s', + {'time': time_now, + 'seconds': wait_interval, + 'firmware_image': firmware_update['url'], + 'node': node.uuid}) + + driver_internal_info = node.driver_internal_info + driver_internal_info['firmware_updates'] = firmware_updates + node.driver_internal_info = driver_internal_info + node.save() + return + + if len(firmware_updates) == 1: + self._clear_firmware_updates(node) + + LOG.info('Firmware updates completed for node %(node)s', + {'node': node.uuid}) + + manager_utils.notify_conductor_resume_clean(task) + else: + firmware_updates.pop(0) + self._apply_firmware_update(node, + update_service, + firmware_updates) + node.save() + manager_utils.node_power_action(task, states.REBOOT) + + def _clear_firmware_updates(self, node): + """Clears firmware updates from driver_internal_info + + Note that the caller must have an exclusive lock on the node. + + :param node: the node to clear the firmware updates from + """ + driver_internal_info = node.driver_internal_info + driver_internal_info.pop('firmware_updates', None) + node.driver_internal_info = driver_internal_info + node.save() + + @METRICS.timer('RedfishManagement._query_firmware_update_failed') + @periodics.periodic( + spacing=CONF.redfish.firmware_update_fail_interval, + enabled=CONF.redfish.firmware_update_fail_interval > 0) + def _query_firmware_update_failed(self, manager, context): + """Periodic job to check for failed firmware updates.""" + + filters = {'reserved': False, 'provision_state': states.CLEANFAIL, + 'maintenance': True} + + fields = ['driver_internal_info'] + + node_list = manager.iter_nodes(fields=fields, filters=filters) + for (node_uuid, driver, conductor_group, + driver_internal_info) in node_list: + try: + lock_purpose = 'checking async firmware update failed.' + with task_manager.acquire(context, node_uuid, + purpose=lock_purpose, + shared=True) as task: + if not isinstance(task.driver.management, + RedfishManagement): + continue + + firmware_updates = driver_internal_info.get( + 'firmware_updates') + if not firmware_updates: + continue + + node = task.node + + # A firmware update failed. Discard any remaining firmware + # updates so when the user takes the node out of + # maintenance mode, pending firmware updates do not + # automatically continue. + LOG.warning('Firmware update failed for node %(node)s. ' + 'Discarding remaining firmware updates.', + {'node': node.uuid}) + + task.upgrade_lock() + self._clear_firmware_updates(node) + + except exception.NodeNotFound: + LOG.info('During _query_firmware_update_failed, node ' + '%(node)s was not found and presumed deleted by ' + 'another process.', {'node': node_uuid}) + except exception.NodeLocked: + LOG.info('During _query_firmware_update_failed, node ' + '%(node)s was already locked by another process. ' + 'Skip.', {'node': node_uuid}) + + @METRICS.timer('RedfishManagement._query_firmware_update_status') + @periodics.periodic( + spacing=CONF.redfish.firmware_update_status_interval, + enabled=CONF.redfish.firmware_update_status_interval > 0) + def _query_firmware_update_status(self, manager, context): + """Periodic job to check firmware update tasks.""" + + filters = {'reserved': False, 'provision_state': states.CLEANWAIT} + fields = ['driver_internal_info'] + + node_list = manager.iter_nodes(fields=fields, filters=filters) + for (node_uuid, driver, conductor_group, + driver_internal_info) in node_list: + try: + lock_purpose = 'checking async firmware update tasks.' + with task_manager.acquire(context, node_uuid, + purpose=lock_purpose, + shared=True) as task: + if not isinstance(task.driver.management, + RedfishManagement): + continue + + firmware_updates = driver_internal_info.get( + 'firmware_updates') + if not firmware_updates: + continue + + self._check_node_firmware_update(task) + + except exception.NodeNotFound: + LOG.info('During _query_firmware_update_status, node ' + '%(node)s was not found and presumed deleted by ' + 'another process.', {'node': node_uuid}) + except exception.NodeLocked: + LOG.info('During _query_firmware_update_status, node ' + '%(node)s was already locked by another process. ' + 'Skip.', {'node': node_uuid}) + + @METRICS.timer('RedfishManagement._check_node_firmware_update') + def _check_node_firmware_update(self, task): + """Check the progress of running firmware update on a node.""" + + node = task.node + + firmware_updates = node.driver_internal_info['firmware_updates'] + current_update = firmware_updates[0] + + try: + update_service = redfish_utils.get_update_service(node) + except exception.RedfishConnectionError as e: + # If the BMC firmware is being updated, the BMC will be + # unavailable for some amount of time. + LOG.warning('Unable to communicate with firmware update service ' + 'on node %(node)s. Will try again on the next poll. ' + 'Error: %(error)s', + {'node': node.uuid, + 'error': e}) + return + + wait_start_time = current_update.get('wait_start_time') + if wait_start_time: + wait_start = timeutils.parse_isotime(wait_start_time) + + elapsed_time = timeutils.utcnow(True) - wait_start + if elapsed_time.seconds >= current_update['wait']: + LOG.debug('Finished waiting after firmware update ' + '%(firmware_image)s on node %(node)s. ' + 'Elapsed time: %(seconds)s seconds', + {'firmware_image': current_update['url'], + 'node': node.uuid, + 'seconds': elapsed_time.seconds}) + current_update.pop('wait', None) + current_update.pop('wait_start_time', None) + + task.upgrade_lock() + self._continue_firmware_updates(task, + update_service, + firmware_updates) + else: + LOG.debug('Continuing to wait after firmware update ' + '%(firmware_image)s on node %(node)s. ' + 'Elapsed time: %(seconds)s seconds', + {'firmware_image': current_update['url'], + 'node': node.uuid, + 'seconds': elapsed_time.seconds}) + + return + + try: + task_monitor = update_service.get_task_monitor( + current_update['task_monitor']) + except sushy.exceptions.ResourceNotFoundError: + # The BMC deleted the Task before we could query it + LOG.warning('Firmware update completed for node %(node)s, ' + 'firmware %(firmware_image)s, but success of the ' + 'update is unknown. Assuming update was successful.', + {'node': node.uuid, + 'firmware_image': current_update['url']}) + task.upgrade_lock() + self._continue_firmware_updates(task, + update_service, + firmware_updates) + return + + if not task_monitor.is_processing: + # The last response does not necessarily contain a Task, + # so get it + sushy_task = task_monitor.get_task() + + # Only parse the messages if the BMC did not return parsed + # messages + messages = [] + if not sushy_task.messages[0].message: + sushy_task.parse_messages() + + messages = [m.message for m in sushy_task.messages] + + if (sushy_task.task_state == sushy.TASK_STATE_COMPLETED + and sushy_task.task_status in + [sushy.HEALTH_OK, sushy.HEALTH_WARNING]): + LOG.info('Firmware update succeeded for node %(node)s, ' + 'firmware %(firmware_image)s: %(messages)s', + {'node': node.uuid, + 'firmware_image': current_update['url'], + 'messages': ", ".join(messages)}) + + task.upgrade_lock() + self._continue_firmware_updates(task, + update_service, + firmware_updates) + else: + error_msg = (_('Firmware update failed for node %(node)s, ' + 'firmware %(firmware_image)s. ' + 'Error: %(errors)s') % + {'node': node.uuid, + 'firmware_image': current_update['url'], + 'errors': ", ".join(messages)}) + LOG.error(error_msg) + + task.upgrade_lock() + self._clear_firmware_updates(node) + manager_utils.cleaning_error_handler(task, error_msg) + else: + LOG.debug('Firmware update in progress for node %(node)s, ' + 'firmware %(firmware_image)s.', + {'node': node.uuid, + 'firmware_image': current_update['url']}) diff --git a/ironic/drivers/modules/redfish/utils.py b/ironic/drivers/modules/redfish/utils.py index 5c20667d0f..705fc997e2 100644 --- a/ironic/drivers/modules/redfish/utils.py +++ b/ironic/drivers/modules/redfish/utils.py @@ -245,6 +245,23 @@ class SessionCache(object): cls._sessions.pop(session_key, None) +def get_update_service(node): + """Get a node's update service. + + :param node: an Ironic node object + :raises: RedfishConnectionError when it fails to connect to Redfish + :raises: RedfishError when the UpdateService is not registered in Redfish + """ + + try: + return _get_connection(node, lambda conn: conn.get_update_service()) + except sushy.exceptions.MissingAttributeError as e: + LOG.error('The Redfish UpdateService was not found for ' + 'node %(node)s. Error %(error)s', + {'node': node.uuid, 'error': e}) + raise exception.RedfishError(error=e) + + def get_system(node): """Get a Redfish System that represents a node. @@ -253,40 +270,60 @@ def get_system(node): :raises: RedfishError if the System is not registered in Redfish """ driver_info = parse_driver_info(node) - system_id = driver_info.get('system_id') + system_id = driver_info['system_id'] + + try: + return _get_connection( + node, + lambda conn, system_id: conn.get_system(system_id), + system_id) + except sushy.exceptions.ResourceNotFoundError as e: + LOG.error('The Redfish System "%(system)s" was not found for ' + 'node %(node)s. Error %(error)s', + {'system': system_id or '', + 'node': node.uuid, 'error': e}) + raise exception.RedfishError(error=e) + + +def _get_connection(node, lambda_fun, *args): + """Get a Redfish connection to a node. + + This method gets a Redfish connection to a node by calling the passed + lambda function, and returns the sushy object returned by the function. + + :param node: an Ironic node object + :param lambda_fun: the function to call to retrieve the desired sushy + object + :param args: the arguments to pass to the function + :returns: the sushy object returned by the lambda function + :raises: RedfishConnectionError when it fails to connect to Redfish + """ + driver_info = parse_driver_info(node) @retrying.retry( retry_on_exception=( lambda e: isinstance(e, exception.RedfishConnectionError)), stop_max_attempt_number=CONF.redfish.connection_attempts, wait_fixed=CONF.redfish.connection_retry_interval * 1000) - def _get_system(): + def _get_cached_connection(lambda_fun, *args): try: with SessionCache(driver_info) as conn: - return conn.get_system(system_id) + return lambda_fun(conn, *args) - except sushy.exceptions.ResourceNotFoundError as e: - LOG.error('The Redfish System "%(system)s" was not found for ' - 'node %(node)s. Error %(error)s', - {'system': system_id or '', - 'node': node.uuid, 'error': e}) - raise exception.RedfishError(error=e) # TODO(lucasagomes): We should look at other types of # ConnectionError such as AuthenticationError or SSLError and stop # retrying on them except sushy.exceptions.ConnectionError as e: LOG.warning('For node %(node)s, got a connection error from ' 'Redfish at address "%(address)s" using auth type ' - '"%(auth_type)s" when fetching System "%(system)s". ' - 'Error: %(error)s', - {'system': system_id or '', - 'address': driver_info['address'], + '"%(auth_type)s". Error: %(error)s', + {'address': driver_info['address'], 'auth_type': driver_info['auth_type'], 'node': node.uuid, 'error': e}) raise exception.RedfishConnectionError(node=node.uuid, error=e) try: - return _get_system() + return _get_cached_connection(lambda_fun, *args) except exception.RedfishConnectionError as e: with excutils.save_and_reraise_exception(): LOG.error('Failed to connect to Redfish at %(address)s for ' diff --git a/ironic/tests/unit/drivers/modules/redfish/test_management.py b/ironic/tests/unit/drivers/modules/redfish/test_management.py index 60c9fd095e..827dbf9bb6 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_management.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_management.py @@ -13,6 +13,7 @@ # License for the specific language governing permissions and limitations # under the License. +import datetime from unittest import mock from oslo_utils import importutils @@ -22,7 +23,10 @@ from ironic.common import boot_modes from ironic.common import components from ironic.common import exception from ironic.common import indicator_states +from ironic.common import states from ironic.conductor import task_manager +from ironic.conductor import utils as manager_utils +from ironic.drivers.modules import deploy_utils from ironic.drivers.modules.redfish import management as redfish_mgmt from ironic.drivers.modules.redfish import utils as redfish_utils from ironic.tests.unit.db import base as db_base @@ -691,3 +695,563 @@ class RedfishManagementTestCase(db_base.DbTestCase): mock_get_system.assert_called_once_with(task.node) self.assertEqual(indicator_states.ON, state) + + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + @mock.patch.object(deploy_utils, 'get_async_step_return_state', + autospec=True) + @mock.patch.object(deploy_utils, 'set_async_step_flags', autospec=True) + @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) + def test_update_firmware(self, mock_get_update_service, + mock_set_async_step_flags, + mock_get_async_step_return_state, + mock_node_power_action): + mock_task_monitor = mock.Mock() + mock_task_monitor.task_monitor = '/task/123' + mock_update_service = mock.Mock() + mock_update_service.simple_update.return_value = mock_task_monitor + mock_get_update_service.return_value = mock_update_service + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + task.node.save = mock.Mock() + + task.driver.management.update_firmware(task, + [{'url': 'test1'}, + {'url': 'test2'}]) + + mock_get_update_service.assert_called_once_with(task.node) + mock_update_service.simple_update.assert_called_once_with('test1') + self.assertIsNotNone(task.node + .driver_internal_info['firmware_updates']) + self.assertEqual( + [{'task_monitor': '/task/123', 'url': 'test1'}, + {'url': 'test2'}], + task.node.driver_internal_info['firmware_updates']) + mock_set_async_step_flags.assert_called_once_with( + task.node, reboot=True, skip_current_step=True, polling=True) + mock_get_async_step_return_state.assert_called_once_with( + task.node) + mock_node_power_action.assert_called_once_with(task, states.REBOOT) + + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_firmware_update_failed(self, mock_acquire): + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + mock_manager = mock.Mock() + node_list = [(self.node.uuid, 'redfish', '', driver_internal_info)] + mock_manager.iter_nodes.return_value = node_list + task = mock.Mock(node=self.node, + driver=mock.Mock(management=management)) + mock_acquire.return_value = mock.MagicMock( + __enter__=mock.MagicMock(return_value=task)) + management._clear_firmware_updates = mock.Mock() + + management._query_firmware_update_failed(mock_manager, + self.context) + + management._clear_firmware_updates.assert_called_once_with(self.node) + + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_firmware_update_failed_not_redfish(self, mock_acquire): + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + mock_manager = mock.Mock() + node_list = [(self.node.uuid, 'not-redfish', '', driver_internal_info)] + mock_manager.iter_nodes.return_value = node_list + task = mock.Mock(node=self.node, + driver=mock.Mock(management=mock.Mock())) + mock_acquire.return_value = mock.MagicMock( + __enter__=mock.MagicMock(return_value=task)) + management._clear_firmware_updates = mock.Mock() + + management._query_firmware_update_failed(mock_manager, + self.context) + + management._clear_firmware_updates.assert_not_called() + + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_firmware_update_failed_no_firmware_upd(self, mock_acquire): + driver_internal_info = {'something': 'else'} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + mock_manager = mock.Mock() + node_list = [(self.node.uuid, 'redfish', '', driver_internal_info)] + mock_manager.iter_nodes.return_value = node_list + task = mock.Mock(node=self.node, + driver=mock.Mock(management=management)) + mock_acquire.return_value = mock.MagicMock( + __enter__=mock.MagicMock(return_value=task)) + management._clear_firmware_updates = mock.Mock() + + management._query_firmware_update_failed(mock_manager, + self.context) + + management._clear_firmware_updates.assert_not_called() + + @mock.patch.object(redfish_mgmt.LOG, 'info', autospec=True) + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_firmware_update_failed_node_notfound(self, mock_acquire, + mock_log): + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + mock_manager = mock.Mock() + node_list = [(self.node.uuid, 'redfish', '', driver_internal_info)] + mock_manager.iter_nodes.return_value = node_list + mock_acquire.side_effect = exception.NodeNotFound + management._clear_firmware_updates = mock.Mock() + + management._query_firmware_update_failed(mock_manager, + self.context) + + management._clear_firmware_updates.assert_not_called() + self.assertTrue(mock_log.called) + + @mock.patch.object(redfish_mgmt.LOG, 'info', autospec=True) + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_firmware_update_failed_node_locked( + self, mock_acquire, mock_log): + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + mock_manager = mock.Mock() + node_list = [(self.node.uuid, 'redfish', '', driver_internal_info)] + mock_manager.iter_nodes.return_value = node_list + mock_acquire.side_effect = exception.NodeLocked + management._clear_firmware_updates = mock.Mock() + + management._query_firmware_update_failed(mock_manager, + self.context) + + management._clear_firmware_updates.assert_not_called() + self.assertTrue(mock_log.called) + + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_firmware_update_status(self, mock_acquire): + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + mock_manager = mock.Mock() + node_list = [(self.node.uuid, 'redfish', '', driver_internal_info)] + mock_manager.iter_nodes.return_value = node_list + task = mock.Mock(node=self.node, + driver=mock.Mock(management=management)) + mock_acquire.return_value = mock.MagicMock( + __enter__=mock.MagicMock(return_value=task)) + management._check_node_firmware_update = mock.Mock() + + management._query_firmware_update_status(mock_manager, + self.context) + + management._check_node_firmware_update.assert_called_once_with(task) + + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_firmware_update_status_not_redfish(self, mock_acquire): + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + mock_manager = mock.Mock() + node_list = [(self.node.uuid, 'not-redfish', '', driver_internal_info)] + mock_manager.iter_nodes.return_value = node_list + task = mock.Mock(node=self.node, + driver=mock.Mock(management=mock.Mock())) + mock_acquire.return_value = mock.MagicMock( + __enter__=mock.MagicMock(return_value=task)) + management._check_node_firmware_update = mock.Mock() + + management._query_firmware_update_status(mock_manager, + self.context) + + management._check_node_firmware_update.assert_not_called() + + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_firmware_update_status_no_firmware_upd(self, mock_acquire): + driver_internal_info = {'something': 'else'} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + mock_manager = mock.Mock() + node_list = [(self.node.uuid, 'redfish', '', driver_internal_info)] + mock_manager.iter_nodes.return_value = node_list + task = mock.Mock(node=self.node, + driver=mock.Mock(management=management)) + mock_acquire.return_value = mock.MagicMock( + __enter__=mock.MagicMock(return_value=task)) + management._check_node_firmware_update = mock.Mock() + + management._query_firmware_update_status(mock_manager, + self.context) + + management._check_node_firmware_update.assert_not_called() + + @mock.patch.object(redfish_mgmt.LOG, 'info', autospec=True) + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_firmware_update_status_node_notfound(self, mock_acquire, + mock_log): + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + mock_manager = mock.Mock() + node_list = [(self.node.uuid, 'redfish', '', driver_internal_info)] + mock_manager.iter_nodes.return_value = node_list + mock_acquire.side_effect = exception.NodeNotFound + management._check_node_firmware_update = mock.Mock() + + management._query_firmware_update_status(mock_manager, + self.context) + + management._check_node_firmware_update.assert_not_called() + self.assertTrue(mock_log.called) + + @mock.patch.object(redfish_mgmt.LOG, 'info', autospec=True) + @mock.patch.object(task_manager, 'acquire', autospec=True) + def test__query_firmware_update_status_node_locked( + self, mock_acquire, mock_log): + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + mock_manager = mock.Mock() + node_list = [(self.node.uuid, 'redfish', '', driver_internal_info)] + mock_manager.iter_nodes.return_value = node_list + mock_acquire.side_effect = exception.NodeLocked + management._check_node_firmware_update = mock.Mock() + + management._query_firmware_update_status(mock_manager, + self.context) + + management._check_node_firmware_update.assert_not_called() + self.assertTrue(mock_log.called) + + @mock.patch.object(redfish_mgmt.LOG, 'warning', autospec=True) + @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) + def test__check_node_firmware_update_redfish_conn_error( + self, mock_get_update_services, mock_log): + mock_get_update_services.side_effect = exception.RedfishConnectionError + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + + management = redfish_mgmt.RedfishManagement() + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + management._check_node_firmware_update(task) + + self.assertTrue(mock_log.called) + + @mock.patch.object(redfish_mgmt.LOG, 'debug', autospec=True) + @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) + def test__check_node_firmware_update_wait_elapsed( + self, mock_get_update_service, mock_log): + mock_update_service = mock.Mock() + mock_get_update_service.return_value = mock_update_service + + wait_start_time = datetime.datetime.utcnow() -\ + datetime.timedelta(minutes=15) + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1', + 'wait_start_time': + wait_start_time.isoformat(), + 'wait': 1}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + management._continue_firmware_updates = mock.Mock() + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + management._check_node_firmware_update(task) + + self.assertTrue(mock_log.called) + management._continue_firmware_updates.assert_called_once_with( + task, + mock_update_service, + [{'task_monitor': '/task/123', 'url': 'test1'}]) + + @mock.patch.object(redfish_mgmt.LOG, 'debug', autospec=True) + @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) + def test__check_node_firmware_update_still_waiting( + self, mock_get_update_service, mock_log): + mock_update_service = mock.Mock() + mock_get_update_service.return_value = mock_update_service + + wait_start_time = datetime.datetime.utcnow() -\ + datetime.timedelta(minutes=1) + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1', + 'wait_start_time': + wait_start_time.isoformat(), + 'wait': 600}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + management._continue_firmware_updates = mock.Mock() + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + management._check_node_firmware_update(task) + + self.assertTrue(mock_log.called) + management._continue_firmware_updates.assert_not_called() + + @mock.patch.object(redfish_mgmt.LOG, 'warning', autospec=True) + @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) + def test__check_node_firmware_update_task_monitor_not_found( + self, mock_get_update_service, mock_log): + mock_update_service = mock.Mock() + mock_update_service.get_task_monitor.side_effect =\ + sushy.exceptions.ResourceNotFoundError( + method='GET', url='/task/123', response=mock.MagicMock()) + mock_get_update_service.return_value = mock_update_service + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + management._continue_firmware_updates = mock.Mock() + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + management._check_node_firmware_update(task) + + self.assertTrue(mock_log.called) + management._continue_firmware_updates.assert_called_once_with( + task, + mock_update_service, + [{'task_monitor': '/task/123', 'url': 'test1'}]) + + @mock.patch.object(redfish_mgmt.LOG, 'debug', autospec=True) + @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) + def test__check_node_firmware_update_in_progress(self, + mock_get_update_service, + mock_log): + mock_task_monitor = mock.Mock() + mock_task_monitor.is_processing = True + mock_update_service = mock.Mock() + mock_update_service.get_task_monitor.return_value = mock_task_monitor + mock_get_update_service.return_value = mock_update_service + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + management._check_node_firmware_update(task) + + self.assertTrue(mock_log.called) + + @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) + @mock.patch.object(redfish_mgmt.LOG, 'error', autospec=True) + @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) + def test__check_node_firmware_update_fail(self, + mock_get_update_service, + mock_log, + mock_cleaning_error_handler): + mock_sushy_task = mock.Mock() + mock_sushy_task.task_state = 'exception' + mock_message_unparsed = mock.Mock() + mock_message_unparsed.message = None + mock_message = mock.Mock() + mock_message.message = 'Firmware upgrade failed' + messages = mock.PropertyMock(side_effect=[[mock_message_unparsed], + [mock_message]]) + type(mock_sushy_task).messages = messages + mock_task_monitor = mock.Mock() + mock_task_monitor.is_processing = False + mock_task_monitor.get_task.return_value = mock_sushy_task + mock_update_service = mock.Mock() + mock_update_service.get_task_monitor.return_value = mock_task_monitor + mock_get_update_service.return_value = mock_update_service + driver_internal_info = {'something': 'else', + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + management._continue_firmware_updates = mock.Mock() + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + task.upgrade_lock = mock.Mock() + task.process_event = mock.Mock() + + management._check_node_firmware_update(task) + + task.upgrade_lock.assert_called_once_with() + self.assertTrue(mock_log.called) + self.assertEqual({'something': 'else'}, + task.node.driver_internal_info) + mock_cleaning_error_handler.assert_called_once() + management._continue_firmware_updates.assert_not_called() + + @mock.patch.object(redfish_mgmt.LOG, 'info', autospec=True) + @mock.patch.object(redfish_utils, 'get_update_service', autospec=True) + def test__check_node_firmware_update_done(self, + mock_get_update_service, + mock_log): + mock_task = mock.Mock() + mock_task.task_state = sushy.TASK_STATE_COMPLETED + mock_task.task_status = sushy.HEALTH_OK + mock_message = mock.Mock() + mock_message.message = 'Firmware update done' + mock_task.messages = [mock_message] + mock_task_monitor = mock.Mock() + mock_task_monitor.is_processing = False + mock_task_monitor.get_task.return_value = mock_task + mock_update_service = mock.Mock() + mock_update_service.get_task_monitor.return_value = mock_task_monitor + mock_get_update_service.return_value = mock_update_service + driver_internal_info = { + 'firmware_updates': [ + {'task_monitor': '/task/123', + 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + management = redfish_mgmt.RedfishManagement() + management._continue_firmware_updates = mock.Mock() + + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + management._check_node_firmware_update(task) + + self.assertTrue(mock_log.called) + management._continue_firmware_updates.assert_called_once_with( + task, + mock_update_service, + [{'task_monitor': '/task/123', + 'url': 'test1'}]) + + @mock.patch.object(redfish_mgmt.LOG, 'debug', autospec=True) + def test__continue_firmware_updates_wait(self, mock_log): + mock_update_service = mock.Mock() + + management = redfish_mgmt.RedfishManagement() + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + management._continue_firmware_updates( + task, + mock_update_service, + [{'task_monitor': '/task/123', + 'url': 'test1', + 'wait': 10, + 'wait_start_time': '20200901123045'}, + {'url': 'test2'}]) + + self.assertTrue(mock_log.called) + # Wait start time has changed + self.assertNotEqual( + '20200901123045', + task.node.driver_internal_info['firmware_updates'] + [0]['wait_start_time']) + + @mock.patch.object(redfish_mgmt.LOG, 'info', autospec=True) + @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + autospec=True) + def test__continue_firmware_updates_last_update( + self, + mock_notify_conductor_resume_clean, + mock_log): + mock_update_service = mock.Mock() + driver_internal_info = { + 'something': 'else', + 'firmware_updates': [ + {'task_monitor': '/task/123', 'url': 'test1'}]} + self.node.driver_internal_info = driver_internal_info + self.node.save() + + management = redfish_mgmt.RedfishManagement() + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + management._continue_firmware_updates( + task, + mock_update_service, + [{'task_monitor': '/task/123', 'url': 'test1'}]) + + self.assertTrue(mock_log.called) + mock_notify_conductor_resume_clean.assert_called_once_with(task) + self.assertEqual({'something': 'else'}, + task.node.driver_internal_info) + + @mock.patch.object(redfish_mgmt.LOG, 'debug', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + def test__continue_firmware_updates_more_updates(self, + mock_node_power_action, + mock_log): + mock_task_monitor = mock.Mock() + mock_task_monitor.task_monitor = '/task/987' + mock_update_service = mock.Mock() + mock_update_service.simple_update.return_value = mock_task_monitor + driver_internal_info = { + 'something': 'else', + 'firmware_updates': [ + {'task_monitor': '/task/123', 'url': 'test1'}, + {'url': 'test2'}]} + self.node.driver_internal_info = driver_internal_info + + management = redfish_mgmt.RedfishManagement() + with task_manager.acquire(self.context, self.node.uuid, + shared=False) as task: + task.node.save = mock.Mock() + + management._continue_firmware_updates( + task, + mock_update_service, + [{'task_monitor': '/task/123', 'url': 'test1'}, + {'url': 'test2'}]) + + self.assertTrue(mock_log.called) + mock_update_service.simple_update.assert_called_once_with('test2') + self.assertIsNotNone( + task.node.driver_internal_info['firmware_updates']) + self.assertEqual( + [{'url': 'test2', 'task_monitor': '/task/987'}], + task.node.driver_internal_info['firmware_updates']) + task.node.save.assert_called_once_with() + mock_node_power_action.assert_called_once_with(task, states.REBOOT) diff --git a/ironic/tests/unit/drivers/modules/redfish/test_utils.py b/ironic/tests/unit/drivers/modules/redfish/test_utils.py index 794982bba6..d818fcab31 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_utils.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_utils.py @@ -339,3 +339,20 @@ class RedfishUtilsTestCase(db_base.DbTestCase): mock.ANY, verify=mock.ANY, auth=mock_basic_auth.return_value ) + + def test_get_update_service(self): + redfish_utils._get_connection = mock.Mock() + mock_update_service = mock.Mock() + redfish_utils._get_connection.return_value = mock_update_service + + result = redfish_utils.get_update_service(self.node) + + self.assertEqual(mock_update_service, result) + + def test_get_update_service_error(self): + redfish_utils._get_connection = mock.Mock() + redfish_utils._get_connection.side_effect =\ + sushy.exceptions.MissingAttributeError + + self.assertRaises(exception.RedfishError, + redfish_utils.get_update_service, self.node) diff --git a/ironic/tests/unit/drivers/third_party_driver_mock_specs.py b/ironic/tests/unit/drivers/third_party_driver_mock_specs.py index 852fefecd4..ab9dc53abc 100644 --- a/ironic/tests/unit/drivers/third_party_driver_mock_specs.py +++ b/ironic/tests/unit/drivers/third_party_driver_mock_specs.py @@ -156,6 +156,9 @@ SUSHY_SPEC = ( 'VIRTUAL_MEDIA_CD', 'VIRTUAL_MEDIA_FLOPPY', 'APPLY_TIME_ON_RESET', + 'TASK_STATE_COMPLETED', + 'HEALTH_OK', + 'HEALTH_WARNING' ) SUSHY_AUTH_SPEC = ( diff --git a/ironic/tests/unit/drivers/third_party_driver_mocks.py b/ironic/tests/unit/drivers/third_party_driver_mocks.py index 33ab080226..df1a2bc2e3 100644 --- a/ironic/tests/unit/drivers/third_party_driver_mocks.py +++ b/ironic/tests/unit/drivers/third_party_driver_mocks.py @@ -218,6 +218,9 @@ if not sushy: VIRTUAL_MEDIA_CD='cd', VIRTUAL_MEDIA_FLOPPY='floppy', APPLY_TIME_ON_RESET='on reset', + TASK_STATE_COMPLETED='completed', + HEALTH_OK='ok', + HEALTH_WARNING='warning' ) sys.modules['sushy'] = sushy diff --git a/releasenotes/notes/redfish-firmware-update-a06d0624325a66ca.yaml b/releasenotes/notes/redfish-firmware-update-a06d0624325a66ca.yaml new file mode 100644 index 0000000000..b3a14b9701 --- /dev/null +++ b/releasenotes/notes/redfish-firmware-update-a06d0624325a66ca.yaml @@ -0,0 +1,9 @@ +--- +features: + - | + Adds support for performing firmware updates using the ``redfish`` + and ``idrac`` hardware types. + + A new firmware update cleaning step has been added to the ``redfish`` + hardware type. The ``idrac`` hardware type also automatically gains this + capability through inheritance.