From 601f6d51f693c5b0e6b570234db415f12f881257 Mon Sep 17 00:00:00 2001
From: DhuldevValekar3 <dhuldev.valekar@gmail.com>
Date: Tue, 21 Apr 2020 07:15:02 -0500
Subject: [PATCH] Add iDRAC management via Redfish to idrac HW type

This change adds support for managing an iDRAC -- reset, clear job
queue, and reset to known good state -- via the Redfish out-of-band
(OOB) management protocol to the idrac hardware type. This is offered by
new idrac-redfish management hardware interface implementation cleaning
steps: reset_idrac, clear_job_queue, and known_good_state.
known_good_state both resets an iDRAC and clears its job queue.

Story: 2007617
Task: 39628
Depends-On: https://review.opendev.org/c/x/sushy-oem-idrac/+/782254

Change-Id: Iad69c8d7cf3a373f5cfcc619a479a106efa2e4d4
---
 driver-requirements.txt                       |   2 +-
 ironic/drivers/modules/drac/management.py     | 118 +++++++++++++++++-
 ironic/drivers/modules/redfish/utils.py       |  26 ++++
 .../drivers/modules/drac/test_management.py   |  45 ++++++-
 ...ish_mgmt_clean_steps-c983a8858835046d.yaml |   9 ++
 5 files changed, 197 insertions(+), 3 deletions(-)
 create mode 100644 releasenotes/notes/redfish_mgmt_clean_steps-c983a8858835046d.yaml

diff --git a/driver-requirements.txt b/driver-requirements.txt
index 921c2a6490..457040ed48 100644
--- a/driver-requirements.txt
+++ b/driver-requirements.txt
@@ -20,4 +20,4 @@ ansible>=2.7
 python-ibmcclient>=0.2.2,<0.3.0
 
 # Dell EMC iDRAC sushy OEM extension
-sushy-oem-idrac>=2.0.0,<3.0.0
+sushy-oem-idrac>=2.1.0,<3.0.0
diff --git a/ironic/drivers/modules/drac/management.py b/ironic/drivers/modules/drac/management.py
index e069e086c4..ee8fc37796 100644
--- a/ironic/drivers/modules/drac/management.py
+++ b/ironic/drivers/modules/drac/management.py
@@ -2,7 +2,7 @@
 #
 # Copyright 2014 Red Hat, Inc.
 # All Rights Reserved.
-# Copyright (c) 2017-2020 Dell Inc. or its subsidiaries.
+# Copyright (c) 2017-2021 Dell Inc. or its subsidiaries.
 #
 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 #    not use this file except in compliance with the License. You may obtain
@@ -637,6 +637,122 @@ class DracRedfishManagement(redfish_management.RedfishManagement):
         driver_internal_info.pop('export_configuration_location', None)
         node.driver_internal_info = driver_internal_info
 
+    @METRICS.timer('DracRedfishManagement.clear_job_queue')
+    @base.clean_step(priority=0)
+    def clear_job_queue(self, task):
+        """Clear iDRAC job queue.
+
+        :param task: a TaskManager instance containing the node to act
+                     on.
+        :raises: RedfishError on an error.
+        """
+        system = redfish_utils.get_system(task.node)
+        for manager in system.managers:
+            try:
+                oem_manager = manager.get_oem_extension('Dell')
+            except sushy.exceptions.OEMExtensionNotFoundError as e:
+                error_msg = (_("Search for Sushy OEM extension Python package "
+                               "'sushy-oem-idrac' failed for node %(node)s. "
+                               "Ensure it is installed. Error: %(error)s") %
+                             {'node': task.node.uuid, 'error': e})
+                LOG.error(error_msg)
+                raise exception.RedfishError(error=error_msg)
+            try:
+                oem_manager.job_service.delete_jobs(job_ids=['JID_CLEARALL'])
+            except sushy.exceptions.SushyError as e:
+                error_msg = ('Failed to clear iDRAC job queue with system '
+                             '%(system)s manager %(manager)s for node '
+                             '%(node)s. Will try next manager, if available. '
+                             'Error: %(error)s' %
+                             {'system': system.uuid if system.uuid else
+                              system.identity,
+                              'manager': manager.uuid if manager.uuid else
+                              manager.identity,
+                              'node': task.node.uuid,
+                              'error': e})
+                LOG.debug(error_msg)
+                continue
+            LOG.info('Cleared iDRAC job queue for node %(node)s',
+                     {'node': task.node.uuid})
+            break
+        else:
+            error_msg = (_('iDRAC Redfish clear job queue failed for node '
+                           '%(node)s, because system %(system)s has no '
+                           'manager%(no_manager)s.') %
+                         {'node': task.node.uuid,
+                          'system': system.uuid if system.uuid else
+                          system.identity,
+                          'no_manager': '' if not system.managers else
+                          ' which could clear the job queue'})
+            LOG.error(error_msg)
+            raise exception.RedfishError(error=error_msg)
+
+    @METRICS.timer('DracRedfishManagement.reset_idrac')
+    @base.clean_step(priority=0)
+    def reset_idrac(self, task):
+        """Reset the iDRAC.
+
+        :param task: a TaskManager instance containing the node to act
+                     on.
+        :raises: RedfishError on an error.
+        """
+        system = redfish_utils.get_system(task.node)
+        for manager in system.managers:
+            try:
+                oem_manager = manager.get_oem_extension('Dell')
+            except sushy.exceptions.OEMExtensionNotFoundError as e:
+                error_msg = (_("Search for Sushy OEM extension Python package "
+                               "'sushy-oem-idrac' failed for node %(node)s. "
+                               "Ensure it is installed. Error: %(error)s") %
+                             {'node': task.node.uuid, 'error': e})
+                LOG.error(error_msg)
+                raise exception.RedfishError(error=error_msg)
+            try:
+                oem_manager.reset_idrac()
+            except sushy.exceptions.SushyError as e:
+                error_msg = ('Failed to reset iDRAC with system %(system)s '
+                             'manager %(manager)s for node %(node)s. Will try '
+                             'next manager, if available. Error: %(error)s' %
+                             {'system': system.uuid if system.uuid else
+                              system.identity,
+                              'manager': manager.uuid if manager.uuid else
+                              manager.identity,
+                              'node': task.node.uuid,
+                              'error': e})
+                LOG.debug(error_msg)
+                continue
+            redfish_utils.wait_until_get_system_ready(task.node)
+            LOG.info('Reset iDRAC for node %(node)s', {'node': task.node.uuid})
+            break
+        else:
+            error_msg = (_('iDRAC Redfish reset iDRAC failed for node '
+                           '%(node)s, because system %(system)s has no '
+                           'manager%(no_manager)s.') %
+                         {'node': task.node.uuid,
+                          'system': system.uuid if system.uuid else
+                          system.identity,
+                          'no_manager': '' if not system.managers else
+                          ' which could reset the iDRAC'})
+            LOG.error(error_msg)
+            raise exception.RedfishError(error=error_msg)
+
+    @METRICS.timer('DracRedfishManagement.known_good_state')
+    @base.clean_step(priority=0)
+    def known_good_state(self, task):
+        """Reset iDRAC to known good state.
+
+        An iDRAC is reset to a known good state by resetting it and
+        clearing its job queue.
+
+        :param task: a TaskManager instance containing the node to act
+                     on.
+        :raises: RedfishError on an error.
+        """
+        self.reset_idrac(task)
+        self.clear_job_queue(task)
+        LOG.info('Reset iDRAC to known good state for node %(node)s',
+                 {'node': task.node.uuid})
+
 
 class DracWSManManagement(base.ManagementInterface):
 
diff --git a/ironic/drivers/modules/redfish/utils.py b/ironic/drivers/modules/redfish/utils.py
index 49e7062333..63259a87b9 100644
--- a/ironic/drivers/modules/redfish/utils.py
+++ b/ironic/drivers/modules/redfish/utils.py
@@ -1,5 +1,6 @@
 # Copyright 2017 Red Hat, Inc.
 # All Rights Reserved.
+# Copyright (c) 2020-2021 Dell Inc. or its subsidiaries.
 #
 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 #    not use this file except in compliance with the License. You may obtain
@@ -372,3 +373,28 @@ def get_enabled_macs(task, system):
     else:
         LOG.debug("No ethernet interface information is available "
                   "for node %(node)s", {'node': task.node.uuid})
+
+
+@tenacity.retry(
+    retry=tenacity.retry_if_exception_type(
+        exception.RedfishConnectionError),
+    stop=tenacity.stop_after_attempt(CONF.redfish.connection_attempts),
+    wait=tenacity.wait_fixed(CONF.redfish.connection_retry_interval),
+    reraise=True)
+def wait_until_get_system_ready(node):
+    """Wait until Redfish system is ready.
+
+    :param node: an Ironic node object
+    :returns: the system object returned by the connection's get_system()
+    :raises: RedfishConnectionError on time out.
+    """
+    driver_info = parse_driver_info(node)
+    system_id = driver_info['system_id']
+    try:
+        with SessionCache(driver_info) as conn:
+            return conn.get_system(system_id)
+    except sushy.exceptions.BadRequestError as e:
+        LOG.warning("System is not ready for node %(node)s, with error "
+                    "%(error)s, so retrying it",
+                    {'node': node.uuid, 'error': e})
+        raise exception.RedfishConnectionError(node=node.uuid, error=e)
diff --git a/ironic/tests/unit/drivers/modules/drac/test_management.py b/ironic/tests/unit/drivers/modules/drac/test_management.py
index 227d5cd5f7..d2a290525a 100644
--- a/ironic/tests/unit/drivers/modules/drac/test_management.py
+++ b/ironic/tests/unit/drivers/modules/drac/test_management.py
@@ -2,7 +2,7 @@
 #
 # Copyright 2014 Red Hat, Inc.
 # All Rights Reserved.
-# Copyright (c) 2017-2018 Dell Inc. or its subsidiaries.
+# Copyright (c) 2017-2021 Dell Inc. or its subsidiaries.
 #
 #    Licensed under the Apache License, Version 2.0 (the "License"); you may
 #    not use this file except in compliance with the License. You may obtain
@@ -839,6 +839,10 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest):
                                                driver_info=INFO_DICT)
         self.management = drac_mgmt.DracRedfishManagement()
 
+        self.config(enabled_hardware_types=['idrac'],
+                    enabled_power_interfaces=['idrac-redfish'],
+                    enabled_management_interfaces=['idrac-redfish'])
+
     def test_export_configuration_name_missing(self):
         task = mock.Mock(node=self.node, context=self.context)
         self.assertRaises(exception.MissingParameterValue,
@@ -1423,3 +1427,42 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest):
 
             mock_deploy_handler.assert_called_once_with(
                 task, 'error', 'log message')
+
+    @mock.patch.object(drac_mgmt, 'redfish_utils', autospec=True)
+    def test_clear_job_queue(self, mock_redfish_utils):
+        mock_system = mock_redfish_utils.get_system.return_value
+        mock_manager = mock.MagicMock()
+        mock_system.managers = [mock_manager]
+        mock_manager_oem = mock_manager.get_oem_extension.return_value
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+            task.driver.management.clear_job_queue(task)
+            mock_manager_oem.job_service.delete_jobs.assert_called_once_with(
+                job_ids=['JID_CLEARALL'])
+
+    @mock.patch.object(drac_mgmt, 'redfish_utils', autospec=True)
+    def test_reset_idrac(self, mock_redfish_utils):
+        mock_system = mock_redfish_utils.get_system.return_value
+        mock_manager = mock.MagicMock()
+        mock_system.managers = [mock_manager]
+        mock_manager_oem = mock_manager.get_oem_extension.return_value
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+            task.driver.management.reset_idrac(task)
+            mock_manager_oem.reset_idrac.assert_called_once_with()
+
+    @mock.patch.object(drac_mgmt, 'redfish_utils', autospec=True)
+    def test_known_good_state(self, mock_redfish_utils):
+        mock_system = mock_redfish_utils.get_system.return_value
+        mock_manager = mock.MagicMock()
+        mock_system.managers = [mock_manager]
+        mock_manager_oem = mock_manager.get_oem_extension.return_value
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+            task.driver.management.known_good_state(task)
+            mock_manager_oem.job_service.delete_jobs.assert_called_once_with(
+                job_ids=['JID_CLEARALL'])
+            mock_manager_oem.reset_idrac.assert_called_once_with()
diff --git a/releasenotes/notes/redfish_mgmt_clean_steps-c983a8858835046d.yaml b/releasenotes/notes/redfish_mgmt_clean_steps-c983a8858835046d.yaml
new file mode 100644
index 0000000000..af4a531215
--- /dev/null
+++ b/releasenotes/notes/redfish_mgmt_clean_steps-c983a8858835046d.yaml
@@ -0,0 +1,9 @@
+---
+features:
+  - |
+    Adds support for managing an iDRAC -- reset, clear job queue, and reset to
+    known good state -- via the Redfish out-of-band (OOB) management protocol
+    to the ``idrac`` hardware type. This is offered by new ``idrac-redfish``
+    management hardware interface implementation cleaning steps:
+    ``reset_idrac``, ``clear_job_queue``, and ``known_good_state``.
+    ``known_good_state`` both resets an iDRAC and clears its job queue.