From c174380db3eb409d8fd601b76e7027ea5c0f319c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aija=20Jaunt=C4=93va?= <aija.jaunteva@dell.com>
Date: Fri, 20 Aug 2021 11:18:00 -0400
Subject: [PATCH] Fix iDRAC import configuration missing task handling

Older iDRACs delete the task after 1 minute; since firmware
5.00.00.00 the task is kept for 10 minutes.
If the task has already been deleted when it is checked, handle
the error and advise the user to either upgrade iDRAC firmware,
if not done already, or decrease the checking interval.
Prior to this fix, the node got stuck in wait mode forever if the
task was deleted, because the exception raised in the periodic
task did not make the step fail.

Change-Id: I5d500b7d53e9804aa3b54dc400d8621f40cd5d0c
---
 ironic/drivers/modules/drac/management.py     | 24 ++++++++++++++++++-
 .../drivers/modules/drac/test_management.py   | 19 +++++++++++++++
 ...uration-task-deleted-2a0e6a96509394b6.yaml | 12 ++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 releasenotes/notes/idrac-import-configuration-task-deleted-2a0e6a96509394b6.yaml

diff --git a/ironic/drivers/modules/drac/management.py b/ironic/drivers/modules/drac/management.py
index 006357a081..55b3275ae5 100644
--- a/ironic/drivers/modules/drac/management.py
+++ b/ironic/drivers/modules/drac/management.py
@@ -504,7 +504,29 @@ class DracRedfishManagement(redfish_management.RedfishManagement):
         """Checks progress of running import configuration task"""
 
         node = task.node
-        task_monitor = redfish_utils.get_task_monitor(node, task_monitor_url)
+        try:
+            task_monitor = redfish_utils.get_task_monitor(
+                node, task_monitor_url)
+        except exception.RedfishError as e:
+            error_msg = (_("Failed import configuration task: "
+                           "%(task_monitor_url)s. Message: '%(message)s'. "
+                           "Most likely this happened because could not find "
+                           "the task anymore as it got deleted by iDRAC. "
+                           "If not already, upgrade iDRAC firmware to "
+                           "5.00.00.00 or later that preserves tasks for "
+                           "longer or decrease "
+                           "[drac]query_import_config_job_status_interval")
+                         % {'task_monitor_url': task_monitor_url,
+                            'message': e})
+            log_msg = ("Import configuration task failed for node "
+                       "%(node)s. %(error)s" % {'node': task.node.uuid,
+                                                'error': error_msg})
+            info = node.driver_internal_info
+            info.pop('import_task_monitor_url', None)
+            node.driver_internal_info = info
+            node.save()
+            self._set_failed(task, log_msg, error_msg)
+            return
 
         if not task_monitor.is_processing:
             import_task = task_monitor.get_task()
diff --git a/ironic/tests/unit/drivers/modules/drac/test_management.py b/ironic/tests/unit/drivers/modules/drac/test_management.py
index a115d56d8c..eb70f8d37e 100644
--- a/ironic/tests/unit/drivers/modules/drac/test_management.py
+++ b/ironic/tests/unit/drivers/modules/drac/test_management.py
@@ -1088,6 +1088,25 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest):
             ._check_import_configuration_task
             .assert_called_once_with(task, '/TaskService/123'))
 
+    @mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True)
+    def test__check_import_configuration_task_missing(
+            self, mock_get_task_monitor):
+        mock_get_task_monitor.side_effect = exception.RedfishError(
+            error='Task not found')  # make the task monitor lookup fail
+        self.management._set_success = mock.Mock()
+        self.management._set_failed = mock.Mock()
+
+        with task_manager.acquire(self.context, self.node.uuid,
+                                  shared=False) as task:
+            self.management._check_import_configuration_task(
+                task, '/TaskService/123')
+
+            self.management._set_failed.assert_called_once_with(
+                task, mock.ANY, mock.ANY)  # step is failed exactly once
+            self.management._set_success.assert_not_called()
+            self.assertIsNone(  # no monitor URL is left on the node
+                task.node.driver_internal_info.get('import_task_monitor_url'))
+
     @mock.patch.object(drac_mgmt.LOG, 'debug', autospec=True)
     @mock.patch.object(redfish_utils, 'get_task_monitor', autospec=True)
     def test__check_import_configuration_task_still_processing(
diff --git a/releasenotes/notes/idrac-import-configuration-task-deleted-2a0e6a96509394b6.yaml b/releasenotes/notes/idrac-import-configuration-task-deleted-2a0e6a96509394b6.yaml
new file mode 100644
index 0000000000..4f503abf3e
--- /dev/null
+++ b/releasenotes/notes/idrac-import-configuration-task-deleted-2a0e6a96509394b6.yaml
@@ -0,0 +1,12 @@
+---
+fixes:
+  - |
+    Fix ``idrac-redfish`` clean/deploy step ``import_configuration`` to handle
+    completed import configuration tasks that are deleted by iDRAC before
+    Ironic has checked the task's status.
+    Prior to iDRAC firmware version 5.00.00.00, completed tasks are deleted
+    after 1 minute in iDRAC Redfish. That is not always sufficient to check
+    their status in the periodic check that runs every minute by default.
+    Before this fix, the node got stuck in wait mode forever. This is fixed by
+    failing the step with an error advising to decrease the periodic check
+    interval or upgrade iDRAC firmware if not done already.