Add iDRAC management via Redfish to idrac HW type

This change adds support for managing an iDRAC -- reset, clear job
queue, and reset to known good state -- via the Redfish out-of-band
(OOB) management protocol to the idrac hardware type. This is offered by
three new cleaning steps in the idrac-redfish management hardware
interface implementation: reset_idrac, clear_job_queue, and
known_good_state.
known_good_state both resets an iDRAC and clears its job queue.

Story: 2007617
Task: 39628
Depends-On: https://review.opendev.org/c/x/sushy-oem-idrac/+/782254

Change-Id: Iad69c8d7cf3a373f5cfcc619a479a106efa2e4d4
This commit is contained in:
DhuldevValekar3 2020-04-21 07:15:02 -05:00 committed by Aija Jauntēva
parent ff4c370d4b
commit 601f6d51f6
5 changed files with 197 additions and 3 deletions

View File

@ -20,4 +20,4 @@ ansible>=2.7
python-ibmcclient>=0.2.2,<0.3.0 python-ibmcclient>=0.2.2,<0.3.0
# Dell EMC iDRAC sushy OEM extension # Dell EMC iDRAC sushy OEM extension
sushy-oem-idrac>=2.0.0,<3.0.0 sushy-oem-idrac>=2.1.0,<3.0.0

View File

@ -2,7 +2,7 @@
# #
# Copyright 2014 Red Hat, Inc. # Copyright 2014 Red Hat, Inc.
# All Rights Reserved. # All Rights Reserved.
# Copyright (c) 2017-2020 Dell Inc. or its subsidiaries. # Copyright (c) 2017-2021 Dell Inc. or its subsidiaries.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); you may # Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain # not use this file except in compliance with the License. You may obtain
@ -637,6 +637,122 @@ class DracRedfishManagement(redfish_management.RedfishManagement):
driver_internal_info.pop('export_configuration_location', None) driver_internal_info.pop('export_configuration_location', None)
node.driver_internal_info = driver_internal_info node.driver_internal_info = driver_internal_info
@METRICS.timer('DracRedfishManagement.clear_job_queue')
@base.clean_step(priority=0)
def clear_job_queue(self, task):
    """Clear the iDRAC job queue of the node.

    The managers of the node's Redfish system are tried one after
    another; the step finishes as soon as one of them accepts the
    request to delete job id ``JID_CLEARALL``.

    :param task: a TaskManager instance containing the node to act
                 on.
    :raises: RedfishError if the Dell OEM extension cannot be loaded
             for a manager, or if no manager of the system was able
             to clear the job queue.
    """
    system = redfish_utils.get_system(task.node)

    for manager in system.managers:
        try:
            oem_manager = manager.get_oem_extension('Dell')
        except sushy.exceptions.OEMExtensionNotFoundError as exc:
            # A missing OEM extension is fatal; do not try further
            # managers.
            error_msg = (_("Search for Sushy OEM extension Python package "
                           "'sushy-oem-idrac' failed for node %(node)s. "
                           "Ensure it is installed. Error: %(error)s") %
                         {'node': task.node.uuid, 'error': exc})
            LOG.error(error_msg)
            raise exception.RedfishError(error=error_msg)

        try:
            oem_manager.job_service.delete_jobs(job_ids=['JID_CLEARALL'])
        except sushy.exceptions.SushyError as exc:
            # This manager failed; note it and fall through to the
            # next one.
            details = {'system': system.uuid or system.identity,
                       'manager': manager.uuid or manager.identity,
                       'node': task.node.uuid,
                       'error': exc}
            error_msg = ('Failed to clear iDRAC job queue with system '
                         '%(system)s manager %(manager)s for node '
                         '%(node)s. Will try next manager, if available. '
                         'Error: %(error)s' % details)
            LOG.debug(error_msg)
        else:
            LOG.info('Cleared iDRAC job queue for node %(node)s',
                     {'node': task.node.uuid})
            return

    # Reached only when the system has no managers or every manager
    # failed.
    error_msg = (_('iDRAC Redfish clear job queue failed for node '
                   '%(node)s, because system %(system)s has no '
                   'manager%(no_manager)s.') %
                 {'node': task.node.uuid,
                  'system': system.uuid or system.identity,
                  'no_manager': ' which could' if system.managers else ''})
    LOG.error(error_msg)
    raise exception.RedfishError(error=error_msg)
@METRICS.timer('DracRedfishManagement.reset_idrac')
@base.clean_step(priority=0)
def reset_idrac(self, task):
    """Reset the iDRAC of the node.

    The managers of the node's Redfish system are tried one after
    another. After the first successful reset request, the step waits
    until the Redfish system can be retrieved again and then finishes.

    :param task: a TaskManager instance containing the node to act
                 on.
    :raises: RedfishError if the Dell OEM extension cannot be loaded
             for a manager, or if no manager of the system was able
             to reset the iDRAC.
    """
    system = redfish_utils.get_system(task.node)

    for manager in system.managers:
        try:
            oem_manager = manager.get_oem_extension('Dell')
        except sushy.exceptions.OEMExtensionNotFoundError as exc:
            # A missing OEM extension is fatal; do not try further
            # managers.
            error_msg = (_("Search for Sushy OEM extension Python package "
                           "'sushy-oem-idrac' failed for node %(node)s. "
                           "Ensure it is installed. Error: %(error)s") %
                         {'node': task.node.uuid, 'error': exc})
            LOG.error(error_msg)
            raise exception.RedfishError(error=error_msg)

        try:
            oem_manager.reset_idrac()
        except sushy.exceptions.SushyError as exc:
            # This manager failed; note it and fall through to the
            # next one.
            details = {'system': system.uuid or system.identity,
                       'manager': manager.uuid or manager.identity,
                       'node': task.node.uuid,
                       'error': exc}
            error_msg = ('Failed to reset iDRAC with system %(system)s '
                         'manager %(manager)s for node %(node)s. Will try '
                         'next manager, if available. Error: %(error)s' %
                         details)
            LOG.debug(error_msg)
        else:
            # Block until the system answers again before reporting
            # success.
            redfish_utils.wait_until_get_system_ready(task.node)
            LOG.info('Reset iDRAC for node %(node)s',
                     {'node': task.node.uuid})
            return

    # Reached only when the system has no managers or every manager
    # failed.
    error_msg = (_('iDRAC Redfish reset iDRAC failed for node '
                   '%(node)s, because system %(system)s has no '
                   'manager%(no_manager)s.') %
                 {'node': task.node.uuid,
                  'system': system.uuid or system.identity,
                  'no_manager': ' which could' if system.managers else ''})
    LOG.error(error_msg)
    raise exception.RedfishError(error=error_msg)
@METRICS.timer('DracRedfishManagement.known_good_state')
@base.clean_step(priority=0)
def known_good_state(self, task):
    """Bring the iDRAC back to a known good state.

    This is the combination of two other clean steps, run in order:
    the iDRAC is reset first and its job queue is cleared afterwards.

    :param task: a TaskManager instance containing the node to act
                 on.
    :raises: RedfishError on an error.
    """
    # Order matters: reset first, then clear the job queue.
    for step in (self.reset_idrac, self.clear_job_queue):
        step(task)

    LOG.info('Reset iDRAC to known good state for node %(node)s',
             {'node': task.node.uuid})
class DracWSManManagement(base.ManagementInterface): class DracWSManManagement(base.ManagementInterface):

View File

@ -1,5 +1,6 @@
# Copyright 2017 Red Hat, Inc. # Copyright 2017 Red Hat, Inc.
# All Rights Reserved. # All Rights Reserved.
# Copyright (c) 2020-2021 Dell Inc. or its subsidiaries.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); you may # Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain # not use this file except in compliance with the License. You may obtain
@ -372,3 +373,28 @@ def get_enabled_macs(task, system):
else: else:
LOG.debug("No ethernet interface information is available " LOG.debug("No ethernet interface information is available "
"for node %(node)s", {'node': task.node.uuid}) "for node %(node)s", {'node': task.node.uuid})
@tenacity.retry(
    retry=tenacity.retry_if_exception_type(
        exception.RedfishConnectionError),
    stop=tenacity.stop_after_attempt(CONF.redfish.connection_attempts),
    wait=tenacity.wait_fixed(CONF.redfish.connection_retry_interval),
    reraise=True)
def wait_until_get_system_ready(node):
    """Wait until Redfish system is ready.

    Tries to fetch the node's Redfish system. A ``BadRequestError`` from
    sushy is treated as "not ready yet" and converted into a
    ``RedfishConnectionError``, which the ``tenacity`` decorator retries
    at a fixed interval (``[redfish]connection_retry_interval``) for at
    most ``[redfish]connection_attempts`` attempts.

    :param node: an Ironic node object
    :returns: the system object returned by the connection's
              ``get_system`` call.
    :raises: RedfishConnectionError on time out.
    """
    driver_info = parse_driver_info(node)
    system_id = driver_info['system_id']
    try:
        with SessionCache(driver_info) as conn:
            return conn.get_system(system_id)
    except sushy.exceptions.BadRequestError as e:
        # Pass the format string and its mapping as separate arguments
        # so the logger interpolates them; bundling them in a tuple
        # would log the tuple's repr instead of the message.
        LOG.warning("System is not ready for node %(node)s, with error "
                    "%(error)s, so retrying it",
                    {'node': node.uuid, 'error': e})
        # Raising the retried exception type is what triggers the next
        # attempt.
        raise exception.RedfishConnectionError(node=node.uuid, error=e)

View File

@ -2,7 +2,7 @@
# #
# Copyright 2014 Red Hat, Inc. # Copyright 2014 Red Hat, Inc.
# All Rights Reserved. # All Rights Reserved.
# Copyright (c) 2017-2018 Dell Inc. or its subsidiaries. # Copyright (c) 2017-2021 Dell Inc. or its subsidiaries.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); you may # Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain # not use this file except in compliance with the License. You may obtain
@ -839,6 +839,10 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest):
driver_info=INFO_DICT) driver_info=INFO_DICT)
self.management = drac_mgmt.DracRedfishManagement() self.management = drac_mgmt.DracRedfishManagement()
self.config(enabled_hardware_types=['idrac'],
enabled_power_interfaces=['idrac-redfish'],
enabled_management_interfaces=['idrac-redfish'])
def test_export_configuration_name_missing(self): def test_export_configuration_name_missing(self):
task = mock.Mock(node=self.node, context=self.context) task = mock.Mock(node=self.node, context=self.context)
self.assertRaises(exception.MissingParameterValue, self.assertRaises(exception.MissingParameterValue,
@ -1423,3 +1427,42 @@ class DracRedfishManagementTestCase(test_utils.BaseDracTest):
mock_deploy_handler.assert_called_once_with( mock_deploy_handler.assert_called_once_with(
task, 'error', 'log message') task, 'error', 'log message')
@mock.patch.object(drac_mgmt, 'redfish_utils', autospec=True)
def test_clear_job_queue(self, mock_redfish_utils):
    """clear_job_queue asks the Dell OEM manager to delete all jobs."""
    # A system exposing a single manager whose OEM extension is a mock.
    manager = mock.MagicMock()
    oem_manager = manager.get_oem_extension.return_value
    mock_redfish_utils.get_system.return_value.managers = [manager]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        task.driver.management.clear_job_queue(task)

        oem_manager.job_service.delete_jobs.assert_called_once_with(
            job_ids=['JID_CLEARALL'])
@mock.patch.object(drac_mgmt, 'redfish_utils', autospec=True)
def test_reset_idrac(self, mock_redfish_utils):
    """reset_idrac resets via the OEM manager, then waits for the system.

    ``redfish_utils`` is replaced as a whole in the management module,
    so both ``get_system`` and ``wait_until_get_system_ready`` are
    mocks here.
    """
    mock_system = mock_redfish_utils.get_system.return_value
    mock_manager = mock.MagicMock()
    mock_system.managers = [mock_manager]
    mock_manager_oem = mock_manager.get_oem_extension.return_value

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        task.driver.management.reset_idrac(task)

        mock_manager_oem.reset_idrac.assert_called_once_with()
        # The step must not report success before it has waited for the
        # Redfish system to become reachable again.
        mock_wait = mock_redfish_utils.wait_until_get_system_ready
        mock_wait.assert_called_once_with(task.node)
@mock.patch.object(drac_mgmt, 'redfish_utils', autospec=True)
def test_known_good_state(self, mock_redfish_utils):
    """known_good_state both clears the job queue and resets the iDRAC."""
    # A system exposing a single manager whose OEM extension is a mock.
    manager = mock.MagicMock()
    oem_manager = manager.get_oem_extension.return_value
    mock_redfish_utils.get_system.return_value.managers = [manager]

    with task_manager.acquire(self.context, self.node.uuid,
                              shared=False) as task:
        task.driver.management.known_good_state(task)

        oem_manager.job_service.delete_jobs.assert_called_once_with(
            job_ids=['JID_CLEARALL'])
        oem_manager.reset_idrac.assert_called_once_with()

View File

@ -0,0 +1,9 @@
---
features:
- |
Adds support for managing an iDRAC -- reset, clear job queue, and reset to
known good state -- via the Redfish out-of-band (OOB) management protocol
to the ``idrac`` hardware type. This is offered by three new cleaning steps
in the ``idrac-redfish`` management hardware interface implementation:
``reset_idrac``, ``clear_job_queue``, and ``known_good_state``.
``known_good_state`` both resets an iDRAC and clears its job queue.