update NVIDIA NIC firmware images and settings by ironic-python-agent
Add "update_nvidia_nic_firmware_image" and "update_nvidia_nic_firmware_settings" clean steps to MellanoxDeviceHardwareManager. With these two steps, ironic-python-agent can update the firmware image and the firmware settings of NVIDIA NICs through the manual cleaning command. The clean steps require the mstflint package to be installed on the image. The "update_nvidia_nic_firmware_image" clean step requires an "images" parameter to be passed to the clean command. The "images" parameter is a JSON blob containing a list of images, where each image is a map of: * url: URL of the firmware image (file://, http://) * checksum: checksum of the provided image * checksumType: md5/sha512/sha256 * componentFlavor: PSID of the NIC * version: version of the FW. The "update_nvidia_nic_firmware_settings" clean step requires a "settings" parameter to be passed to the clean command. The "settings" parameter is a JSON blob containing a list of settings, where each entry is a map of: * deviceID: device ID * globalConfig: global config * function0Config: function 0 config * function1Config: function 1 config. Change-Id: Icfaffd7c58c3c73c3fa28cfc2a6c954d2c93c16e Story: 2010228 Task: 46016
This commit is contained in:
parent
06413e54a0
commit
2c7f95e3ac
@ -11,12 +11,14 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
|
||||
from oslo_log import log
|
||||
|
||||
from ironic_python_agent import errors
|
||||
from ironic_python_agent import hardware
|
||||
from ironic_python_agent.hardware_managers.nvidia import nvidia_fw_update
|
||||
from ironic_python_agent import netutils
|
||||
|
||||
LOG = log.getLogger()
|
||||
@ -111,3 +113,53 @@ class MellanoxDeviceHardwareManager(hardware.HardwareManager):
|
||||
vendor=vendor,
|
||||
product=hardware._get_device_info(interface_name, 'net', 'device'),
|
||||
client_id=client_id)
|
||||
|
||||
def get_clean_steps(self, node, ports):
    """Get a list of clean steps with priority.

    Two steps are advertised, both with priority 0 on the 'deploy'
    interface: 'update_nvidia_nic_firmware_image' (required argument
    'images') and 'update_nvidia_nic_firmware_settings' (required
    argument 'settings').

    :param node: The node object as provided by Ironic.
    :param ports: Port objects as provided by Ironic.
    :returns: A list of cleaning steps, as a list of dicts.
    """
    return [
        {
            'step': 'update_nvidia_nic_firmware_image',
            'priority': 0,
            'interface': 'deploy',
            'reboot_requested': True,
            'abortable': False,
            'argsinfo': {
                'images': {
                    # The key is 'componentFlavor' (not 'componentProfile'):
                    # that is the name the firmware update code reads.
                    'description': 'Json blob contains a list of images,'
                                   ' where each image contains a map of '
                                   'url: to firmware image (file://, '
                                   'http://), '
                                   'checksum: of the provided image, '
                                   'checksumType: md5/sha512/sha256, '
                                   'componentFlavor: PSID of the nic, '
                                   'version: of the FW',
                    'required': True,
                },
            },
        },
        {
            'step': 'update_nvidia_nic_firmware_settings',
            'priority': 0,
            'interface': 'deploy',
            'reboot_requested': True,
            'abortable': False,
            'argsinfo': {
                'settings': {
                    'description': 'Json blob contains a list of '
                                   'settings per device ID, where each '
                                   'settings contains a map of '
                                   'deviceID: device ID, '
                                   'globalConfig: global config, '
                                   'function0Config: function 0 config, '
                                   'function1Config: function 1 config',
                    'required': True,
                },
            },
        },
    ]
|
||||
|
||||
def update_nvidia_nic_firmware_image(self, node, ports, images):
    """Clean step: update the firmware image of the NVIDIA NICs.

    Hands ``images`` to
    ``nvidia_fw_update.update_nvidia_nic_firmware_image``; ``node`` and
    ``ports`` are accepted but not used here.

    :param node: The node object as provided by Ironic.
    :param ports: Port objects as provided by Ironic.
    :param images: the 'images' argument of the clean step.
    """
    apply_images = nvidia_fw_update.update_nvidia_nic_firmware_image
    apply_images(images)
|
||||
|
||||
def update_nvidia_nic_firmware_settings(self, node, ports, settings):
    """Clean step: update the firmware settings of the NVIDIA NICs.

    Hands ``settings`` to
    ``nvidia_fw_update.update_nvidia_nic_firmware_settings``; ``node``
    and ``ports`` are accepted but not used here.

    :param node: The node object as provided by Ironic.
    :param ports: Port objects as provided by Ironic.
    :param settings: the 'settings' argument of the clean step.
    """
    apply_settings = nvidia_fw_update.update_nvidia_nic_firmware_settings
    apply_settings(settings)
|
||||
|
933
ironic_python_agent/hardware_managers/nvidia/nvidia_fw_update.py
Normal file
933
ironic_python_agent/hardware_managers/nvidia/nvidia_fw_update.py
Normal file
@ -0,0 +1,933 @@
|
||||
# Copyright 2022 Nvidia
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
from urllib import error as urlError
|
||||
from urllib.parse import urlparse
|
||||
from urllib import request
|
||||
|
||||
from ironic_lib.common.i18n import _
|
||||
from ironic_lib.exception import IronicException
|
||||
from oslo_concurrency import processutils
|
||||
from oslo_log import log
|
||||
from oslo_utils import fileutils
|
||||
|
||||
from ironic_python_agent import utils
|
||||
|
||||
# Patterns applied with re.match() to single lines of mstflint query output.
FW_VERSION_REGEX = r'FW Version:\s*\t*(?P<fw_ver>\d+\.\d+\.\d+)'
RUNNING_FW_VERSION_REGEX = (
    r'FW Version\(Running\):\s*\t*(?P<fw_ver>\d+\.\d+\.\d+)'
)
PSID_REGEX = r'PSID:\s*\t*(?P<psid>\w+)'

# A parameter name followed by either a single index (NAME[3]) or an index
# range (NAME[3..7]).
ARRAY_PARAM_REGEX = (
    r'(?P<param_name>\w+)\[((?P<index>\d+)|'
    r'((?P<first_index>\d+)\.\.(?P<last_index>\d+)))\]'
)
# The value reported for an array parameter, e.g. Array[0..7].
ARRAY_PARAM_VALUE_REGEX = (
    r'Array\[(?P<first_index>\d+)'
    r'\.\.(?P<last_index>\d+)\]'
)

# Matched against the class field of an ``lspci -Dn`` line.
NETWORK_DEVICE_REGEX = r'02\d\d'
|
||||
LOG = log.getLogger()
|
||||
|
||||
"""
|
||||
Example of Nvidia NIC Firmware images list:
|
||||
[
|
||||
{
|
||||
"url": "file:///firmware_images/fw1.bin",
|
||||
"checksum": "a94e683ea16d9ae44768f0a65942234d",
|
||||
"checksumType": "md5",
|
||||
"componentFlavor": "MT_0000000540",
|
||||
"version": "24.34.1002"
|
||||
},
|
||||
{
|
||||
"url": "http://10.10.10.10/firmware_images/fw2.bin",
|
||||
"checksum": "a94e683ea16d9ae44768f0a65942234c",
|
||||
"checksumType": "sha512",
|
||||
"componentFlavor": "MT_0000000652",
|
||||
"version": "24.34.1002"
|
||||
}
|
||||
]
|
||||
|
||||
Example of Nvidia NIC Firmware settings list:
|
||||
[
|
||||
{
|
||||
"deviceID": "1017",
|
||||
"globalConfig": {
|
||||
"NUM_OF_VFS": 127,
|
||||
"SRIOV_EN": True
|
||||
},
|
||||
"function0Config": {
|
||||
"PF_TOTAL_SF": 500
|
||||
},
|
||||
"function1Config": {
|
||||
"PF_TOTAL_SF": 600
|
||||
}
|
||||
},
|
||||
{
|
||||
"deviceID": "101B",
|
||||
"globalConfig": {
|
||||
"NUM_OF_VFS": 127,
|
||||
"SRIOV_EN": True
|
||||
},
|
||||
"function0Config": {
|
||||
"PF_TOTAL_SF": 500
|
||||
},
|
||||
"function1Config": {
|
||||
"PF_TOTAL_SF": 600
|
||||
}
|
||||
}
|
||||
]
|
||||
"""
|
||||
|
||||
|
||||
def check_prereq():
    """Check that all needed tools are available in the system.

    Runs each required tool once with its version flag, in the order
    mstflint, mstconfig, mstfwreset, lspci; the first failure is logged
    and re-raised.

    :returns: None
    :raises: processutils.ProcessExecutionError
    """
    version_probes = (
        ('mstflint', '-v'),
        ('mstconfig', '-v'),
        ('mstfwreset', '-v'),
        ('lspci', '--version'),
    )
    try:
        for tool, version_flag in version_probes:
            utils.execute(tool, version_flag)
    except processutils.ProcessExecutionError as e:
        LOG.error('Failed Prerequisite check. %s', e)
        raise e
|
||||
|
||||
|
||||
class InvalidFirmwareImageConfig(IronicException):
    """A user provided firmware image entry is missing required fields."""

    _msg_fmt = _('Invalid firmware image config: %(error_msg)s')
|
||||
|
||||
|
||||
class InvalidFirmwareSettingsConfig(IronicException):
    """A user provided firmware settings entry is not valid."""

    _msg_fmt = _('Invalid firmware settings config: %(error_msg)s')
|
||||
|
||||
|
||||
class MismatchChecksumError(IronicException):
    """The provided checksum differs from the one computed for the image."""

    _msg_fmt = _('Mismatch Checksum for the firmware image: %(error_msg)s')
|
||||
|
||||
|
||||
class MismatchComponentFlavor(IronicException):
    """The provided PSID differs from the PSID found in the image."""

    _msg_fmt = _('Mismatch Component Flavor: %(error_msg)s')
|
||||
|
||||
|
||||
class MismatchFWVersion(IronicException):
    """The provided firmware version differs from the image's version."""

    _msg_fmt = _('Mismatch Firmware version: %(error_msg)s')
|
||||
|
||||
|
||||
class DuplicateComponentFlavor(IronicException):
    """More than one provided image targets the same component flavor."""

    _msg_fmt = _(
        'Duplicate Component Flavor for the firmware image: '
        '%(error_msg)s'
    )
|
||||
|
||||
|
||||
class DuplicateDeviceID(IronicException):
    """More than one provided settings entry targets the same device ID."""

    _msg_fmt = _(
        'Duplicate Device ID for firmware settings: '
        '%(error_msg)s'
    )
|
||||
|
||||
|
||||
class UnSupportedConfigByMstflintPackage(IronicException):
    """A requested configuration is not known to the mstconfig tool."""

    _msg_fmt = _('Unsupported config by mstflint package: %(error_msg)s')
|
||||
|
||||
|
||||
class UnSupportedConfigByFW(IronicException):
    """A requested configuration is not supported by the current firmware."""

    _msg_fmt = _('Unsupported config by Firmware: %(error_msg)s')
|
||||
|
||||
|
||||
class InvalidURLScheme(IronicException):
    """The firmware image URL uses a scheme that is not supported."""

    _msg_fmt = _('Invalid URL Scheme: %(error_msg)s')
|
||||
|
||||
|
||||
class NvidiaNicFirmwareOps(object):
    """Perform various Firmware related operations on nic device"""

    def __init__(self, dev):
        """Remember the device and start with an empty query cache.

        :param dev: string, device identifier passed to the ``-d`` option
                    of mstflint and mstfwreset
        """
        self.dev = dev
        # Result of the last device query; filled by _query_device().
        self.dev_info = {}

    @staticmethod
    def parse_mstflint_query_output(out):
        """Parse Mstflint query output

        For now just extract 'FW Version', 'FW Version(Running)' and
        'PSID'. Declared as a static method so that it can be called both
        on the class and on an instance.

        :param out: string, mstflint query output
        :returns: dict of query attributes; holds only those of the keys
                  'fw_ver', 'running_fw_ver' and 'psid' that were found
        """
        query_info = {}
        for line in out.split('\n'):
            line = line.strip()
            fw_ver = re.match(FW_VERSION_REGEX, line)
            running_fw_ver = re.match(RUNNING_FW_VERSION_REGEX, line)
            psid = re.match(PSID_REGEX, line)
            if fw_ver is not None:
                query_info['fw_ver'] = fw_ver.group('fw_ver')
            if running_fw_ver is not None:
                query_info['running_fw_ver'] = running_fw_ver.group('fw_ver')
            if psid is not None:
                query_info['psid'] = psid.group('psid')
        return query_info

    def _query_device(self, force=False):
        """Get firmware information from nvidia nic device

        :param force: bool, force device query, even if query was executed
                      in previous calls.
        :returns: dict of firmware image attributes
        :raises: processutils.ProcessExecutionError
        """
        # The cache is valid only if it was filled for this very device.
        if not force and self.dev_info.get('device', '') == self.dev:
            return self.dev_info
        try:
            cmd = ('mstflint', '-d', self.dev, '-qq', 'query')
            out, _r = utils.execute(*cmd)
        except processutils.ProcessExecutionError as e:
            LOG.error('Failed to query firmware of device %s: %s',
                      self.dev, e)
            raise
        self.dev_info = NvidiaNicFirmwareOps.parse_mstflint_query_output(out)
        self.dev_info['device'] = self.dev
        return self.dev_info

    def get_nic_psid(self):
        """Get the psid of nvidia nic device

        :returns: string, the psid of the nic device, or None when the
                  query output carried no PSID line
        """
        return self._query_device().get('psid')

    def is_image_changed(self):
        """Check if image changed and nic device requires firmware reset

        The reset is needed before applying any configurations on the
        device. Currently the reset happens if image was changed, i.e. the
        query reports a running firmware version that differs from the
        firmware version.

        :returns: bool, True if image changed
        """
        self._query_device(force=True)
        is_image_changed = 'running_fw_ver' in self.dev_info and \
            self.dev_info['running_fw_ver'] != \
            self.dev_info['fw_ver']
        return is_image_changed

    def _need_update(self, fw_version):
        """Check if nic device requires firmware update

        :param fw_version: string, the firmware version of image
        :returns: bool, True if update is needed
        """
        self._query_device(force=True)
        LOG.info('Device firmware version: %s , Image firmware version: %s',
                 self.dev_info['fw_ver'], fw_version)
        return self.dev_info['fw_ver'] != fw_version

    def _burn_firmware(self, image_path):
        """Burn firmware on device

        :param image_path: string, firmware binary file path
        :returns: None
        :raises: processutils.ProcessExecutionError
        """
        LOG.info('Updating firmware image (%s) for device: %s',
                 image_path, self.dev)
        try:
            cmd = ('mstflint', '-d', self.dev, '-i', image_path,
                   '-y', 'burn')
            utils.execute(*cmd)
        except processutils.ProcessExecutionError as e:
            LOG.error('Failed to update firmware image for device %s, %s',
                      self.dev, e)
            raise
        LOG.info('Device %s: firmware image successfully updated.', self.dev)

    def reset_device(self, raise_exception=False):
        """Reset nvidia nic to load the new firmware image

        :param raise_exception: bool, when False (the default) a failed
                                reset is only logged; when True the
                                failure is re-raised
        :returns: None
        :raises: processutils.ProcessExecutionError
        """
        LOG.info('Device %s: Performing firmware reset.', self.dev)
        cmd = ('mstfwreset', '-d', self.dev, '-y', '--sync', '1', 'reset')
        try:
            utils.execute(*cmd)
            LOG.info('Device %s: Firmware successfully reset.', self.dev)
        except processutils.ProcessExecutionError as e:
            LOG.error('Failed to reset device %s %s', self.dev, e)
            if raise_exception:
                raise

    def fw_update_if_needed(self, version, image_path):
        """Update firmware if the current version not equal image version

        :param version: string, the firmware version of image
        :param image_path: string, the firmware image path
        :returns: None
        """
        if self._need_update(version):
            # A reported running version means a reset is performed first,
            # and the burn only happens if that reset succeeds.
            if 'running_fw_ver' in self.dev_info:
                self.reset_device(raise_exception=True)
            self._burn_firmware(image_path)
        else:
            LOG.info('Firmware update is not required for Device.')
|
||||
|
||||
|
||||
class NvidiaNic(object):
    """Record describing one nvidia nic.

    Holds the pci address, the device ID, the device PSID and an instance
    of NvidiaNicFirmwareOps for that nic.
    """

    def __init__(self, dev_pci, dev_id, dev_psid, dev_ops):
        self.dev_pci, self.dev_id = dev_pci, dev_id
        self.dev_psid, self.dev_ops = dev_psid, dev_ops
|
||||
|
||||
|
||||
class NvidiaNics(object):
    """Discover and retrieve Nvidia Nics on the system.

    Can be used as an iterator once discover has been called.
    """

    def __init__(self):
        self._devs = []
        self._devs_psids = []
        self._dev_ids = []

    def discover(self):
        """Discover Nvidia Nics in the system.

        Lists the PCI devices of vendor 15b3 with lspci and keeps those
        whose class field matches NETWORK_DEVICE_REGEX. The PSID of every
        kept device is queried through NvidiaNicFirmwareOps. A non-empty
        result of a previous call is reused without running lspci again.

        :returns: list of NvidiaNic objects found on the system
        :raises: processutils.ProcessExecutionError
        """
        if self._devs:
            return self._devs

        cmd = ('lspci', '-Dn', '-d', '15b3:')
        try:
            out, _r = utils.execute(*cmd)
        except processutils.ProcessExecutionError as e:
            LOG.error('Exception occurred while discovering Nvidia Nics %s',
                      e)
            raise

        # Collect into locals and publish at the end, so that a failure
        # half way through does not leave partially filled state behind.
        devs = []
        dev_ids = []
        devs_psids = []
        for line in out.strip().split('\n'):
            if not line:
                continue
            fields = line.split()
            dev_class = fields[1].split(':')[0]
            if not re.match(NETWORK_DEVICE_REGEX, dev_class):
                continue
            dev_pci = fields[0]
            dev_id = line.split('15b3:')[1].split()[0]
            dev_ops = NvidiaNicFirmwareOps(dev_pci)
            dev_psid = dev_ops.get_nic_psid()
            dev_ids.append(dev_id)
            devs_psids.append(dev_psid)
            devs.append(NvidiaNic(dev_pci, dev_id, dev_psid, dev_ops))

        self._dev_ids = dev_ids
        self._devs_psids = devs_psids
        self._devs = devs
        return self._devs

    def get_psids_list(self):
        """Get the PSIDs of Nvidia Nics in the system.

        :returns: set of PSIDs of the discovered Nvidia Nics
        """
        return set(self._devs_psids)

    def get_ids_list(self):
        """Get the device IDs of Nvidia Nics in the system.

        :returns: set of device IDs of the discovered Nvidia Nics
        """
        return set(self._dev_ids)

    def __iter__(self):
        return iter(self._devs)
|
||||
|
||||
|
||||
class NvidiaNicFirmwareBinary(object):
    """Manage one nvidia nic firmware binary image.

    Fetches the image into a temporary directory, validates it and
    provides its path on the system through ``dest_file_path``.
    """

    def __init__(self, url, checksum, checksum_type,
                 component_flavor, version):
        """Fetch the image and validate PSID, version and checksum.

        :param url: string, firmware image url (file:// or http://)
        :param checksum: string, expected checksum of the image
        :param checksum_type: string, checksum algorithm name
        :param component_flavor: string, expected PSID of the image
        :param version: string, expected firmware version of the image
        """
        self.url = url
        self.checksum = checksum
        self.checksum_type = checksum_type
        self.psid = component_flavor
        self.version = version
        self.image_info = {}
        self._process_url()
        self._validate_image_psid()
        self._validate_image_firmware_version()
        self._validate_image_checksum()

    def __del__(self):
        self._cleanup_file()

    def _cleanup_file(self):
        """Delete the temporary downloaded firmware image if exist in cleanup

        Does nothing when no destination path was ever assigned, e.g. when
        construction failed before the temporary directory was created.

        :returns: None
        """
        dest_file_path = getattr(self, 'dest_file_path', None)
        if not dest_file_path:
            return
        tmp_dir = os.path.dirname(dest_file_path)
        if os.path.exists(tmp_dir):
            try:
                shutil.rmtree(tmp_dir)
            except Exception as e:
                LOG.error('Failed to remove temporary directory for FW '
                          'binary: %s', e)

    def _download_file_based_fw(self):
        """Download the firmware image file from the provided file url (move)

        :returns: None
        :raises: Exception
        """
        src_file = self.parsed_url.path
        try:
            LOG.info('Moving file: %s to %s', self.url,
                     self.dest_file_path)
            # NOTE(review): this moves the source file rather than copying
            # it, so the same file:// url cannot be fetched a second time
            # -- confirm this is intended.
            shutil.move(src_file, self.dest_file_path)
        except Exception as e:
            LOG.error('Failed to move file: %s, %s', src_file, e)
            raise

    def _download_http_based_fw(self):
        """Download the firmware image file from the provided url

        :returns: None
        :raises: urlError.HTTPError
        :raises: urlError.URLError
        """
        LOG.info('Downloading file: %s to %s', self.url,
                 self.dest_file_path)
        try:
            url_data = request.urlopen(self.url)
        # HTTPError is a subclass of URLError, so it has to be handled
        # first to be reachable at all.
        except urlError.HTTPError as http_error:
            LOG.error('Failed to download data: %s', http_error)
            raise
        except urlError.URLError as url_error:
            LOG.error('Failed to open URL data: %s', url_error)
            raise
        try:
            with open(self.dest_file_path, 'wb') as f:
                f.write(url_data.read())
        finally:
            # Release the connection whether or not the write succeeded.
            url_data.close()

    def _process_url(self):
        """Process the firmware url and download the image.

        The image is placed in a freshly created temporary directory
        under the file name taken from the url path.
        The supported firmware URL schemes are (file://, http://)

        :returns: None
        :raises: InvalidURLScheme, for unsupported firmware url
        """
        parsed_url = urlparse(self.url)
        self.parsed_url = parsed_url
        file_name = os.path.basename(str(parsed_url.path))
        self.dest_file_path = os.path.join(tempfile.mkdtemp(
            prefix='nvidia_firmware'), file_name)
        url_scheme = parsed_url.scheme
        if url_scheme == 'file':
            self._download_file_based_fw()
        elif url_scheme == 'http':
            self._download_http_based_fw()
        else:
            err = 'Firmware URL scheme %s is not supported.' \
                  'The supported firmware URL schemes are' \
                  '(http://, file://)' % url_scheme
            raise InvalidURLScheme(error_msg=_(err))

    def _get_info(self):
        """Get firmware information from firmware binary image

        Caller should wrap this call under try catch to skip non compliant
        firmware binaries. A non-empty result is cached and reused.

        :returns: dict of firmware image attributes
        :raises: processutils.ProcessExecutionError
        """
        if self.image_info:
            return self.image_info
        try:
            cmd = ('mstflint', '-i', self.dest_file_path, 'query')
            out, _r = utils.execute(*cmd)
        except processutils.ProcessExecutionError as e:
            LOG.error('Failed to query firmware image %s, %s',
                      self.dest_file_path, e)
            raise
        self.image_info = NvidiaNicFirmwareOps.parse_mstflint_query_output(
            out)
        return self.image_info

    def _validate_image_psid(self):
        """Validate that the provided PSID matches the image PSID.

        :raises: MismatchComponentFlavor if they are not equal
        """
        image_psid = self._get_info().get('psid')
        if image_psid != self.psid:
            err = 'The provided psid %s does not match the image psid %s' % \
                  (self.psid, image_psid)
            LOG.error(err)
            raise MismatchComponentFlavor(error_msg=_(err))

    def _validate_image_firmware_version(self):
        """Validate that the provided version matches the image version.

        :raises: MismatchFWVersion if they are not equal
        """
        image_version = self._get_info().get('fw_ver')
        if image_version != self.version:
            err = 'The provided firmware version %s does not match ' \
                  'image firmware version %s' % (self.version, image_version)
            LOG.error(err)
            raise MismatchFWVersion(error_msg=_(err))

    def _validate_image_checksum(self):
        """Validate the provided checksum against the calculated one.

        The checksum is computed over the fetched file with the algorithm
        named by ``checksum_type``.

        :raises: MismatchChecksumError if they are not equal
        """
        calculated_checksum = fileutils.compute_file_checksum(
            self.dest_file_path, algorithm=self.checksum_type)
        if self.checksum != calculated_checksum:
            err = 'Mismatch provided checksum %s for image %s' % (
                self.checksum, self.url)
            LOG.error(err)
            raise MismatchChecksumError(error_msg=_(err))
|
||||
|
||||
|
||||
class NvidiaFirmwareImages(object):
    """Manage the user provided list of nvidia firmware images."""

    def __init__(self, firmware_images):
        self.firmware_images = firmware_images
        # Maps a PSID present on the system to the image provided for it.
        self.filtered_images_psid_dict = {}

    def validate_images_schema(self):
        """Validate the provided firmware images list schema

        Every image needs a truthy value for each of url, checksum,
        checksumType, componentFlavor and version.

        :raises: InvalidFirmwareImageConfig if any param is missing
        """
        required_keys = ('url', 'checksum', 'checksumType',
                         'componentFlavor', 'version')
        for image in self.firmware_images:
            if all(image.get(key) for key in required_keys):
                continue
            err = ('Invalid parameters for image %s,'
                   'please provide the following parameters '
                   'url, checksum, checksumType, componentFlavor, '
                   'version') % image
            LOG.error(err)
            raise InvalidFirmwareImageConfig(error_msg=_(err))

    def filter_images(self, psids_list):
        """Filter firmware images according to the system nics PSIDs.

        Builds the map from PSIDs found on the system to the user
        provided images. Images whose PSID is not on the system are only
        reported at debug level. Duplicate PSID is not allowed.

        :param psids_list: list of psids of machines nics
        :returns: None
        :raises: DuplicateComponentFlavor
        """
        for image in self.firmware_images:
            flavor = image.get('componentFlavor')
            if flavor not in psids_list:
                LOG.debug('Image with component Flavor %s does not match '
                          'any nic in the system',
                          flavor)
                continue
            if self.filtered_images_psid_dict.get(flavor):
                err = 'Duplicate componentFlavor %s' % flavor
                LOG.error(err)
                raise DuplicateComponentFlavor(error_msg=_(err))
            self.filtered_images_psid_dict[flavor] = image

    def apply_net_firmware_update(self, nvidia_nics):
        """Apply nic firmware update for all nvidia nics on the system

        Only nics whose PSID maps to a user provided firmware image are
        handled, and only once per pci address prefix.

        :param nvidia_nics: an object of NvidiaNics
        """
        handled_prefixes = set()
        for nic in nvidia_nics:
            image = self.filtered_images_psid_dict.get(nic.dev_psid)
            if not image:
                continue
            # The pci address without its last character (the function
            # number) identifies the card; skip cards already handled.
            pci_prefix = nic.dev_pci[:-1]
            if pci_prefix in handled_prefixes:
                continue
            handled_prefixes.add(pci_prefix)
            fw_bin = NvidiaNicFirmwareBinary(
                image['url'],
                image['checksum'],
                image['checksumType'],
                image['componentFlavor'],
                image['version'])
            nic.dev_ops.fw_update_if_needed(image['version'],
                                            fw_bin.dest_file_path)
|
||||
|
||||
|
||||
class NvidiaNicConfig(object):
    """Get/Set Nvidia nics configurations"""

    def __init__(self, nvidia_dev, params):
        """Bind a set of requested parameters to one device.

        :param nvidia_dev: device object; its ``dev_pci`` attribute is
                           passed to ``mstconfig -d``
        :param params: dict {'PARAM_NAME': 'Param value to set', ...}
        """
        self.nvidia_dev = nvidia_dev
        self.params = params
        # Raw output of 'mstconfig ... i', fetched on first use.
        self._tool_confs = None
        # Parsed output of 'mstconfig ... q', fetched on first use.
        self.device_conf_dict = {}

    def _mstconfig_parse_data(self, data):
        """Parse the mstconfig query output into a dict.

        Only the lines after the first one containing 'Configurations:'
        are considered; on each of them the first token is the parameter
        name and the second token its value. Lines with fewer than two
        tokens are skipped.

        :param data: mstconfig query output
        :returns: dict of nic configuration
        """
        lines = [line for line in data.split('\n') if line]
        first_conf_line = len(lines)
        for position, line in enumerate(lines):
            if 'Configurations:' in line:
                first_conf_line = position + 1
                break

        data_dict = {}
        for line in lines[first_conf_line:]:
            fields = line.split()
            if len(fields) < 2:
                # Blank-looking or malformed line: nothing to record.
                continue
            data_dict[fields[0]] = fields[1]
        return data_dict

    def _get_device_conf_dict(self):
        """Get device Configurations

        The device is queried once; later calls reuse a non-empty result.

        :returns: dict {"PARAM_NAME": "Param value", ....}
        :raises: processutils.ProcessExecutionError
        """
        LOG.info('Getting configurations for device: %s',
                 self.nvidia_dev.dev_pci)
        if not self.device_conf_dict:
            try:
                cmd = ['mstconfig', '-d', self.nvidia_dev.dev_pci, 'q']
                out, _r = utils.execute(*cmd)
            except processutils.ProcessExecutionError as e:
                LOG.error('Failed to query firmware of device %s: %s',
                          self.nvidia_dev.dev_pci, e)
                raise
            self.device_conf_dict = self._mstconfig_parse_data(out)
        return self.device_conf_dict

    def _param_supp_by_config_tool(self, param_name):
        """Check if configuration tool supports the provided parameter.

        :param param_name: string, configuration name
        :returns: bool
        :raises: processutils.ProcessExecutionError
        """
        if self._tool_confs is None:
            try:
                self._tool_confs, _r = utils.execute(
                    'mstconfig', '-d', self.nvidia_dev.dev_pci, 'i')
            except processutils.ProcessExecutionError as e:
                LOG.error('Failed to query tool configuration of device'
                          ' %s: %s', self.nvidia_dev.dev_pci, e)
                raise
        # trim any array index if present
        indexed_param = re.match(ARRAY_PARAM_REGEX, param_name)
        if indexed_param:
            param_name = indexed_param.group('param_name')
        return param_name in self._tool_confs

    def _param_supp_by_fw(self, param_name):
        """Check if fw image supports the provided configuration parameter.

        A plain parameter is supported when it appears in the device
        configuration. An indexed parameter (NAME[i] or NAME[i..j]) is
        supported when the device reports NAME as Array[first..last] and
        the requested index, or the whole requested range, lies within
        first..last, both bounds included.

        :param param_name: string, configuration name
        :returns: bool
        :raises: processutils.ProcessExecutionError
        """
        current_mlx_config = self._get_device_conf_dict()
        indexed_param = re.match(ARRAY_PARAM_REGEX, param_name)
        if not indexed_param:
            return param_name in current_mlx_config

        param_name = indexed_param.group('param_name')
        if param_name not in current_mlx_config:
            return False
        indexed_value = re.match(ARRAY_PARAM_VALUE_REGEX,
                                 current_mlx_config[param_name])
        if not indexed_value:
            return False
        value_first_index = int(indexed_value.group('first_index'))
        value_last_index = int(indexed_value.group('last_index'))

        param_index = indexed_param.group('index')
        if param_index:
            # Array[first..last] names its last element, so the upper
            # bound is inclusive.
            return value_first_index <= int(param_index) <= value_last_index

        param_first_index = int(indexed_param.group('first_index'))
        param_last_index = int(indexed_param.group('last_index'))
        return (value_first_index <= param_first_index
                < param_last_index <= value_last_index)

    def validate_config(self):
        """Validate that the firmware settings are supported.

        Each parameter has to be supported by the mstflint package and by
        the current firmware image.

        :returns: None
        :raises: UnSupportedConfigByMstflintPackage
        :raises: UnSupportedConfigByFW
        """
        LOG.info('Validating config for device %s',
                 self.nvidia_dev.dev_pci)
        for key in self.params:
            if not self._param_supp_by_config_tool(key):
                err = 'Configuraiton: %s is not supported by mstconfig, ' \
                      'please update to the latest mstflint package.' % key
                LOG.error(err)
                raise UnSupportedConfigByMstflintPackage(error_msg=_(err))

            if not self._param_supp_by_fw(key):
                err = 'Configuraiton %s for device %s is not supported with ' \
                      'current fw' % (key, self.nvidia_dev.dev_pci)
                LOG.error(err)
                raise UnSupportedConfigByFW(error_msg=_(err))

    def set_config(self):
        """Set device configurations

        Applies ``self.params``. Indexed (array) parameters are always
        set; other parameters are set only when the requested value
        differs from the value currently reported by the device.

        :returns: None
        :raises: processutils.ProcessExecutionError
        """
        LOG.info('Setting config for device %s', self.nvidia_dev.dev_pci)
        current_mlx_config = self._get_device_conf_dict()
        params_to_set = []
        for key, value in self.params.items():
            if re.match(ARRAY_PARAM_REGEX, key):
                params_to_set.append('%s=%s' % (key, value))
            else:
                try:
                    # Handle integer values
                    if int(value) != int(current_mlx_config.get(key)):
                        # Aggregate all configurations required to be modified
                        params_to_set.append('%s=%s' % (key, value))
                    else:
                        LOG.info('value of %s for device %s is already '
                                 'configured as %s no need to update it',
                                 key, self.nvidia_dev.dev_pci, value)
                except ValueError:
                    # Handle other values
                    # E.G:
                    # SRIOV_EN False(0)
                    # LINK_TYPE_P1 ETH(2)
                    if str(value).lower() not in \
                            str(current_mlx_config.get(key)).lower():
                        # Aggregate all configurations required to be modified
                        params_to_set.append('%s=%s' % (key, value))
                    else:
                        LOG.info('value of %s for device %s is already '
                                 'configured as %s, no need to update it',
                                 key, self.nvidia_dev.dev_pci, value)

        if not params_to_set:
            LOG.info('Set device configurations: No operation required')
            return

        cmd = ['mstconfig', '-d', self.nvidia_dev.dev_pci, '-y', 'set']
        cmd.extend(params_to_set)
        try:
            LOG.info('Setting configurations for device: %s',
                     self.nvidia_dev.dev_pci)
            utils.execute(*cmd)
            LOG.info('Set device configurations: Setting %s '
                     'done successfully',
                     ' '.join(params_to_set))
        except processutils.ProcessExecutionError as e:
            LOG.error('Failed to set configuration of device %s, '
                      ' %s: %s', self.nvidia_dev.dev_pci,
                      params_to_set, e)
            raise
|
||||
|
||||
|
||||
class NvidiaNicsConfig(object):
    """Manage the user provided firmware settings of the NVIDIA NICs.

    The user settings are filtered down to the device IDs reported by
    ``nvidia_nics``, expanded into one ``NvidiaNicConfig`` per NIC, and the
    NICs whose firmware image changed are collected so that they can be
    reset before the settings are applied.
    """

    def __init__(self, nvidia_nics, settings):
        """Store the NICs collection and the raw user settings.

        :param nvidia_nics: collection of NVIDIA NICs; it is iterated and
            must provide ``get_ids_list()``
        :param settings: list of user provided firmware settings maps
        """
        self.settings = settings
        self.nvidia_nics = nvidia_nics
        # deviceID -> user settings map, filled by create_settings_map()
        self.settings_map = {}
        # NICs collected by prepare_nvidia_nic_config() for a firmware reset
        self._nvidia_nics_to_be_reset_list = []
        # NvidiaNicConfig objects built by prepare_nvidia_nic_config()
        self._nvidia_nics_config_list = []

    def create_settings_map(self):
        """Map the device IDs found on the system to the user settings.

        Settings whose device ID is not reported by the NICs collection are
        ignored (a debug message is logged). Duplicate IDs and settings
        without an ID are not allowed.

        :returns: None
        :raises: DuplicateDeviceID if two settings target the same device ID
        :raises: InvalidFirmwareSettingsConfig if a settings entry is not a
            map or has no deviceID
        """
        ids_list = self.nvidia_nics.get_ids_list()
        for setting in self.settings:
            # The settings come from a user provided JSON blob; reject
            # malformed entries with the documented exception instead of
            # failing with AttributeError on ``.get``.
            if not isinstance(setting, dict):
                err = ('Each firmware settings entry must be a map, '
                       'got %s' % (setting,))
                LOG.error(err)
                raise InvalidFirmwareSettingsConfig(error_msg=_(err))

            device_id = setting.get('deviceID')
            if not device_id:
                err = 'There is no deviceID provided for this settings'
                LOG.error(err)
                raise InvalidFirmwareSettingsConfig(error_msg=_(err))

            if device_id not in ids_list:
                LOG.debug('There are no devices with ID %s on the system',
                          device_id)
                continue

            if self.settings_map.get(device_id):
                err = 'duplicate settings for device ID %s ' % device_id
                LOG.error(err)
                raise DuplicateDeviceID(error_msg=_(err))

            self.settings_map[device_id] = setting

    def prepare_nvidia_nic_config(self):
        """Build the per-NIC configuration list and the reset list.

        For every NIC that has an entry in the settings map, the parameters
        to apply are assembled from ``globalConfig`` (only for the first NIC
        seen with a given PCI address prefix) and from ``function0Config``
        or ``function1Config`` depending on the last character of the PCI
        address. NICs that end up with parameters get a ``NvidiaNicConfig``.
        The first NIC seen per PCI prefix is also added to the reset list
        when its firmware image has changed.

        :returns: None
        """
        seen_nics = set()
        for nic in self.nvidia_nics:
            nic_settings = self.settings_map.get(nic.dev_id)
            if not nic_settings:
                continue

            params = {}
            # PCI address without its last character; NICs sharing it are
            # handled as one card for the global config and the reset.
            prefix = nic.dev_pci[:-1]
            if prefix not in seen_nics:
                seen_nics.add(prefix)
                global_config = nic_settings.get('globalConfig')
                if global_config:
                    params.update(global_config)
                if nic.dev_ops.is_image_changed():
                    self._nvidia_nics_to_be_reset_list.append(nic)

            # A PCI address ending with '0' selects function0Config, any
            # other ending selects function1Config.
            is_first_device = nic.dev_pci[-1] == '0'
            if is_first_device:
                function_config = nic_settings.get('function0Config')
            else:
                function_config = nic_settings.get('function1Config')
            if function_config:
                params.update(function_config)

            if params:
                device_config = NvidiaNicConfig(nic, params)
                self._nvidia_nics_config_list.append(device_config)

    def reset_nvidia_nics(self):
        """Reset every NIC collected in the reset list.

        :returns: None
        """
        for nvidia_nic in self._nvidia_nics_to_be_reset_list:
            nvidia_nic.dev_ops.reset_device()

    def validate_settings_config(self):
        """Validate the firmware settings of every prepared NIC config.

        :returns: None
        """
        for nvidia_nic_config in self._nvidia_nics_config_list:
            nvidia_nic_config.validate_config()

    def set_settings_config(self):
        """Apply the firmware settings of every prepared NIC config.

        :returns: None
        """
        for nvidia_nic_config in self._nvidia_nics_config_list:
            nvidia_nic_config.set_config()

    def is_not_empty_reset_list(self):
        """Tell whether any NIC is waiting for a firmware reset.

        :returns: bool, True if the reset list is not empty
        """
        return bool(self._nvidia_nics_to_be_reset_list)
|
||||
|
||||
|
||||
def update_nvidia_nic_firmware_image(images):
    """Update the NVIDIA NIC firmware images from a user provided list.

    After the prerequisites check, the images schema is validated, the
    NICs are discovered, the images are filtered by the PSIDs of the
    discovered NICs and the firmware update is applied to them.

    :param images: list of images
    :raises: InvalidFirmwareImageConfig if ``images`` is not a list
    """
    if not isinstance(images, list):
        # Wrap the operand in a 1-tuple so that a tuple value is rendered
        # as-is instead of being unpacked by the % operator.
        err = 'The images must be a list of images, %s' % (images,)
        raise InvalidFirmwareImageConfig(error_msg=_(err))
    check_prereq()
    nvidia_fw_images = NvidiaFirmwareImages(images)
    nvidia_fw_images.validate_images_schema()
    nvidia_nics = NvidiaNics()
    nvidia_nics.discover()
    nvidia_fw_images.filter_images(nvidia_nics.get_psids_list())
    nvidia_fw_images.apply_net_firmware_update(nvidia_nics)
|
||||
|
||||
|
||||
def update_nvidia_nic_firmware_settings(settings):
    """Update the NVIDIA NIC firmware settings from a user provided list.

    After the prerequisites check, the NICs are discovered and the settings
    are mapped to them. NICs collected in the reset list are reset first,
    then the settings are validated and applied.

    :param settings: list of settings
    :raises: InvalidFirmwareSettingsConfig if ``settings`` is not a list
    """
    if not isinstance(settings, list):
        # Wrap the operand in a 1-tuple so that a tuple value is rendered
        # as-is instead of being unpacked by the % operator.
        err = 'The settings must be list of settings, %s' % (settings,)
        raise InvalidFirmwareSettingsConfig(error_msg=_(err))
    check_prereq()
    nvidia_nics = NvidiaNics()
    nvidia_nics.discover()
    nvidia_nics_config = NvidiaNicsConfig(nvidia_nics, settings)
    nvidia_nics_config.create_settings_map()
    nvidia_nics_config.prepare_nvidia_nic_config()
    # Reset happens before validation and before the settings are applied.
    if nvidia_nics_config.is_not_empty_reset_list():
        nvidia_nics_config.reset_nvidia_nics()
    nvidia_nics_config.validate_settings_config()
    nvidia_nics_config.set_settings_config()
|
0
ironic_python_agent/tests/unit/hardware_managers/nvidia/__init__.py
Executable file
0
ironic_python_agent/tests/unit/hardware_managers/nvidia/__init__.py
Executable file
File diff suppressed because it is too large
Load Diff
@ -42,6 +42,42 @@ class MlnxHardwareManager(base.IronicAgentTest):
|
||||
CLIENT_ID,
|
||||
mlnx._generate_client_id(IB_ADDRESS))
|
||||
|
||||
def test_get_clean_steps(self):
    """Check the clean steps returned by the hardware manager under test.

    Both NVIDIA NIC firmware steps (image and settings) are expected, each
    with priority 0, not abortable, requesting a reboot and taking one
    required argument.
    """
    expected_clean_steps = [
        {'abortable': False,
         'argsinfo': {
             'images': {
                 'description': 'Json blob contains a list of images, '
                                'where each image contains a map of '
                                'url: to firmware image (file://, '
                                'http://), '
                                'checksum: of the provided image, '
                                'checksumType: md5/sha512/sha256, '
                                'componentProfile: PSID of the nic, '
                                'version: of the FW',
                 'required': True}},
         'interface': 'deploy',
         'priority': 0,
         'reboot_requested': True,
         'step': 'update_nvidia_nic_firmware_image'},
        {'abortable': False,
         'argsinfo': {
             'settings': {
                 'description': 'Json blob contains a list of settings '
                                'per device ID, where each settings '
                                'contains a map of '
                                'deviceID: device ID '
                                'globalConfig: global config '
                                'function0Config: function 0 config '
                                'function1Config: function 1 config',
                 'required': True}},
         'interface': 'deploy',
         'priority': 0,
         'reboot_requested': True,
         'step': 'update_nvidia_nic_firmware_settings'}]
    # Expected value first, observed value second, matching the other
    # assertions in this test module.
    self.assertEqual(expected_clean_steps,
                     self.hardware.get_clean_steps(self.node, []))
|
||||
|
||||
@mock.patch.object(os, 'listdir', autospec=True)
|
||||
@mock.patch.object(hardware, '_get_device_info', autospec=True)
|
||||
def test_detect_hardware(self, mocked_get_device_info, mock_listdir):
|
||||
|
7
releasenotes/notes/feature-2010228-cf3a59b88f07c3a7.yaml
Normal file
7
releasenotes/notes/feature-2010228-cf3a59b88f07c3a7.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Add two clean steps to MellanoxDeviceHardwareManager to update
|
||||
NVIDIA NICs firmware images and settings:
|
||||
* update_nvidia_nic_firmware_image
|
||||
* update_nvidia_nic_firmware_settings
|
Loading…
x
Reference in New Issue
Block a user