Add GPU reporting to idrac-wsman inspect interface
This patch implements reporting number of NVIDIA Tesla T4 devices connected to a system by discovering such devices and reporting them through capability 'pci_gpu_devices'. Change-Id: If713895f05f08a9827c4c085108abb3e388b2a2e Story: 2008118 Task: 40839 Depends-On: https://review.opendev.org/#/c/750364/
This commit is contained in:
parent
e2d0f3fd07
commit
101fc29686
@ -259,6 +259,7 @@ The inspection discovers the following properties:
|
||||
Extra capabilities:
|
||||
|
||||
* ``boot_mode``: UEFI or BIOS boot mode.
|
||||
* ``pci_gpu_devices``: number of GPU devices connected to the bare metal.
|
||||
|
||||
It also creates baremetal ports for each NIC port detected in the system.
|
||||
The ``idrac-wsman`` inspect interface discovers which NIC ports are
|
||||
|
@ -7,7 +7,7 @@
|
||||
proliantutils>=2.10.0
|
||||
pysnmp>=4.3.0,<5.0.0
|
||||
python-scciclient>=0.8.0
|
||||
python-dracclient>=3.1.0,<6.0.0
|
||||
python-dracclient>=5.1.0,<6.0.0
|
||||
python-xclarityclient>=0.1.6
|
||||
|
||||
# The Redfish hardware type uses the Sushy library
|
||||
|
@ -49,6 +49,8 @@ class DracRedfishInspect(redfish_inspect.RedfishInspect):
|
||||
|
||||
class DracWSManInspect(base.InspectInterface):
|
||||
|
||||
_GPU_SUPPORTED_LIST = {"TU104GL [Tesla T4]"}
|
||||
|
||||
def get_properties(self):
|
||||
"""Return the properties of the interface.
|
||||
|
||||
@ -98,9 +100,12 @@ class DracWSManInspect(base.InspectInterface):
|
||||
properties['cpu_arch'] = 'x86_64' if cpus[0].arch64 else 'x86'
|
||||
|
||||
bios_settings = client.list_bios_settings()
|
||||
video_controllers = client.list_video_controllers()
|
||||
current_capabilities = node.properties.get('capabilities', '')
|
||||
new_capabilities = {
|
||||
'boot_mode': bios_settings["BootMode"].current_value.lower()}
|
||||
'boot_mode': bios_settings["BootMode"].current_value.lower(),
|
||||
'pci_gpu_devices': self._calculate_gpus(video_controllers)}
|
||||
|
||||
capabilties = utils.get_updated_capabilities(current_capabilities,
|
||||
new_capabilities)
|
||||
properties['capabilities'] = capabilties
|
||||
@ -190,6 +195,23 @@ class DracWSManInspect(base.InspectInterface):
|
||||
else:
|
||||
return cpu.cores
|
||||
|
||||
def _calculate_gpus(self, video_controllers):
|
||||
"""Find actual GPU count.
|
||||
|
||||
This method reports number of NVIDIA Tesla T4 GPU devices present
|
||||
on the server.
|
||||
|
||||
:param video_controllers: list of video controllers.
|
||||
|
||||
:returns: returns total gpu count.
|
||||
"""
|
||||
gpu_cnt = 0
|
||||
for video_controller in video_controllers:
|
||||
for gpu in self._GPU_SUPPORTED_LIST:
|
||||
if video_controller.description == gpu:
|
||||
gpu_cnt += 1
|
||||
return gpu_cnt
|
||||
|
||||
def _get_pxe_dev_nics(self, client, nics, node):
|
||||
"""Get a list of pxe device interfaces.
|
||||
|
||||
|
@ -135,6 +135,23 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
||||
'PxeDev4Interface': None}
|
||||
nic_settings = {'LegacyBootProto': {'current_value': 'PXE'},
|
||||
'FQDD': 'NIC.Embedded.1-1-1'}
|
||||
video_controllers = [
|
||||
{'id': 'Video.Embedded.1-1',
|
||||
'description': 'Integrated Matrox G200eW3 Graphics Controller',
|
||||
'function_number': 0,
|
||||
'manufacturer': 'Matrox Electronics Systems Ltd.',
|
||||
'pci_device_id': '0536',
|
||||
'pci_vendor_id': '102B',
|
||||
'pci_subdevice_id': '0737',
|
||||
'pci_subvendor_id': '1028'},
|
||||
{'id': 'Video.Slot.7-1',
|
||||
'description': 'TU104GL [Tesla T4]',
|
||||
'function_number': 0,
|
||||
'manufacturer': 'NVIDIA Corporation',
|
||||
'pci_device_id': '1EB8',
|
||||
'pci_vendor_id': '10DE',
|
||||
'pci_subdevice_id': '12A2',
|
||||
'pci_subvendor_id': '10DE'}]
|
||||
|
||||
self.memory = [test_utils.dict_to_namedtuple(values=m) for m in memory]
|
||||
self.cpus = [test_utils.dict_to_namedtuple(values=c) for c in cpus]
|
||||
@ -146,6 +163,8 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
||||
self.bios_boot_settings = test_utils.dict_of_object(bios_boot_settings)
|
||||
self.uefi_boot_settings = test_utils.dict_of_object(uefi_boot_settings)
|
||||
self.nic_settings = test_utils.dict_of_object(nic_settings)
|
||||
self.video_controllers = [test_utils.dict_to_namedtuple(values=vc)
|
||||
for vc in video_controllers]
|
||||
|
||||
def test_get_properties(self):
|
||||
expected = drac_common.COMMON_PROPERTIES
|
||||
@ -161,7 +180,7 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
||||
'local_gb': 1116,
|
||||
'cpus': 18,
|
||||
'cpu_arch': 'x86_64',
|
||||
'capabilities': 'boot_mode:uefi'}
|
||||
'capabilities': 'boot_mode:uefi,pci_gpu_devices:1'}
|
||||
mock_client = mock.Mock()
|
||||
mock_get_drac_client.return_value = mock_client
|
||||
mock_client.list_memory.return_value = self.memory
|
||||
@ -169,6 +188,8 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
||||
mock_client.list_virtual_disks.return_value = self.virtual_disks
|
||||
mock_client.list_nics.return_value = self.nics
|
||||
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
||||
mock_client.list_video_controllers.return_value = \
|
||||
self.video_controllers
|
||||
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
@ -191,6 +212,8 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
||||
mock_client.list_virtual_disks.side_effect = (
|
||||
drac_exceptions.BaseClientException('boom'))
|
||||
mock_client.list_bios_settings.return_value = self.bios_boot_settings
|
||||
mock_client.list_video_controllers.return_value = \
|
||||
self.video_controllers
|
||||
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
@ -207,7 +230,7 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
||||
'local_gb': 279,
|
||||
'cpus': 18,
|
||||
'cpu_arch': 'x86_64',
|
||||
'capabilities': 'boot_mode:uefi'}
|
||||
'capabilities': 'boot_mode:uefi,pci_gpu_devices:1'}
|
||||
mock_client = mock.Mock()
|
||||
mock_get_drac_client.return_value = mock_client
|
||||
mock_client.list_memory.return_value = self.memory
|
||||
@ -216,6 +239,8 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
||||
mock_client.list_physical_disks.return_value = self.physical_disks
|
||||
mock_client.list_nics.return_value = self.nics
|
||||
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
||||
mock_client.list_video_controllers.return_value = \
|
||||
self.video_controllers
|
||||
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
@ -239,12 +264,94 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
||||
mock_client.list_physical_disks.return_value = self.physical_disks
|
||||
mock_client.list_nics.return_value = self.nics
|
||||
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
||||
mock_client.list_video_controllers.return_value = \
|
||||
self.video_controllers
|
||||
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
self.assertRaises(exception.HardwareInspectionFailure,
|
||||
task.driver.inspect.inspect_hardware, task)
|
||||
|
||||
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
|
||||
autospec=True)
|
||||
@mock.patch.object(objects.Port, 'create', spec_set=True, autospec=True)
|
||||
def test_inspect_hardware_no_supported_gpu(self, mock_port_create,
|
||||
mock_get_drac_client):
|
||||
controllers = [
|
||||
{'id': 'Video.Embedded.1-1',
|
||||
'description': 'Integrated Matrox G200eW3 Graphics Controller',
|
||||
'function_number': 0,
|
||||
'manufacturer': 'Matrox Electronics Systems Ltd.',
|
||||
'pci_device_id': '0536',
|
||||
'pci_vendor_id': '102B',
|
||||
'pci_subdevice_id': '0737',
|
||||
'pci_subvendor_id': '1028'},
|
||||
{'id': 'Video.Slot.7-1',
|
||||
'description': 'GV100GL [Tesla V100 PCIe 16GB]]',
|
||||
'function_number': 0,
|
||||
'manufacturer': 'NVIDIA Corporation',
|
||||
'pci_device_id': '1DB4',
|
||||
'pci_vendor_id': '10DE',
|
||||
'pci_subdevice_id': '1214',
|
||||
'pci_subvendor_id': '10DE'}]
|
||||
|
||||
expected_node_properties = {
|
||||
'memory_mb': 32768,
|
||||
'local_gb': 279,
|
||||
'cpus': 18,
|
||||
'cpu_arch': 'x86_64',
|
||||
'capabilities': 'boot_mode:uefi,pci_gpu_devices:0'}
|
||||
mock_client = mock.Mock()
|
||||
mock_get_drac_client.return_value = mock_client
|
||||
mock_client.list_memory.return_value = self.memory
|
||||
mock_client.list_cpus.return_value = self.cpus
|
||||
mock_client.list_virtual_disks.return_value = []
|
||||
mock_client.list_physical_disks.return_value = self.physical_disks
|
||||
mock_client.list_nics.return_value = self.nics
|
||||
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
||||
video_controllers = [test_utils.dict_to_namedtuple(values=vc)
|
||||
for vc in controllers]
|
||||
mock_client.list_video_controllers.return_value = video_controllers
|
||||
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
return_value = task.driver.inspect.inspect_hardware(task)
|
||||
|
||||
self.node.refresh()
|
||||
self.assertEqual(expected_node_properties, self.node.properties)
|
||||
self.assertEqual(states.MANAGEABLE, return_value)
|
||||
self.assertEqual(2, mock_port_create.call_count)
|
||||
|
||||
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
|
||||
autospec=True)
|
||||
@mock.patch.object(objects.Port, 'create', spec_set=True, autospec=True)
|
||||
def test_inspect_hardware_no_gpu(self, mock_port_create,
|
||||
mock_get_drac_client):
|
||||
expected_node_properties = {
|
||||
'memory_mb': 32768,
|
||||
'local_gb': 279,
|
||||
'cpus': 18,
|
||||
'cpu_arch': 'x86_64',
|
||||
'capabilities': 'boot_mode:uefi,pci_gpu_devices:0'}
|
||||
mock_client = mock.Mock()
|
||||
mock_get_drac_client.return_value = mock_client
|
||||
mock_client.list_memory.return_value = self.memory
|
||||
mock_client.list_cpus.return_value = self.cpus
|
||||
mock_client.list_virtual_disks.return_value = []
|
||||
mock_client.list_physical_disks.return_value = self.physical_disks
|
||||
mock_client.list_nics.return_value = self.nics
|
||||
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
||||
mock_client.list_video_controllers.return_value = []
|
||||
|
||||
with task_manager.acquire(self.context, self.node.uuid,
|
||||
shared=True) as task:
|
||||
return_value = task.driver.inspect.inspect_hardware(task)
|
||||
|
||||
self.node.refresh()
|
||||
self.assertEqual(expected_node_properties, self.node.properties)
|
||||
self.assertEqual(states.MANAGEABLE, return_value)
|
||||
self.assertEqual(2, mock_port_create.call_count)
|
||||
|
||||
@mock.patch.object(drac_common, 'get_drac_client', spec_set=True,
|
||||
autospec=True)
|
||||
@mock.patch.object(objects.Port, 'create', spec_set=True, autospec=True)
|
||||
@ -255,7 +362,7 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
||||
'local_gb': 1116,
|
||||
'cpus': 18,
|
||||
'cpu_arch': 'x86_64',
|
||||
'capabilities': 'boot_mode:uefi'}
|
||||
'capabilities': 'boot_mode:uefi,pci_gpu_devices:1'}
|
||||
mock_client = mock.Mock()
|
||||
mock_get_drac_client.return_value = mock_client
|
||||
mock_client.list_memory.return_value = self.memory
|
||||
@ -263,6 +370,8 @@ class DracInspectionTestCase(test_utils.BaseDracTest):
|
||||
mock_client.list_virtual_disks.return_value = self.virtual_disks
|
||||
mock_client.list_nics.return_value = self.nics
|
||||
mock_client.list_bios_settings.return_value = self.uefi_boot_settings
|
||||
mock_client.list_video_controllers.return_value = \
|
||||
self.video_controllers
|
||||
|
||||
mock_port_create.side_effect = exception.MACAlreadyExists("boom")
|
||||
|
||||
|
@ -0,0 +1,8 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Adds support in ``idrac-wsman`` inspect hardware interface for reporting
|
||||
number of GPU devices connected to a system. This information is advertised
|
||||
through capability ``pci_gpu_devices``, which can be used to make
|
||||
scheduling decisions for the node. Currently, NVIDIA Tesla T4 GPU devices
|
||||
are reported.
|
Loading…
Reference in New Issue
Block a user