From 82abc0beacd3f8b4626ee1fab6634204af65a07b Mon Sep 17 00:00:00 2001 From: Ilya Etingof Date: Thu, 13 Jun 2019 19:01:18 +0200 Subject: [PATCH] Collect sensor data in ``redfish`` hardware type Adds sensor data collector to ``redfish`` management interface. Temperature, power, cooling and drive health metrics are collected. Change-Id: I8accdcc73c7e0261579d753633f9dfc02a868115 Story: 2005878 Task: 33692 --- driver-requirements.txt | 2 +- ironic/drivers/modules/redfish/management.py | 151 ++++++++++++- .../modules/redfish/test_management.py | 198 +++++++++++++++++- .../add-redfish-sensors-4e2f7e3f8a7c6d5b.yaml | 5 + 4 files changed, 348 insertions(+), 8 deletions(-) create mode 100644 releasenotes/notes/add-redfish-sensors-4e2f7e3f8a7c6d5b.yaml diff --git a/driver-requirements.txt b/driver-requirements.txt index fd0dbc4d3e..9362a6501f 100644 --- a/driver-requirements.txt +++ b/driver-requirements.txt @@ -16,7 +16,7 @@ python-xclarityclient>=0.1.6 ImcSdk>=0.7.2 # The Redfish hardware type uses the Sushy library -sushy>=1.6.0 +sushy>=1.9.0 # Ansible-deploy interface ansible>=2.4 diff --git a/ironic/drivers/modules/redfish/management.py b/ironic/drivers/modules/redfish/management.py index 3c86fde571..56f35ba601 100644 --- a/ironic/drivers/modules/redfish/management.py +++ b/ironic/drivers/modules/redfish/management.py @@ -13,6 +13,8 @@ # License for the specific language governing permissions and limitations # under the License. +import collections + from oslo_log import log from oslo_utils import importutils @@ -222,14 +224,155 @@ class RedfishManagement(base.ManagementInterface): return BOOT_MODE_MAP.get(system.boot.get('mode')) + @staticmethod + def _sensor2dict(resource, *fields): + return {field: getattr(resource, field) + for field in fields + if hasattr(resource, field)} + + @classmethod + def _get_sensors_fan(cls, chassis): + """Get fan sensors reading. + + :param chassis: Redfish `chassis` object + :returns: returns a dict of sensor data. 
+ """ + sensors = {} + + for fan in chassis.thermal.fans.get_members(): + sensor = cls._sensor2dict( + fan, 'identity', 'max_reading_range', + 'min_reading_range', 'reading', 'reading_units', + 'serial_number', 'physical_context') + sensor.update(cls._sensor2dict(fan.status, 'state', 'health')) + unique_name = '%s@%s' % (fan.identity, chassis.identity) + sensors[unique_name] = sensor + + return sensors + + @classmethod + def _get_sensors_temperatures(cls, chassis): + """Get temperature sensors reading. + + :param chassis: Redfish `chassis` object + :returns: returns a dict of sensor data. + """ + sensors = {} + + for temps in chassis.thermal.temperatures.get_members(): + sensor = cls._sensor2dict( + temps, 'identity', 'max_reading_range_temp', + 'min_reading_range_temp', 'reading_celsius', + 'physical_context', 'sensor_number') + sensor.update(cls._sensor2dict(temps.status, 'state', 'health')) + unique_name = '%s@%s' % (temps.identity, chassis.identity) + sensors[unique_name] = sensor + + return sensors + + @classmethod + def _get_sensors_power(cls, chassis): + """Get power supply sensors reading. + + :param chassis: Redfish `chassis` object + :returns: returns a dict of sensor data. + """ + sensors = {} + + for power in chassis.power.power_supplies: + sensor = cls._sensor2dict( + power, 'power_capacity_watts', + 'line_input_voltage', 'last_power_output_watts', + 'serial_number') + sensor.update(cls._sensor2dict(power.status, 'state', 'health')) + sensor.update(cls._sensor2dict( + power.input_ranges, 'minimum_voltage', + 'maximum_voltage', 'minimum_frequency_hz', + 'maximum_frequency_hz', 'output_wattage')) + unique_name = '%s:%s@%s' % ( + power.member_id, chassis.power.identity, + chassis.identity) + sensors[unique_name] = sensor + + return sensors + + @classmethod + def _get_sensors_drive(cls, system): + """Get storage drive sensors reading. + + :param system: Redfish `system` object + :returns: returns a dict of sensor data. 
+ """ + sensors = {} + + for storage in system.simple_storage.get_members(): + for drive in storage.devices: + sensor = cls._sensor2dict( + drive, 'identity', 'model', 'capacity_bytes', + 'failure_predicted') + sensor.update( + cls._sensor2dict(drive.status, 'state', 'health')) + unique_name = '%s:%s@%s' % ( + drive.identity, system.simple_storage.identity, + system.identity) + sensors[unique_name] = sensor + + return sensors + def get_sensors_data(self, task): """Get sensors data. - Not implemented for this driver. - - :raises: NotImplementedError + :param task: a TaskManager instance. + :raises: FailedToGetSensorData when getting the sensor data fails. + :raises: FailedToParseSensorData when parsing sensor data fails. + :raises: InvalidParameterValue if required parameters + are invalid. + :raises: MissingParameterValue if a required parameter is missing. + :returns: returns a dict of sensor data grouped by sensor type. """ - raise NotImplementedError() + node = task.node + + sensors = collections.defaultdict(dict) + + system = redfish_utils.get_system(node) + + for chassis in system.chassis: + try: + sensors['Fan'].update(self._get_sensors_fan(chassis)) + + except sushy.exceptions.SushyError as exc: + LOG.debug("Failed reading fan information for node " + "%(node)s: %(error)s", {'node': node.uuid, + 'error': exc}) + + try: + sensors['Temperature'].update( + self._get_sensors_temperatures(chassis)) + + except sushy.exceptions.SushyError as exc: + LOG.debug("Failed reading temperature information for node " + "%(node)s: %(error)s", {'node': node.uuid, + 'error': exc}) + + try: + sensors['Power'].update(self._get_sensors_power(chassis)) + + except sushy.exceptions.SushyError as exc: + LOG.debug("Failed reading power information for node " + "%(node)s: %(error)s", {'node': node.uuid, + 'error': exc}) + + try: + sensors['Drive'].update(self._get_sensors_drive(system)) + + except sushy.exceptions.SushyError as exc: + LOG.debug("Failed reading drive information for 
node " + "%(node)s: %(error)s", {'node': node.uuid, + 'error': exc}) + + LOG.debug("Gathered sensor data: %(sensors)s", {'sensors': sensors}) + + return sensors @task_manager.require_exclusive_lock def inject_nmi(self, task): diff --git a/ironic/tests/unit/drivers/modules/redfish/test_management.py b/ironic/tests/unit/drivers/modules/redfish/test_management.py index 7876b2087f..0807afa57f 100644 --- a/ironic/tests/unit/drivers/modules/redfish/test_management.py +++ b/ironic/tests/unit/drivers/modules/redfish/test_management.py @@ -215,11 +215,203 @@ class RedfishManagementTestCase(db_base.DbTestCase): expected = boot_modes.LEGACY_BIOS self.assertEqual(expected, response) - def test_get_sensors_data(self): + def test__get_sensors_fan(self): + attributes = { + "identity": "XXX-YYY-ZZZ", + "name": "CPU Fan", + "status": { + "state": "enabled", + "health": "OK" + }, + "reading": 6000, + "reading_units": "RPM", + "lower_threshold_fatal": 2000, + "min_reading_range": 0, + "max_reading_range": 10000, + "serial_number": "SN010203040506", + "physical_context": "CPU" + } + + mock_chassis = mock.MagicMock(identity='ZZZ-YYY-XXX') + + mock_fans = mock_chassis.thermal.fans + mock_fan = mock.MagicMock(**attributes) + mock_fan.name = attributes['name'] + mock_fan.status = mock.MagicMock(**attributes['status']) + mock_fans.get_members.return_value = [mock_fan] + with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: - self.assertRaises(NotImplementedError, - task.driver.management.get_sensors_data, task) + sensors = task.driver.management._get_sensors_fan(mock_chassis) + + expected = { + 'XXX-YYY-ZZZ@ZZZ-YYY-XXX': { + 'identity': 'XXX-YYY-ZZZ', + 'max_reading_range': 10000, + 'min_reading_range': 0, + 'physical_context': 'CPU', + 'reading': 6000, + 'reading_units': 'RPM', + 'serial_number': 'SN010203040506', + 'health': 'OK', + 'state': 'enabled' + } + } + + self.assertEqual(expected, sensors) + + def test__get_sensors_temperatures(self): + attributes = { + 
"identity": "XXX-YYY-ZZZ", + "name": "CPU Temp", + "status": { + "state": "enabled", + "health": "OK" + }, + "reading_celsius": 62, + "upper_threshold_non_critical": 75, + "upper_threshold_critical": 90, + "upperThresholdFatal": 95, + "min_reading_range_temp": 0, + "max_reading_range_temp": 120, + "physical_context": "CPU", + "sensor_number": 1 + } + + mock_chassis = mock.MagicMock(identity='ZZZ-YYY-XXX') + mock_temperatures = mock_chassis.thermal.temperatures + mock_temperature = mock.MagicMock(**attributes) + mock_temperature.name = attributes['name'] + mock_temperature.status = mock.MagicMock(**attributes['status']) + mock_temperatures.get_members.return_value = [mock_temperature] + + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + sensors = task.driver.management._get_sensors_temperatures( + mock_chassis) + + expected = { + 'XXX-YYY-ZZZ@ZZZ-YYY-XXX': { + 'identity': 'XXX-YYY-ZZZ', + 'max_reading_range_temp': 120, + 'min_reading_range_temp': 0, + 'physical_context': 'CPU', + 'reading_celsius': 62, + 'sensor_number': 1, + 'health': 'OK', + 'state': 'enabled' + } + } + + self.assertEqual(expected, sensors) + + def test__get_sensors_power(self): + attributes = { + 'member_id': 0, + 'name': 'Power Supply 0', + 'power_capacity_watts': 1450, + 'last_power_output_watts': 650, + 'line_input_voltage': 220, + 'input_ranges': { + 'minimum_voltage': 185, + 'maximum_voltage': 250, + 'minimum_frequency_hz': 47, + 'maximum_frequency_hz': 63, + 'output_wattage': 1450 + }, + 'serial_number': 'SN010203040506', + "status": { + "state": "enabled", + "health": "OK" + } + } + + mock_chassis = mock.MagicMock(identity='ZZZ-YYY-XXX') + mock_power = mock_chassis.power + mock_power.identity = 'Power' + mock_psu = mock.MagicMock(**attributes) + mock_psu.name = attributes['name'] + mock_psu.status = mock.MagicMock(**attributes['status']) + mock_psu.input_ranges = mock.MagicMock(**attributes['input_ranges']) + mock_power.power_supplies = [mock_psu] + + with 
task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + sensors = task.driver.management._get_sensors_power(mock_chassis) + + expected = { + '0:Power@ZZZ-YYY-XXX': { + 'health': 'OK', + 'last_power_output_watts': 650, + 'line_input_voltage': 220, + 'maximum_frequency_hz': 63, + 'maximum_voltage': 250, + 'minimum_frequency_hz': 47, + 'minimum_voltage': 185, + 'output_wattage': 1450, + 'power_capacity_watts': 1450, + 'serial_number': 'SN010203040506', + 'state': 'enabled' + } + } + + self.assertEqual(expected, sensors) + + def test__get_sensors_data_drive(self): + attributes = { + 'identity': '32ADF365C6C1B7BD', + 'model': 'IBM 350A', + 'capacity_bytes': 3750000000, + 'failure_predicted': True, + 'serial_number': 'SN010203040506', + 'status': { + 'health': 'OK', + 'state': 'enabled' + } + } + + mock_system = mock.MagicMock(identity='ZZZ-YYY-XXX') + mock_drive = mock.MagicMock(**attributes) + mock_drive.status = mock.MagicMock(**attributes['status']) + mock_storage = mock.MagicMock() + mock_storage.devices = [mock_drive] + mock_system.simple_storage.identity = 'XXX-YYY-ZZZ' + mock_system.simple_storage.get_members.return_value = [mock_storage] + + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + sensors = task.driver.management._get_sensors_drive(mock_system) + + expected = { + '32ADF365C6C1B7BD:XXX-YYY-ZZZ@ZZZ-YYY-XXX': { + 'capacity_bytes': 3750000000, + 'failure_predicted': True, + 'health': 'OK', + 'identity': '32ADF365C6C1B7BD', + 'model': 'IBM 350A', + 'state': 'enabled' + } + } + + self.assertEqual(expected, sensors) + + @mock.patch.object(redfish_utils, 'get_system', autospec=True) + def test_get_sensors_data(self, mock_system): + mock_chassis = mock.MagicMock() + mock_system.return_value.chassis = [mock_chassis] + + with task_manager.acquire(self.context, self.node.uuid, + shared=True) as task: + sensors = task.driver.management.get_sensors_data(task) + + expected = { + 'Fan': {}, + 'Temperature': {}, + 
'Power': {}, + 'Drive': {} + } + + self.assertEqual(expected, sensors) @mock.patch.object(redfish_utils, 'get_system', autospec=True) def test_inject_nmi(self, mock_get_system): diff --git a/releasenotes/notes/add-redfish-sensors-4e2f7e3f8a7c6d5b.yaml b/releasenotes/notes/add-redfish-sensors-4e2f7e3f8a7c6d5b.yaml new file mode 100644 index 0000000000..cb89610300 --- /dev/null +++ b/releasenotes/notes/add-redfish-sensors-4e2f7e3f8a7c6d5b.yaml @@ -0,0 +1,5 @@ +--- +features: + - | + Adds sensor data collector to ``redfish`` management interface. + Temperature, power, cooling and drive health metrics are collected.