Make logs collection a hardware manager call

This allows hardware managers to collect additional logs.

Change-Id: If082b921d4bf71c4cc41a5a72db6995b08637374
This commit is contained in:
Dmitry Tantsur 2023-01-25 13:33:29 +01:00
parent 06413e54a0
commit c26f498f49
5 changed files with 158 additions and 153 deletions

View File

@ -16,6 +16,7 @@ import abc
import binascii
import collections
import functools
import io
import ipaddress
import json
from multiprocessing.pool import ThreadPool
@ -1173,6 +1174,18 @@ class HardwareManager(object, metaclass=abc.ABCMeta):
'version': getattr(self, 'HARDWARE_MANAGER_VERSION', '1.0')
}
def collect_system_logs(self, io_dict, file_list):
"""Collect logs from the system.
Implementations should update `io_dict` and `file_list` with logs
to send to Ironic and Inspector.
:param io_dict: Dictionary mapping file names to binary IO objects
with corresponding data.
:param file_list: List of full file paths to include.
"""
raise errors.IncompatibleHardwareMethodError()
class GenericHardwareManager(HardwareManager):
HARDWARE_MANAGER_NAME = 'generic_hardware_manager'
@ -2886,6 +2899,64 @@ class GenericHardwareManager(HardwareManager):
"""
return inject_files.inject_files(node, ports, files, verify_ca)
def collect_system_logs(self, io_dict, file_list):
commands = {
'df': ['df', '-a'],
'dmesg': ['dmesg'],
'iptables': ['iptables', '-L'],
'ip_addr': ['ip', 'addr'],
'lsblk': ['lsblk', '--all',
'-o%s' % ','.join(utils.LSBLK_COLUMNS)],
'lsblk-full': ['lsblk', '--all', '--bytes',
'--output-all', '--pairs'],
'lshw': ['lshw', '-quiet', '-json'],
'mdstat': ['cat', '/proc/mdstat'],
'mount': ['mount'],
'multipath': ['multipath', '-ll'],
'parted': ['parted', '-l'],
'ps': ['ps', 'au'],
}
for name, cmd in commands.items():
utils.try_collect_command_output(io_dict, name, cmd)
_collect_udev(io_dict)
def _collect_udev(io_dict):
"""Collect device properties from udev."""
try:
out, _e = il_utils.execute('lsblk', '-no', 'KNAME')
except processutils.ProcessExecutionError as exc:
LOG.warning('Could not list block devices: %s', exc)
return
context = pyudev.Context()
for kname in out.splitlines():
kname = kname.strip()
if not kname:
continue
name = os.path.join('/dev', kname)
try:
udev = pyudev.Devices.from_device_file(context, name)
except Exception as e:
LOG.warning("Device %(dev)s is inaccessible, skipping... "
"Error: %(error)s", {'dev': name, 'error': e})
continue
try:
props = dict(udev.properties)
except AttributeError: # pyudev < 0.20
props = dict(udev)
fp = io.TextIOWrapper(io.BytesIO(), encoding='utf-8')
json.dump(props, fp)
buf = fp.detach()
buf.seek(0)
io_dict[f'udev/{kname}'] = buf
def _compare_extensions(ext1, ext2):
mgr1 = ext1.obj

View File

@ -13,6 +13,7 @@
# limitations under the License.
import binascii
import json
import os
import shutil
import stat
@ -5916,3 +5917,53 @@ class TestProtectedDiskSafetyChecks(base.IronicAgentTest):
hardware.safety_check_block_device,
{}, '/dev/foo')
self.assertEqual(1, mock_execute.call_count)
@mock.patch.object(il_utils, 'execute', autospec=True)
class TestCollectSystemLogs(base.IronicAgentTest):
def setUp(self):
super().setUp()
self.hardware = hardware.GenericHardwareManager()
@mock.patch('pyudev.Context', lambda: mock.sentinel.context)
@mock.patch('pyudev.Devices.from_device_file', autospec=True)
def test__collect_udev(self, mock_from_dev, mock_execute):
mock_execute.return_value = """
fake0
fake1
fake42
""", ""
mock_from_dev.side_effect = [
mock.Mock(properties={'ID_UUID': '0'}),
RuntimeError('nope'),
{'ID_UUID': '42'}
]
result = {}
hardware._collect_udev(result)
self.assertEqual({'udev/fake0', 'udev/fake42'}, set(result))
for i in ('0', '42'):
buf = result[f'udev/fake{i}']
# Avoiding getvalue on purpose - checking that the IO is not closed
val = json.loads(buf.read().decode('utf-8'))
self.assertEqual({'ID_UUID': i}, val)
@mock.patch.object(hardware, '_collect_udev', autospec=True)
def test_collect_system_logs(self, mock_udev, mock_execute):
commands = set()
expected = {'df', 'dmesg', 'iptables', 'ip', 'lsblk',
'lshw', 'cat', 'mount', 'multipath', 'parted', 'ps'}
def fake_execute(cmd, *args, **kwargs):
commands.add(cmd)
return cmd.encode(), ''
mock_execute.side_effect = fake_execute
io_dict = {}
file_list = []
self.hardware.collect_system_logs(io_dict, file_list)
self.assertEqual(commands, expected)
self.assertGreaterEqual(len(io_dict), len(expected))

View File

@ -17,7 +17,6 @@ import base64
import errno
import glob
import io
import json
import os
import shutil
import subprocess
@ -411,14 +410,12 @@ class TestUtils(ironic_agent_base.IronicAgentTest):
mock_call.side_effect = os_error
self.assertFalse(utils.is_journalctl_present())
@mock.patch.object(utils, '_collect_udev', autospec=True)
@mock.patch.object(utils, 'gzip_and_b64encode', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
@mock.patch.object(utils, 'is_journalctl_present', autospec=True)
@mock.patch.object(utils, 'get_command_output', autospec=True)
@mock.patch.object(utils, 'get_journalctl_output', autospec=True)
def test_collect_system_logs_journald(
self, mock_logs, mock_outputs, mock_journalctl, mock_gzip_b64,
mock_udev):
self, mock_logs, mock_journalctl, mock_dispatch, mock_gzip_b64):
mock_journalctl.return_value = True
ret = 'Patrick Star'
mock_gzip_b64.return_value = ret
@ -426,28 +423,17 @@ class TestUtils(ironic_agent_base.IronicAgentTest):
logs_string = utils.collect_system_logs()
self.assertEqual(ret, logs_string)
mock_logs.assert_called_once_with(lines=None)
calls = [mock.call(['ps', 'au']), mock.call(['df', '-a']),
mock.call(['iptables', '-L']), mock.call(['ip', 'addr']),
mock.call(['lshw', '-quiet', '-json'])]
mock_outputs.assert_has_calls(calls, any_order=True)
mock_gzip_b64.assert_called_once_with(
io_dict={'journal': mock.ANY, 'ps': mock.ANY, 'df': mock.ANY,
'iptables': mock.ANY, 'ip_addr': mock.ANY,
'lshw': mock.ANY, 'lsblk': mock.ANY,
'lsblk-full': mock.ANY, 'mdstat': mock.ANY,
'mount': mock.ANY, 'parted': mock.ANY,
'multipath': mock.ANY},
file_list=[])
mock_udev.assert_called_once_with(mock.ANY)
io_dict=mock.ANY, file_list=[])
mock_dispatch.assert_called_once_with('collect_system_logs',
mock.ANY, [])
@mock.patch.object(utils, '_collect_udev', autospec=True)
@mock.patch.object(utils, 'gzip_and_b64encode', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
@mock.patch.object(utils, 'is_journalctl_present', autospec=True)
@mock.patch.object(utils, 'get_command_output', autospec=True)
@mock.patch.object(utils, 'get_journalctl_output', autospec=True)
def test_collect_system_logs_journald_with_logfile(
self, mock_logs, mock_outputs, mock_journalctl, mock_gzip_b64,
mock_udev):
self, mock_logs, mock_journalctl, mock_dispatch, mock_gzip_b64):
tmp = tempfile.NamedTemporaryFile()
self.addCleanup(lambda: tmp.close())
@ -459,55 +445,32 @@ class TestUtils(ironic_agent_base.IronicAgentTest):
logs_string = utils.collect_system_logs()
self.assertEqual(ret, logs_string)
mock_logs.assert_called_once_with(lines=None)
calls = [mock.call(['ps', 'au']), mock.call(['df', '-a']),
mock.call(['iptables', '-L']), mock.call(['ip', 'addr']),
mock.call(['lshw', '-quiet', '-json'])]
mock_outputs.assert_has_calls(calls, any_order=True)
mock_gzip_b64.assert_called_once_with(
io_dict={'journal': mock.ANY, 'ps': mock.ANY, 'df': mock.ANY,
'iptables': mock.ANY, 'ip_addr': mock.ANY,
'lshw': mock.ANY, 'lsblk': mock.ANY,
'lsblk-full': mock.ANY, 'mdstat': mock.ANY,
'mount': mock.ANY, 'parted': mock.ANY,
'multipath': mock.ANY},
file_list=[tmp.name])
mock_udev.assert_called_once_with(mock.ANY)
io_dict=mock.ANY, file_list=[tmp.name])
mock_dispatch.assert_called_once_with('collect_system_logs',
mock.ANY, [tmp.name])
@mock.patch.object(utils, '_collect_udev', autospec=True)
@mock.patch.object(utils, 'gzip_and_b64encode', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
@mock.patch.object(utils, 'is_journalctl_present', autospec=True)
@mock.patch.object(utils, 'get_command_output', autospec=True)
def test_collect_system_logs_non_journald(
self, mock_outputs, mock_journalctl, mock_gzip_b64,
mock_udev):
self, mock_journalctl, mock_dispatch, mock_gzip_b64):
mock_journalctl.return_value = False
ret = 'SpongeBob SquarePants'
mock_gzip_b64.return_value = ret
logs_string = utils.collect_system_logs()
self.assertEqual(ret, logs_string)
calls = [mock.call(['dmesg']), mock.call(['ps', 'au']),
mock.call(['df', '-a']), mock.call(['iptables', '-L']),
mock.call(['ip', 'addr']),
mock.call(['lshw', '-quiet', '-json'])]
mock_outputs.assert_has_calls(calls, any_order=True)
mock_gzip_b64.assert_called_once_with(
io_dict={'dmesg': mock.ANY, 'ps': mock.ANY, 'df': mock.ANY,
'iptables': mock.ANY, 'ip_addr': mock.ANY,
'lshw': mock.ANY, 'lsblk': mock.ANY,
'lsblk-full': mock.ANY, 'mdstat': mock.ANY,
'mount': mock.ANY, 'parted': mock.ANY,
'multipath': mock.ANY},
file_list=['/var/log'])
mock_udev.assert_called_once_with(mock.ANY)
io_dict=mock.ANY, file_list=['/var/log'])
mock_dispatch.assert_called_once_with('collect_system_logs',
mock.ANY, ['/var/log'])
@mock.patch.object(utils, '_collect_udev', autospec=True)
@mock.patch.object(utils, 'gzip_and_b64encode', autospec=True)
@mock.patch.object(hardware, 'dispatch_to_all_managers', autospec=True)
@mock.patch.object(utils, 'is_journalctl_present', autospec=True)
@mock.patch.object(utils, 'get_command_output', autospec=True)
def test_collect_system_logs_non_journald_with_logfile(
self, mock_outputs, mock_journalctl, mock_gzip_b64,
mock_udev):
self, mock_journalctl, mock_dispatch, mock_gzip_b64):
tmp = tempfile.NamedTemporaryFile()
self.addCleanup(lambda: tmp.close())
@ -518,44 +481,10 @@ class TestUtils(ironic_agent_base.IronicAgentTest):
logs_string = utils.collect_system_logs()
self.assertEqual(ret, logs_string)
calls = [mock.call(['dmesg']), mock.call(['ps', 'au']),
mock.call(['df', '-a']), mock.call(['iptables', '-L']),
mock.call(['ip', 'addr']),
mock.call(['lshw', '-quiet', '-json'])]
mock_outputs.assert_has_calls(calls, any_order=True)
mock_gzip_b64.assert_called_once_with(
io_dict={'dmesg': mock.ANY, 'ps': mock.ANY, 'df': mock.ANY,
'iptables': mock.ANY, 'ip_addr': mock.ANY,
'lshw': mock.ANY, 'lsblk': mock.ANY,
'lsblk-full': mock.ANY, 'mdstat': mock.ANY,
'mount': mock.ANY, 'parted': mock.ANY,
'multipath': mock.ANY},
file_list=['/var/log', tmp.name])
mock_udev.assert_called_once_with(mock.ANY)
@mock.patch('pyudev.Context', lambda: mock.sentinel.context)
@mock.patch('pyudev.Devices.from_device_file', autospec=True)
@mock.patch.object(ironic_utils, 'execute', autospec=True)
def test_collect_udev(self, mock_execute, mock_from_dev):
mock_execute.return_value = """
fake0
fake1
fake42
""", ""
mock_from_dev.side_effect = [
mock.Mock(properties={'ID_UUID': '0'}),
RuntimeError('nope'),
{'ID_UUID': '42'}
]
result = {}
utils._collect_udev(result)
self.assertEqual({'udev/fake0', 'udev/fake42'}, set(result))
for i in ('0', '42'):
buf = result[f'udev/fake{i}']
# Avoiding getvalue on purpose - checking that the IO is not closed
val = json.loads(buf.read().decode('utf-8'))
self.assertEqual({'ID_UUID': i}, val)
io_dict=mock.ANY, file_list=['/var/log', tmp.name])
mock_dispatch.assert_called_once_with('collect_system_logs',
mock.ANY, ['/var/log', tmp.name])
def test_get_ssl_client_options(self):
# defaults

View File

@ -33,7 +33,6 @@ from oslo_concurrency import processutils
from oslo_config import cfg
from oslo_log import log as logging
from oslo_utils import units
import pyudev
import requests
import tenacity
@ -63,21 +62,6 @@ LSBLK_COLUMNS = ['KNAME', 'MODEL', 'SIZE', 'ROTA',
'TYPE', 'UUID', 'PARTUUID', 'SERIAL']
COLLECT_LOGS_COMMANDS = {
'ps': ['ps', 'au'],
'df': ['df', '-a'],
'iptables': ['iptables', '-L'],
'ip_addr': ['ip', 'addr'],
'lshw': ['lshw', '-quiet', '-json'],
'lsblk': ['lsblk', '--all', '-o%s' % ','.join(LSBLK_COLUMNS)],
'lsblk-full': ['lsblk', '--all', '--bytes', '--output-all', '--pairs'],
'mdstat': ['cat', '/proc/mdstat'],
'mount': ['mount'],
'parted': ['parted', '-l'],
'multipath': ['multipath', '-ll'],
}
DEVICE_EXTRACTOR = re.compile(r'^(?:(.*\d)p|(.*\D))(?:\d+)$')
@ -540,40 +524,12 @@ def gzip_and_b64encode(io_dict=None, file_list=None):
return _encode_as_text(fp.getvalue())
def _collect_udev(io_dict):
"""Collect device properties from udev."""
def try_collect_command_output(io_dict, file_name, command):
LOG.debug('Collecting command output: %s', command)
try:
out, _e = ironic_utils.execute('lsblk', '-no', 'KNAME')
except processutils.ProcessExecutionError as exc:
LOG.warning('Could not list block devices: %s', exc)
return
context = pyudev.Context()
for kname in out.splitlines():
kname = kname.strip()
if not kname:
continue
name = os.path.join('/dev', kname)
try:
udev = pyudev.Devices.from_device_file(context, name)
except Exception as e:
LOG.warning("Device %(dev)s is inaccessible, skipping... "
"Error: %(error)s", {'dev': name, 'error': e})
continue
try:
props = dict(udev.properties)
except AttributeError: # pyudev < 0.20
props = dict(udev)
fp = io.TextIOWrapper(io.BytesIO(), encoding='utf-8')
json.dump(props, fp)
buf = fp.detach()
buf.seek(0)
io_dict[f'udev/{kname}'] = buf
io_dict[file_name] = get_command_output(command)
except errors.CommandExecutionError:
LOG.debug('Collecting logs from command %s has failed', command)
def collect_system_logs(journald_max_lines=None):
@ -589,12 +545,6 @@ def collect_system_logs(journald_max_lines=None):
"""
LOG.info('Collecting system logs and debugging information')
def try_get_command_output(io_dict, file_name, command):
try:
io_dict[file_name] = get_command_output(command)
except errors.CommandExecutionError:
LOG.debug('Collecting logs from command %s has failed', command)
io_dict = {}
file_list = []
log_locations = [CONF.log_file, CONF.log_dir]
@ -604,20 +554,19 @@ def collect_system_logs(journald_max_lines=None):
if log_loc and os.path.exists(log_loc):
file_list.append(log_loc)
else:
try_get_command_output(io_dict, 'dmesg', ['dmesg'])
file_list.append('/var/log')
for log_loc in log_locations:
if (log_loc and os.path.exists(log_loc)
and not log_loc.startswith('/var/log')):
file_list.append(log_loc)
for name, cmd in COLLECT_LOGS_COMMANDS.items():
try_get_command_output(io_dict, name, cmd)
# Avoid circular imports
from ironic_python_agent import hardware
try:
_collect_udev(io_dict)
except Exception:
LOG.exception('Unexpected error when collecting udev properties')
hardware.dispatch_to_all_managers('collect_system_logs',
io_dict, file_list)
except errors.HardwareManagerMethodNotFound:
LOG.warning('All hardware managers failed to collect logs')
return gzip_and_b64encode(io_dict=io_dict, file_list=file_list)

View File

@ -0,0 +1,5 @@
---
features:
- |
Ramdisk log collection now uses the hardware managers mechanism. You can
provide a `collect_system_logs` call to extend the collection process.