diff --git a/doc/source/admin/hardware_managers.rst b/doc/source/admin/hardware_managers.rst
index 51a6e3e3b..2144cf9ff 100644
--- a/doc/source/admin/hardware_managers.rst
+++ b/doc/source/admin/hardware_managers.rst
@@ -77,6 +77,9 @@ Clean steps
 ``deploy.burnin_cpu``
     Stress-test the CPUs of a node via stress-ng for a configurable
     amount of time. Disabled by default.
+``deploy.burnin_memory``
+    Stress-test the memory of a node via stress-ng for a configurable
+    amount of time. Disabled by default.
 ``deploy.erase_devices``
     Securely erases all information from all recognized disk devices.
     Relatively fast when secure ATA erase is available, otherwise can take
diff --git a/ironic_python_agent/burnin.py b/ironic_python_agent/burnin.py
index 5a9275e2e..bd6545471 100644
--- a/ironic_python_agent/burnin.py
+++ b/ironic_python_agent/burnin.py
@@ -46,3 +46,35 @@ def stress_ng_cpu(node):
                      {'err': e})
         LOG.error(error_msg)
         raise errors.CommandExecutionError(error_msg)
+
+
+def stress_ng_vm(node):
+    """Burn-in the memory with the vm stressor in stress-ng
+
+    Run stress-ng with a configurable number of workers on
+    a configurable amount of the available memory for
+    a configurable amount of time. Without config use
+    as many workers as CPUs, 98% of the memory and stress
+    it for 24 hours.
+
+    :param node: Ironic node object
+    :raises: CommandExecutionError if the execution of stress-ng fails.
+    """
+    info = node.get('driver_info', {})
+    vm = info.get('agent_burnin_vm_vm', 0)
+    vm_bytes = info.get('agent_burnin_vm_vm-bytes', '98%')
+    timeout = info.get('agent_burnin_vm_timeout', 86400)
+
+    args = ('stress-ng', '--vm', vm, '--vm-bytes', vm_bytes,
+            '--timeout', timeout, '--metrics-brief')
+    LOG.debug('Burn-in stress_ng_vm command: %s', args)
+
+    try:
+        _, err = utils.execute(*args)
+        # stress-ng reports on stderr only
+        LOG.info(err)
+    except (processutils.ProcessExecutionError, OSError) as e:
+        error_msg = ("stress-ng (vm) failed with error %(err)s" %
+                     {'err': e})
+        LOG.error(error_msg)
+        raise errors.CommandExecutionError(error_msg)
diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py
index 97c45449f..0180adaed 100644
--- a/ironic_python_agent/hardware.py
+++ b/ironic_python_agent/hardware.py
@@ -1402,6 +1402,14 @@ class GenericHardwareManager(HardwareManager):
         """
         burnin.stress_ng_cpu(node)
 
+    def burnin_memory(self, node, ports):
+        """Burn-in the memory
+
+        :param node: Ironic node object
+        :param ports: list of Ironic port objects
+        """
+        burnin.stress_ng_vm(node)
+
     def _shred_block_device(self, node, block_device):
         """Erase a block device using shred.
 
@@ -1882,6 +1890,13 @@ class GenericHardwareManager(HardwareManager):
                 'reboot_requested': False,
                 'abortable': True
             },
+            {
+                'step': 'burnin_memory',
+                'priority': 0,
+                'interface': 'deploy',
+                'reboot_requested': False,
+                'abortable': True
+            },
         ]
 
     def get_deploy_steps(self, node, ports):
diff --git a/ironic_python_agent/tests/unit/test_burnin.py b/ironic_python_agent/tests/unit/test_burnin.py
index d8339b6ce..7f411b9fb 100644
--- a/ironic_python_agent/tests/unit/test_burnin.py
+++ b/ironic_python_agent/tests/unit/test_burnin.py
@@ -54,3 +54,38 @@ class TestBurnin(base.IronicAgentTest):
 
         self.assertRaises(errors.CommandExecutionError,
                           burnin.stress_ng_cpu, node)
+
+    def test_stress_ng_vm_default(self, mock_execute):
+
+        node = {'driver_info': {}}
+        mock_execute.return_value = (['out', 'err'])
+
+        burnin.stress_ng_vm(node)
+
+        mock_execute.assert_called_once_with(
+            'stress-ng', '--vm', 0, '--vm-bytes', '98%',
+            '--timeout', 86400, '--metrics-brief')
+
+    def test_stress_ng_vm_non_default(self, mock_execute):
+
+        node = {'driver_info': {'agent_burnin_vm_vm': 2,
+                                'agent_burnin_vm_vm-bytes': '25%',
+                                'agent_burnin_vm_timeout': 120}}
+        mock_execute.return_value = (['out', 'err'])
+
+        burnin.stress_ng_vm(node)
+
+        mock_execute.assert_called_once_with(
+            'stress-ng', '--vm', 2, '--vm-bytes', '25%',
+            '--timeout', 120, '--metrics-brief')
+
+    def test_stress_ng_vm_no_stress_ng(self, mock_execute):
+
+        node = {'driver_info': {}}
+        mock_execute.side_effect = (['out', 'err'],
+                                    processutils.ProcessExecutionError())
+
+        burnin.stress_ng_vm(node)
+
+        self.assertRaises(errors.CommandExecutionError,
+                          burnin.stress_ng_vm, node)
diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py
index 019a14c0d..5884344c0 100644
--- a/ironic_python_agent/tests/unit/test_hardware.py
+++ b/ironic_python_agent/tests/unit/test_hardware.py
@@ -156,6 +156,13 @@ class TestGenericHardwareManager(base.IronicAgentTest):
                 'interface': 'deploy',
                 'reboot_requested': False,
                 'abortable': True
+            },
+            {
+                'step': 'burnin_memory',
+                'priority': 0,
+                'interface': 'deploy',
+                'reboot_requested': False,
+                'abortable': True
             }
         ]
         clean_steps = self.hardware.get_clean_steps(self.node, [])
diff --git a/releasenotes/notes/add_burnin_memory-4099ca42bd3b99db.yaml b/releasenotes/notes/add_burnin_memory-4099ca42bd3b99db.yaml
new file mode 100644
index 000000000..8aeb854ec
--- /dev/null
+++ b/releasenotes/notes/add_burnin_memory-4099ca42bd3b99db.yaml
@@ -0,0 +1,7 @@
+---
+features:
+  - |
+    Adds a burn-in cleaning step 'burnin_memory' to stress test memory for a
+    configurable amount of time with stress-ng. To use this step, stress-ng
+    needs to be installed on the RAM disk.
+