Burn-in: Add memory step

Add a clean step for memory burn-in via stress-ng. Get basic
run parameters from the node's driver_info.

Story: #2007523
Task: #42383

Change-Id: I33a83968c9f87cf795ec7ec922bce98b52c5181c
This commit is contained in:
Arne Wiebalck 2021-04-30 10:44:53 +02:00
parent 6702fcaa43
commit 5c222560f0
6 changed files with 99 additions and 0 deletions

View File

@ -77,6 +77,9 @@ Clean steps
``deploy.burnin_cpu`` ``deploy.burnin_cpu``
Stress-test the CPUs of a node via stress-ng for a configurable Stress-test the CPUs of a node via stress-ng for a configurable
amount of time. Disabled by default. amount of time. Disabled by default.
``deploy.burnin_memory``
Stress-test the memory of a node via stress-ng for a configurable
amount of time. Disabled by default.
``deploy.erase_devices`` ``deploy.erase_devices``
Securely erases all information from all recognized disk devices. Securely erases all information from all recognized disk devices.
Relatively fast when secure ATA erase is available, otherwise can take Relatively fast when secure ATA erase is available, otherwise can take

View File

@ -46,3 +46,35 @@ def stress_ng_cpu(node):
{'err': e}) {'err': e})
LOG.error(error_msg) LOG.error(error_msg)
raise errors.CommandExecutionError(error_msg) raise errors.CommandExecutionError(error_msg)
def stress_ng_vm(node):
    """Burn-in the memory with the vm stressor in stress-ng

    Run stress-ng with a configurable number of workers on
    a configurable amount of the available memory for
    a configurable amount of time. Without config use
    as many workers as CPUs, 98% of the memory and stress
    it for 24 hours.

    The settings are read from the node's ``driver_info``:

    * ``agent_burnin_vm_vm``: value passed to ``--vm`` (default 0)
    * ``agent_burnin_vm_vm-bytes``: value passed to ``--vm-bytes``
      (default '98%')
    * ``agent_burnin_vm_timeout``: value passed to ``--timeout``
      (default 86400)

    :param node: Ironic node object
    :raises: CommandExecutionError if the execution of stress-ng fails.
    """
    info = node.get('driver_info', {})
    vm = info.get('agent_burnin_vm_vm', 0)
    vm_bytes = info.get('agent_burnin_vm_vm-bytes', '98%')
    timeout = info.get('agent_burnin_vm_timeout', 86400)

    args = ('stress-ng', '--vm', vm, '--vm-bytes', vm_bytes,
            '--timeout', timeout, '--metrics-brief')
    LOG.debug('Burn-in stress_ng_vm command: %s', args)

    try:
        _, err = utils.execute(*args)
        # stress-ng reports on stderr only
        LOG.info(err)
    except (processutils.ProcessExecutionError, OSError) as e:
        # Interpolate the message here: without the % operator this would
        # be a (format, mapping) tuple, and the tuple's repr would end up
        # in the log and in the raised exception.
        error_msg = ("stress-ng (vm) failed with error %(err)s" %
                     {'err': e})
        LOG.error(error_msg)
        raise errors.CommandExecutionError(error_msg)

View File

@ -1402,6 +1402,14 @@ class GenericHardwareManager(HardwareManager):
""" """
burnin.stress_ng_cpu(node) burnin.stress_ng_cpu(node)
def burnin_memory(self, node, ports):
    """Stress-test the memory of the node.

    Hands the node over to ``burnin.stress_ng_vm``; ``ports`` is
    accepted but not used by this step.

    :param node: Ironic node object
    :param ports: list of Ironic port objects
    """
    burnin.stress_ng_vm(node)
def _shred_block_device(self, node, block_device): def _shred_block_device(self, node, block_device):
"""Erase a block device using shred. """Erase a block device using shred.
@ -1882,6 +1890,13 @@ class GenericHardwareManager(HardwareManager):
'reboot_requested': False, 'reboot_requested': False,
'abortable': True 'abortable': True
}, },
{
'step': 'burnin_memory',
'priority': 0,
'interface': 'deploy',
'reboot_requested': False,
'abortable': True
},
] ]
def get_deploy_steps(self, node, ports): def get_deploy_steps(self, node, ports):

View File

@ -54,3 +54,38 @@ class TestBurnin(base.IronicAgentTest):
self.assertRaises(errors.CommandExecutionError, self.assertRaises(errors.CommandExecutionError,
burnin.stress_ng_cpu, node) burnin.stress_ng_cpu, node)
def test_stress_ng_vm_default(self, mock_execute):
    """An empty driver_info makes stress-ng run with the defaults."""
    mock_execute.return_value = ['out', 'err']

    burnin.stress_ng_vm({'driver_info': {}})

    expected_cmd = ('stress-ng', '--vm', 0, '--vm-bytes', '98%',
                    '--timeout', 86400, '--metrics-brief')
    mock_execute.assert_called_once_with(*expected_cmd)
def test_stress_ng_vm_non_default(self, mock_execute):
    """Values set in driver_info replace the stress-ng defaults."""
    driver_info = {
        'agent_burnin_vm_vm': 2,
        'agent_burnin_vm_vm-bytes': '25%',
        'agent_burnin_vm_timeout': 120,
    }
    mock_execute.return_value = ['out', 'err']

    burnin.stress_ng_vm({'driver_info': driver_info})

    expected_cmd = ('stress-ng', '--vm', 2, '--vm-bytes', '25%',
                    '--timeout', 120, '--metrics-brief')
    mock_execute.assert_called_once_with(*expected_cmd)
def test_stress_ng_vm_no_stress_ng(self, mock_execute):
    """A failing stress-ng run is reported as CommandExecutionError."""
    node = {'driver_info': {}}
    # The first execute call returns normally, the second one raises.
    successful_run = ['out', 'err']
    failed_run = processutils.ProcessExecutionError()
    mock_execute.side_effect = (successful_run, failed_run)

    burnin.stress_ng_vm(node)

    with self.assertRaises(errors.CommandExecutionError):
        burnin.stress_ng_vm(node)

View File

@ -156,6 +156,13 @@ class TestGenericHardwareManager(base.IronicAgentTest):
'interface': 'deploy', 'interface': 'deploy',
'reboot_requested': False, 'reboot_requested': False,
'abortable': True 'abortable': True
},
{
'step': 'burnin_memory',
'priority': 0,
'interface': 'deploy',
'reboot_requested': False,
'abortable': True
} }
] ]
clean_steps = self.hardware.get_clean_steps(self.node, []) clean_steps = self.hardware.get_clean_steps(self.node, [])

View File

@ -0,0 +1,7 @@
---
features:
- |
Adds a burn-in cleaning step 'burnin_memory' to stress-test memory for a
configurable amount of time with stress-ng. To use this step, stress-ng
needs to be installed on the RAM disk.