Merge "Fix waiting for target disk to appear"

This commit is contained in:
Zuul 2017-10-17 14:24:05 +00:00 committed by Gerrit Code Review
commit ce32efc82b
8 changed files with 158 additions and 102 deletions

View File

@ -180,7 +180,7 @@ cli_opts = [
default=APARAMS.get('ipa-disk-wait-delay', 3),
help='How much time (in seconds) to wait between attempts '
'to check if at least one suitable disk has appeared '
'in inventory. '
'in inventory. Set to zero to disable. '
'Can be supplied as "ipa-disk-wait-delay" '
'kernel parameter.'),
cfg.BoolOpt('insecure',

View File

@ -381,6 +381,33 @@ class HardwareManager(object):
erase_results[block_device.name] = result
return erase_results
def wait_for_disks(self):
"""Wait for the root disk to appear.
Wait for at least one suitable disk to show up or a specific disk
if any device hint is specified. Otherwise neither inspection
not deployment have any chances to succeed.
"""
if not CONF.disk_wait_attempts:
return
for attempt in range(CONF.disk_wait_attempts):
try:
self.get_os_install_device()
except errors.DeviceNotFound:
LOG.debug('Still waiting for the root device to appear, '
'attempt %d of %d', attempt + 1,
CONF.disk_wait_attempts)
if attempt < CONF.disk_wait_attempts - 1:
time.sleep(CONF.disk_wait_delay)
else:
break
else:
LOG.warning('The root device was not detected in %d seconds',
CONF.disk_wait_delay * CONF.disk_wait_attempts)
def list_hardware_info(self):
"""Return full hardware inventory as a serializable dict.
@ -489,32 +516,9 @@ class GenericHardwareManager(HardwareManager):
def evaluate_hardware_support(self):
# Do some initialization before we declare ourself ready
_check_for_iscsi()
self._wait_for_disks()
self.wait_for_disks()
return HardwareSupport.GENERIC
def _wait_for_disks(self):
"""Wait for disk to appear
Wait for at least one suitable disk to show up, otherwise neither
inspection not deployment have any chances to succeed.
"""
for attempt in range(CONF.disk_wait_attempts):
try:
block_devices = self.list_block_devices()
utils.guess_root_disk(block_devices)
except errors.DeviceNotFound:
LOG.debug('Still waiting for at least one disk to appear, '
'attempt %d of %d', attempt + 1,
CONF.disk_wait_attempts)
time.sleep(CONF.disk_wait_delay)
else:
break
else:
LOG.warning('No disks detected in %d seconds',
CONF.disk_wait_delay * CONF.disk_wait_attempts)
def collect_lldp_data(self, interface_names):
"""Collect and convert LLDP info from the node.
@ -705,10 +709,12 @@ class GenericHardwareManager(HardwareManager):
root_device_hints = None
if cached_node is not None:
root_device_hints = cached_node['properties'].get('root_device')
LOG.debug('Looking for a device matching root hints %s',
root_device_hints)
block_devices = self.list_block_devices()
if not root_device_hints:
return utils.guess_root_disk(block_devices).name
dev_name = utils.guess_root_disk(block_devices).name
else:
serialized_devs = [dev.serialize() for dev in block_devices]
try:
@ -729,7 +735,13 @@ class GenericHardwareManager(HardwareManager):
"No suitable device was found for "
"deployment using these hints %s" % root_device_hints)
return device['name']
dev_name = device['name']
LOG.info('Picked root device %(dev)s for node %(node)s based on '
'root device hints %(hints)s',
{'dev': dev_name, 'hints': root_device_hints,
'node': cached_node['uuid'] if cached_node else None})
return dev_name
def get_system_vendor_info(self):
product_name = None
@ -1159,11 +1171,22 @@ def cache_node(node):
Stores the node object in the hardware module to facilitate the
access of a node information in the hardware extensions.
If the new node does not match the previously cached one, wait for the
expected root device to appear.
:param node: Ironic node object
"""
global NODE
new_node = NODE is None or NODE['uuid'] != node['uuid']
NODE = node
if new_node:
LOG.info('Cached node %s, waiting for its root device to appear',
node['uuid'])
# Root device hints, stored in the new node, can change the expected
# root device. So let us wait for it to appear again.
dispatch_to_managers('wait_for_disks')
def get_cached_node():
"""Guard function around the module variable NODE."""

View File

@ -24,6 +24,8 @@ class TestCommands(base.FunctionalBase):
different test runs.
"""
node = {'uuid': '1', 'properties': {}}
def step_1_get_empty_commands(self):
response = self.request('get', 'commands')
self.assertEqual({'commands': []}, response)
@ -34,7 +36,7 @@ class TestCommands(base.FunctionalBase):
# this command succeeds even with an empty node and port. This test's
# success is required for steps 3 and 4 to succeed.
command = {'name': 'clean.get_clean_steps',
'params': {'node': {}, 'ports': {}}}
'params': {'node': self.node, 'ports': {}}}
response = self.request('post', 'commands', json=command,
headers={'Content-Type': 'application/json'})
self.assertIsNone(response['command_error'])

View File

@ -19,6 +19,7 @@ from ironic_python_agent import errors
from ironic_python_agent.extensions import clean
@mock.patch('ironic_python_agent.hardware.cache_node', autospec=True)
class TestCleanExtension(test_base.BaseTestCase):
def setUp(self):
super(TestCleanExtension, self).setUp()
@ -37,7 +38,8 @@ class TestCleanExtension(test_base.BaseTestCase):
'_get_current_clean_version', autospec=True)
@mock.patch('ironic_python_agent.hardware.dispatch_to_all_managers',
autospec=True)
def test_get_clean_steps(self, mock_dispatch, mock_version):
def test_get_clean_steps(self, mock_dispatch, mock_version,
mock_cache_node):
mock_version.return_value = self.version
manager_steps = {
@ -135,12 +137,14 @@ class TestCleanExtension(test_base.BaseTestCase):
# 'priority' in Ironic
self.assertEqual(expected_return,
async_results.join().command_result)
mock_cache_node.assert_called_once_with(self.node)
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers',
autospec=True)
@mock.patch('ironic_python_agent.extensions.clean._check_clean_version',
autospec=True)
def test_execute_clean_step(self, mock_version, mock_dispatch):
def test_execute_clean_step(self, mock_version, mock_dispatch,
mock_cache_node):
result = 'cleaned'
mock_dispatch.return_value = result
@ -159,13 +163,14 @@ class TestCleanExtension(test_base.BaseTestCase):
self.step['GenericHardwareManager'][0]['step'],
self.node, self.ports)
self.assertEqual(expected_result, async_result.command_result)
mock_cache_node.assert_called_once_with(self.node)
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers',
autospec=True)
@mock.patch('ironic_python_agent.extensions.clean._check_clean_version',
autospec=True)
def test_execute_clean_step_tuple_result(self, mock_version,
mock_dispatch):
mock_dispatch, mock_cache_node):
result = ('stdout', 'stderr')
mock_dispatch.return_value = result
@ -184,10 +189,11 @@ class TestCleanExtension(test_base.BaseTestCase):
self.step['GenericHardwareManager'][0]['step'],
self.node, self.ports)
self.assertEqual(expected_result, async_result.command_result)
mock_cache_node.assert_called_once_with(self.node)
@mock.patch('ironic_python_agent.extensions.clean._check_clean_version',
autospec=True)
def test_execute_clean_step_no_step(self, mock_version):
def test_execute_clean_step_no_step(self, mock_version, mock_cache_node):
async_result = self.agent_extension.execute_clean_step(
step={}, node=self.node, ports=self.ports,
clean_version=self.version)
@ -195,12 +201,14 @@ class TestCleanExtension(test_base.BaseTestCase):
self.assertEqual('FAILED', async_result.command_status)
mock_version.assert_called_once_with(self.version)
mock_cache_node.assert_called_once_with(self.node)
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers',
autospec=True)
@mock.patch('ironic_python_agent.extensions.clean._check_clean_version',
autospec=True)
def test_execute_clean_step_fail(self, mock_version, mock_dispatch):
def test_execute_clean_step_fail(self, mock_version, mock_dispatch,
mock_cache_node):
mock_dispatch.side_effect = RuntimeError
async_result = self.agent_extension.execute_clean_step(
@ -214,13 +222,15 @@ class TestCleanExtension(test_base.BaseTestCase):
mock_dispatch.assert_called_once_with(
self.step['GenericHardwareManager'][0]['step'],
self.node, self.ports)
mock_cache_node.assert_called_once_with(self.node)
@mock.patch('ironic_python_agent.hardware.dispatch_to_managers',
autospec=True)
@mock.patch('ironic_python_agent.extensions.clean._check_clean_version',
autospec=True)
def test_execute_clean_step_version_mismatch(self, mock_version,
mock_dispatch):
mock_dispatch,
mock_cache_node):
mock_version.side_effect = errors.CleanVersionMismatch(
{'GenericHardwareManager': 1}, {'GenericHardwareManager': 2})
@ -232,17 +242,19 @@ class TestCleanExtension(test_base.BaseTestCase):
mock_version.assert_called_once_with(self.version)
@mock.patch('ironic_python_agent.hardware.dispatch_to_all_managers',
autospec=True)
def _get_current_clean_version(self, mock_dispatch):
@mock.patch('ironic_python_agent.hardware.dispatch_to_all_managers',
autospec=True)
class TestCleanVersion(test_base.BaseTestCase):
version = {'generic': '1', 'specific': '1'}
def test__get_current_clean_version(self, mock_dispatch):
mock_dispatch.return_value = {'SpecificHardwareManager':
{'name': 'specific', 'version': '1'},
'GenericHardwareManager':
{'name': 'generic', 'version': '1'}}
self.assertEqual(self.version, clean._get_current_clean_version())
@mock.patch('ironic_python_agent.hardware.dispatch_to_all_managers',
autospec=True)
def test__check_clean_version_fail(self, mock_dispatch):
mock_dispatch.return_value = {'SpecificHardwareManager':
{'name': 'specific', 'version': '1'}}

View File

@ -127,7 +127,7 @@ class TestHeartbeater(ironic_agent_base.IronicAgentTest):
self.assertEqual(2.7, self.heartbeater.error_delay)
@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks',
@mock.patch.object(hardware.GenericHardwareManager, 'wait_for_disks',
lambda self: None)
class TestBaseAgent(ironic_agent_base.IronicAgentTest):
@ -151,6 +151,7 @@ class TestBaseAgent(ironic_agent_base.IronicAgentTest):
FakeExtension())])
self.sample_nw_iface = hardware.NetworkInterface(
"eth9", "AA:BB:CC:DD:EE:FF", "1.2.3.4", True)
hardware.NODE = None
def assertEqualEncoded(self, a, b):
# Evidently JSONEncoder.default() can't handle None (??) so we have to
@ -204,7 +205,9 @@ class TestBaseAgent(ironic_agent_base.IronicAgentTest):
server_class=simple_server.WSGIServer)
wsgi_server.serve_forever.assert_called_once_with()
mock_wait.assert_called_once_with(mock.ANY)
mock_dispatch.assert_called_once_with("list_hardware_info")
self.assertEqual([mock.call('list_hardware_info'),
mock.call('wait_for_disks')],
mock_dispatch.call_args_list)
self.agent.heartbeater.start.assert_called_once_with()
@mock.patch.object(hardware, '_check_for_iscsi', mock.Mock())
@ -252,7 +255,9 @@ class TestBaseAgent(ironic_agent_base.IronicAgentTest):
self.agent.api_client.lookup_node.call_args[1]['node_uuid'])
mock_wait.assert_called_once_with(mock.ANY)
mock_dispatch.assert_called_once_with("list_hardware_info")
self.assertEqual([mock.call('list_hardware_info'),
mock.call('wait_for_disks')],
mock_dispatch.call_args_list)
self.agent.heartbeater.start.assert_called_once_with()
@mock.patch.object(hardware, '_check_for_iscsi', mock.Mock())
@ -419,7 +424,9 @@ class TestBaseAgent(ironic_agent_base.IronicAgentTest):
mock_sleep.assert_called_once_with(10)
self.assertTrue(mock_load_managers.called)
self.assertTrue(mock_wait.called)
mock_dispatch.assert_called_once_with('list_hardware_info')
self.assertEqual([mock.call('list_hardware_info'),
mock.call('wait_for_disks')],
mock_dispatch.call_args_list)
def test_async_command_success(self):
result = base.AsyncCommandResult('foo_command', {'fail': False},
@ -507,7 +514,7 @@ class TestBaseAgent(ironic_agent_base.IronicAgentTest):
mock_log.warning.assert_called_once()
@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks',
@mock.patch.object(hardware.GenericHardwareManager, 'wait_for_disks',
lambda self: None)
class TestAgentStandalone(ironic_agent_base.IronicAgentTest):
@ -563,7 +570,7 @@ class TestAgentStandalone(ironic_agent_base.IronicAgentTest):
@mock.patch.object(hardware, '_check_for_iscsi', lambda: None)
@mock.patch.object(hardware.GenericHardwareManager, '_wait_for_disks',
@mock.patch.object(hardware.GenericHardwareManager, 'wait_for_disks',
lambda self: None)
@mock.patch.object(socket, 'gethostbyname', autospec=True)
@mock.patch.object(utils, 'execute', autospec=True)

View File

@ -680,7 +680,8 @@ class TestGenericHardwareManager(base.IronicAgentTest):
@mock.patch.object(hardware, 'get_cached_node', autospec=True)
def _get_os_install_device_root_device_hints(self, hints, expected_device,
mock_cached_node, mock_dev):
mock_cached_node.return_value = {'properties': {'root_device': hints}}
mock_cached_node.return_value = {'properties': {'root_device': hints},
'uuid': 'node1'}
model = 'fastable sd131 7'
mock_dev.return_value = [
hardware.BlockDevice(name='/dev/sda',
@ -1660,15 +1661,13 @@ class TestGenericHardwareManager(base.IronicAgentTest):
self.assertEqual('NEC',
self.hardware.get_system_vendor_info().manufacturer)
@mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices',
autospec=True)
@mock.patch.object(hardware.GenericHardwareManager,
'get_os_install_device', autospec=True)
@mock.patch.object(hardware, '_check_for_iscsi', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(utils, 'guess_root_disk', autospec=True)
def test_evaluate_hw_waits_for_disks(self, mocked_root_dev, mocked_sleep,
mocked_check_for_iscsi,
mocked_block_dev):
mocked_root_dev.side_effect = [
def test_evaluate_hw_waits_for_disks(
self, mocked_sleep, mocked_check_for_iscsi, mocked_get_inst_dev):
mocked_get_inst_dev.side_effect = [
errors.DeviceNotFound('boom'),
None
]
@ -1677,19 +1676,32 @@ class TestGenericHardwareManager(base.IronicAgentTest):
self.assertTrue(mocked_check_for_iscsi.called)
self.assertEqual(hardware.HardwareSupport.GENERIC, result)
mocked_root_dev.assert_called_with(mocked_block_dev.return_value)
self.assertEqual(2, mocked_root_dev.call_count)
mocked_get_inst_dev.assert_called_with(mock.ANY)
self.assertEqual(2, mocked_get_inst_dev.call_count)
mocked_sleep.assert_called_once_with(CONF.disk_wait_delay)
@mock.patch.object(hardware, '_check_for_iscsi', mock.Mock())
@mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices',
autospec=True)
@mock.patch.object(hardware.GenericHardwareManager,
'get_os_install_device', autospec=True)
@mock.patch.object(hardware, '_check_for_iscsi', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(utils, 'guess_root_disk', autospec=True)
def test_evaluate_hw_waits_for_disks_nonconfigured(self, mocked_root_dev,
mocked_sleep,
mocked_block_dev):
mocked_root_dev.side_effect = [
def test_evaluate_hw_no_wait_for_disks(
self, mocked_sleep, mocked_check_for_iscsi, mocked_get_inst_dev):
CONF.set_override('disk_wait_attempts', '0')
result = self.hardware.evaluate_hardware_support()
self.assertTrue(mocked_check_for_iscsi.called)
self.assertEqual(hardware.HardwareSupport.GENERIC, result)
self.assertFalse(mocked_get_inst_dev.called)
self.assertFalse(mocked_sleep.called)
@mock.patch.object(hardware, '_check_for_iscsi', mock.Mock())
@mock.patch.object(hardware.GenericHardwareManager,
'get_os_install_device', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
def test_evaluate_hw_waits_for_disks_nonconfigured(
self, mocked_sleep, mocked_get_inst_dev):
mocked_get_inst_dev.side_effect = [
errors.DeviceNotFound('boom'),
errors.DeviceNotFound('boom'),
errors.DeviceNotFound('boom'),
@ -1706,20 +1718,20 @@ class TestGenericHardwareManager(base.IronicAgentTest):
self.hardware.evaluate_hardware_support()
mocked_root_dev.assert_called_with(mocked_block_dev.return_value)
self.assertEqual(10, mocked_root_dev.call_count)
mocked_get_inst_dev.assert_called_with(mock.ANY)
self.assertEqual(10, mocked_get_inst_dev.call_count)
expected_calls = [mock.call(CONF.disk_wait_delay)] * 9
mocked_sleep.assert_has_calls(expected_calls)
@mock.patch.object(hardware, '_check_for_iscsi', mock.Mock())
@mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices',
autospec=True)
@mock.patch.object(hardware.GenericHardwareManager,
'get_os_install_device', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(utils, 'guess_root_disk', autospec=True)
def test_evaluate_hw_waits_for_disks_configured(self, mocked_root_dev,
mocked_sleep,
mocked_block_dev):
def test_evaluate_hw_waits_for_disks_configured(self, mocked_sleep,
mocked_get_inst_dev):
CONF.set_override('disk_wait_attempts', '2')
mocked_root_dev.side_effect = [
mocked_get_inst_dev.side_effect = [
errors.DeviceNotFound('boom'),
errors.DeviceNotFound('boom'),
errors.DeviceNotFound('boom'),
@ -1729,54 +1741,45 @@ class TestGenericHardwareManager(base.IronicAgentTest):
self.hardware.evaluate_hardware_support()
mocked_root_dev.assert_called_with(mocked_block_dev.return_value)
self.assertEqual(2, mocked_root_dev.call_count)
mocked_get_inst_dev.assert_called_with(mock.ANY)
self.assertEqual(2, mocked_get_inst_dev.call_count)
expected_calls = [mock.call(CONF.disk_wait_delay)] * 1
mocked_sleep.assert_has_calls(expected_calls)
@mock.patch.object(hardware, '_check_for_iscsi', mock.Mock())
@mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices',
autospec=True)
@mock.patch.object(hardware.GenericHardwareManager,
'get_os_install_device', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(utils, 'guess_root_disk', autospec=True)
def test_evaluate_hw_disks_timeout_unconfigured(self, mocked_root_dev,
mocked_sleep,
mocked_block_dev):
mocked_root_dev.side_effect = errors.DeviceNotFound('boom')
def test_evaluate_hw_disks_timeout_unconfigured(self, mocked_sleep,
mocked_get_inst_dev):
mocked_get_inst_dev.side_effect = errors.DeviceNotFound('boom')
self.hardware.evaluate_hardware_support()
mocked_sleep.assert_called_with(3)
@mock.patch.object(hardware, '_check_for_iscsi', mock.Mock())
@mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices',
autospec=True)
@mock.patch.object(hardware.GenericHardwareManager,
'get_os_install_device', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(utils, 'guess_root_disk', autospec=True)
def test_evaluate_hw_disks_timeout_configured(self, mocked_root_dev,
mocked_sleep,
mocked_block_dev):
def test_evaluate_hw_disks_timeout_configured(self, mocked_sleep,
mocked_root_dev):
CONF.set_override('disk_wait_delay', '5')
mocked_root_dev.side_effect = errors.DeviceNotFound('boom')
self.hardware.evaluate_hardware_support()
mocked_sleep.assert_called_with(5)
@mock.patch.object(hardware.GenericHardwareManager, 'list_block_devices',
autospec=True)
@mock.patch.object(hardware.GenericHardwareManager,
'get_os_install_device', autospec=True)
@mock.patch.object(hardware, '_check_for_iscsi', autospec=True)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(utils, 'guess_root_disk', autospec=True)
def test_evaluate_hw_disks_timeout(self, mocked_root_dev, mocked_sleep,
mocked_check_for_iscsi,
mocked_block_dev):
mocked_root_dev.side_effect = errors.DeviceNotFound('boom')
def test_evaluate_hw_disks_timeout(
self, mocked_sleep, mocked_check_for_iscsi, mocked_get_inst_dev):
mocked_get_inst_dev.side_effect = errors.DeviceNotFound('boom')
result = self.hardware.evaluate_hardware_support()
self.assertEqual(hardware.HardwareSupport.GENERIC, result)
mocked_root_dev.assert_called_with(mocked_block_dev.return_value)
mocked_get_inst_dev.assert_called_with(mock.ANY)
self.assertEqual(CONF.disk_wait_attempts,
mocked_root_dev.call_count)
mocked_get_inst_dev.call_count)
mocked_sleep.assert_called_with(CONF.disk_wait_delay)
@mock.patch.object(utils, 'get_agent_params',

View File

@ -53,6 +53,8 @@ class ZFakeGenericHardwareManager(hardware.HardwareManager):
_build_clean_step('ZHigherPrio', 100)]
@mock.patch.object(hardware.HardwareManager, 'wait_for_disks',
lambda _self: None)
class TestMultipleHardwareManagerCleanSteps(base.IronicAgentTest):
def setUp(self):
super(TestMultipleHardwareManagerCleanSteps, self).setUp()
@ -79,7 +81,8 @@ class TestMultipleHardwareManagerCleanSteps(base.IronicAgentTest):
hardware._global_managers = None
def test_clean_step_ordering(self):
as_results = self.agent_extension.get_clean_steps(node={}, ports=[])
as_results = self.agent_extension.get_clean_steps(node={'uuid': '1'},
ports=[])
results = as_results.join().command_result
expected_steps = {
'clean_steps': {

View File

@ -0,0 +1,6 @@
---
fixes:
- |
If root device hints are provided on the node, wait for the root device
instead of the first disk. This fixes deployment when the target disk takes
time to load.