diff --git a/ironic_python_agent/cmd/agent.py b/ironic_python_agent/cmd/agent.py index 405c969d2..fc8b92679 100644 --- a/ironic_python_agent/cmd/agent.py +++ b/ironic_python_agent/cmd/agent.py @@ -114,6 +114,13 @@ cli_opts = [ help='Comma-separated list of plugins providing additional ' 'hardware data for inspection, empty value gives ' 'a minimum required set of plugins.'), + + cfg.IntOpt('inspection_dhcp_wait_timeout', + default=APARAMS.get('ipa-inspection-dhcp-wait-timeout', + inspector.DEFAULT_DHCP_WAIT_TIMEOUT), + help='Maximum time (in seconds) to wait for all NIC\'s ' + 'to get their IP addresses via DHCP before inspection. ' + 'Set to 0 to disable waiting completely.'), ] CONF.register_cli_opts(cli_opts) diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index e3af49cf9..b45d11914 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -175,12 +175,14 @@ class BlockDevice(encoding.SerializableComparable): class NetworkInterface(encoding.SerializableComparable): serializable_fields = ('name', 'mac_address', 'switch_port_descr', - 'switch_chassis_descr', 'ipv4_address') + 'switch_chassis_descr', 'ipv4_address', + 'has_carrier') - def __init__(self, name, mac_addr, ipv4_address=None): + def __init__(self, name, mac_addr, ipv4_address=None, has_carrier=True): self.name = name self.mac_address = mac_addr self.ipv4_address = ipv4_address + self.has_carrier = has_carrier # TODO(russellhaering): Pull these from LLDP self.switch_port_descr = None self.switch_chassis_descr = None @@ -402,7 +404,8 @@ class GenericHardwareManager(HardwareManager): return NetworkInterface( interface_name, mac_addr, - ipv4_address=self.get_ipv4_addr(interface_name)) + ipv4_address=self.get_ipv4_addr(interface_name), + has_carrier=self._interface_has_carrier(interface_name)) def get_ipv4_addr(self, interface_id): try: @@ -412,6 +415,17 @@ class GenericHardwareManager(HardwareManager): # No default IPv4 address found return None + def _interface_has_carrier(self, interface_name): + path = '{0}/class/net/{1}/carrier'.format(self.sys_path, + interface_name) + try: + with open(path, 'rt') as fp: + return fp.read().strip() == '1' + except EnvironmentError: + LOG.debug('No carrier information for interface %s', + interface_name) + return False + def _is_device(self, interface_name): device_path = '{0}/class/net/{1}/device'.format(self.sys_path, interface_name) diff --git a/ironic_python_agent/inspector.py b/ironic_python_agent/inspector.py index 1ea2e3d1f..ba42e7c88 100644 --- a/ironic_python_agent/inspector.py +++ b/ironic_python_agent/inspector.py @@ -17,6 +17,7 @@ import base64 import io import json import tarfile +import time import netaddr from oslo_concurrency import processutils @@ -36,6 +37,9 @@ from ironic_python_agent import utils LOG = logging.getLogger(__name__) CONF = cfg.CONF DEFAULT_COLLECTOR = 'default' +DEFAULT_DHCP_WAIT_TIMEOUT = 60 + +_DHCP_RETRY_INTERVAL = 2 _COLLECTOR_NS = 'ironic_python_agent.inspector.collectors' _NO_LOGGING_FIELDS = ('logs',) @@ -215,6 +219,38 @@ def discover_scheduling_properties(inventory, data, root_disk=None): LOG.info('value for %s is %s', key, data[key]) +def wait_for_dhcp(): + """Wait until all NIC's get their IP addresses via DHCP or timeout happens. + + Ignores interfaces which do not even have a carrier. + + Note: only supports IPv4 addresses for now. + + :return: True if all NIC's got IP addresses, False if timeout happened. + Also returns True if waiting is disabled via configuration. + """ + if not CONF.inspection_dhcp_wait_timeout: + return True + + threshold = time.time() + CONF.inspection_dhcp_wait_timeout + while time.time() <= threshold: + interfaces = hardware.dispatch_to_managers('list_network_interfaces') + missing = [iface.name for iface in interfaces + if iface.has_carrier and not iface.ipv4_address] + if not missing: + return True + + LOG.debug('Still waiting for interfaces %s to get IP addresses', + missing) + time.sleep(_DHCP_RETRY_INTERVAL) + + LOG.warning('Not all network interfaces received IP addresses in ' + '%(timeout)d seconds: %(missing)s', + {'timeout': CONF.inspection_dhcp_wait_timeout, + 'missing': missing}) + return False + + def collect_default(data, failures): """The default inspection collector. @@ -229,6 +265,7 @@ def collect_default(data, failures): :param data: mutable data that we'll send to inspector :param failures: AccumulatedFailures object """ + wait_for_dhcp() inventory = hardware.dispatch_to_managers('list_hardware_info') # In the future we will only need the current version of inventory, diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 9bf0b7bf4..5cb4a4182 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -268,7 +268,7 @@ class TestGenericHardwareManager(test_base.BaseTestCase): mocked_open.return_value.__enter__ = lambda s: s mocked_open.return_value.__exit__ = mock.Mock() read_mock = mocked_open.return_value.read - read_mock.return_value = '00:0c:29:8c:11:b1\n' + read_mock.side_effect = ['00:0c:29:8c:11:b1\n', '1'] mocked_ifaddresses.return_value = { netifaces.AF_INET: [{'addr': '192.168.1.2'}] } @@ -277,6 +277,32 @@ class TestGenericHardwareManager(test_base.BaseTestCase): self.assertEqual('eth0', interfaces[0].name) self.assertEqual('00:0c:29:8c:11:b1', interfaces[0].mac_address) self.assertEqual('192.168.1.2', interfaces[0].ipv4_address) + self.assertTrue(interfaces[0].has_carrier) + + @mock.patch('netifaces.ifaddresses') + @mock.patch('os.listdir') + @mock.patch('os.path.exists') + @mock.patch('six.moves.builtins.open') + def test_list_network_interfaces_no_carrier(self, + mocked_open, + mocked_exists, + mocked_listdir, + mocked_ifaddresses): + mocked_listdir.return_value = ['lo', 'eth0'] + mocked_exists.side_effect = [False, True] + mocked_open.return_value.__enter__ = lambda s: s + mocked_open.return_value.__exit__ = mock.Mock() + read_mock = mocked_open.return_value.read + read_mock.side_effect = ['00:0c:29:8c:11:b1\n', OSError('boom')] + mocked_ifaddresses.return_value = { + netifaces.AF_INET: [{'addr': '192.168.1.2'}] + } + interfaces = self.hardware.list_network_interfaces() + self.assertEqual(1, len(interfaces)) + self.assertEqual('eth0', interfaces[0].name) + self.assertEqual('00:0c:29:8c:11:b1', interfaces[0].mac_address) + self.assertEqual('192.168.1.2', interfaces[0].ipv4_address) + self.assertFalse(interfaces[0].has_carrier) @mock.patch.object(utils, 'execute') def test_get_os_install_device(self, mocked_execute): diff --git a/ironic_python_agent/tests/unit/test_inspector.py b/ironic_python_agent/tests/unit/test_inspector.py index bd70763ca..9247a6658 100644 --- a/ironic_python_agent/tests/unit/test_inspector.py +++ b/ironic_python_agent/tests/unit/test_inspector.py @@ -18,6 +18,7 @@ import collections import copy import io import tarfile +import time import unittest import mock @@ -328,11 +329,13 @@ class TestDiscoverSchedulingProperties(BaseDiscoverTest): @mock.patch.object(utils, 'get_agent_params', lambda: {'BOOTIF': 'boot:if'}) +@mock.patch.object(inspector, 'wait_for_dhcp', autospec=True) @mock.patch.object(inspector, 'discover_scheduling_properties', autospec=True) @mock.patch.object(inspector, 'discover_network_properties', autospec=True) @mock.patch.object(hardware, 'dispatch_to_managers', autospec=True) class TestCollectDefault(BaseDiscoverTest): - def test_ok(self, mock_dispatch, mock_discover_net, mock_discover_sched): + def test_ok(self, mock_dispatch, mock_discover_net, mock_discover_sched, + mock_wait_for_dhcp): mock_dispatch.return_value = self.inventory inspector.collect_default(self.data, self.failures) @@ -351,9 +354,10 @@ class TestCollectDefault(BaseDiscoverTest): mock_discover_sched.assert_called_once_with( self.inventory, self.data, root_disk=self.inventory['disks'][0]) + mock_wait_for_dhcp.assert_called_once_with() def test_no_root_disk(self, mock_dispatch, mock_discover_net, - mock_discover_sched): + mock_discover_sched, mock_wait_for_dhcp): mock_dispatch.return_value = self.inventory self.inventory['disks'] = [] @@ -371,6 +375,7 @@ class TestCollectDefault(BaseDiscoverTest): self.failures) mock_discover_sched.assert_called_once_with( self.inventory, self.data, root_disk=None) + mock_wait_for_dhcp.assert_called_once_with() @mock.patch.object(utils, 'execute', autospec=True) @@ -453,3 +458,56 @@ class TestCollectExtraHardware(unittest.TestCase): self.assertNotIn('data', self.data) self.assertTrue(self.failures) mock_execute.assert_called_once_with('hardware-detect') + + +@mock.patch.object(hardware, 'dispatch_to_managers', autospec=True) +class TestWaitForDhcp(unittest.TestCase): + def setUp(self): + super(TestWaitForDhcp, self).setUp() + CONF.set_override('inspection_dhcp_wait_timeout', + inspector.DEFAULT_DHCP_WAIT_TIMEOUT) + + @mock.patch.object(time, 'sleep', autospec=True) + def test_ok(self, mocked_sleep, mocked_dispatch): + mocked_dispatch.side_effect = [ + [hardware.NetworkInterface(name='em0', mac_addr='abcd', + ipv4_address=None), + hardware.NetworkInterface(name='em1', mac_addr='abcd', + ipv4_address='1.2.3.4')], + [hardware.NetworkInterface(name='em0', mac_addr='abcd', + ipv4_address=None), + hardware.NetworkInterface(name='em1', mac_addr='abcd', + ipv4_address='1.2.3.4')], + [hardware.NetworkInterface(name='em0', mac_addr='abcd', + ipv4_address='1.1.1.1'), + hardware.NetworkInterface(name='em1', mac_addr='abcd', + ipv4_address='1.2.3.4')], + ] + + self.assertTrue(inspector.wait_for_dhcp()) + + mocked_dispatch.assert_called_with('list_network_interfaces') + self.assertEqual(2, mocked_sleep.call_count) + self.assertEqual(3, mocked_dispatch.call_count) + + @mock.patch.object(inspector, '_DHCP_RETRY_INTERVAL', 0.01) + def test_timeout(self, mocked_dispatch): + CONF.set_override('inspection_dhcp_wait_timeout', 0.02) + + mocked_dispatch.return_value = [ + hardware.NetworkInterface(name='em0', mac_addr='abcd', + ipv4_address=None), + hardware.NetworkInterface(name='em1', mac_addr='abcd', + ipv4_address='1.2.3.4'), + ] + + self.assertFalse(inspector.wait_for_dhcp()) + + mocked_dispatch.assert_called_with('list_network_interfaces') + + def test_disabled(self, mocked_dispatch): + CONF.set_override('inspection_dhcp_wait_timeout', 0) + + self.assertTrue(inspector.wait_for_dhcp()) + + self.assertFalse(mocked_dispatch.called) diff --git a/ironic_python_agent/tests/unit/test_ironic_api_client.py b/ironic_python_agent/tests/unit/test_ironic_api_client.py index 30ce8158f..2e3defe38 100644 --- a/ironic_python_agent/tests/unit/test_ironic_api_client.py +++ b/ironic_python_agent/tests/unit/test_ironic_api_client.py @@ -160,14 +160,16 @@ class TestBaseIronicPythonAgent(test_base.BaseTestCase): u'name': u'eth0', u'ipv4_address': None, u'switch_chassis_descr': None, - u'switch_port_descr': None + u'switch_port_descr': None, + u'has_carrier': True, }, { u'mac_address': u'00:0c:29:8c:11:b2', u'name': u'eth1', u'ipv4_address': None, u'switch_chassis_descr': None, - 'switch_port_descr': None + u'switch_port_descr': None, + u'has_carrier': True, } ], u'cpu': { @@ -295,14 +297,16 @@ class TestBaseIronicPythonAgent(test_base.BaseTestCase): u'name': u'eth0', u'ipv4_address': None, u'switch_chassis_descr': None, - u'switch_port_descr': None + u'switch_port_descr': None, + u'has_carrier': True, }, { u'mac_address': u'00:0c:29:8c:11:b2', u'name': u'eth1', u'ipv4_address': None, u'switch_chassis_descr': None, - 'switch_port_descr': None + u'switch_port_descr': None, + u'has_carrier': True, } ], u'cpu': { diff --git a/releasenotes/notes/inspection-wait-for-ips-223e39b65fef31bd.yaml b/releasenotes/notes/inspection-wait-for-ips-223e39b65fef31bd.yaml new file mode 100644 index 000000000..0dbde182d --- /dev/null +++ b/releasenotes/notes/inspection-wait-for-ips-223e39b65fef31bd.yaml @@ -0,0 +1,7 @@ +--- +features: + - The "has_carrier" flag was added to the network interface information. +fixes: + - Inspection code now waits up to 1 minute for all NICs to get their IP addresses. + Otherwise inspection fails for some users due to race between DIB DHCP code + and IPA startup. See https://bugs.launchpad.net/bugs/1564954 for details.