Merge "Make port binding failure (configurably) fatal"

This commit is contained in:
Zuul
2025-06-04 21:39:23 +00:00
committed by Gerrit Code Review
7 changed files with 107 additions and 16 deletions

View File

@@ -407,8 +407,27 @@ def add_ports_to_network(task, network_uuid, security_groups=None):
update_port_attrs)
if CONF.neutron.dhcpv6_stateful_address_count > 1:
_add_ip_addresses_for_ipv6_stateful(task.context, port, client)
if is_smart_nic:
wait_for_port_status(client, port.id, 'ACTIVE')
binding_fail_fatal = False
is_neutron_iface = node.network_interface == 'neutron'
if is_neutron_iface:
binding_fail_fatal = getattr(
CONF.neutron, 'fail_on_port_binding_failure', False)
default_failure_behavior = is_smart_nic or binding_fail_fatal
fail_on_binding_failure = node.driver_info.get(
'fail_on_binding_failure', default_failure_behavior)
# NOTE(cid): Only check port status if it's a smart NIC or if we're
# configured to fail on binding failures. This avoids unnecessary
# failures when using network interfaces where binding may fail but
# we want to continue anyway (like in the case of flat networks).
if fail_on_binding_failure:
wait_for_port_status(
client, port.id, 'ACTIVE',
fail_on_binding_failure=fail_on_binding_failure)
except openstack_exc.OpenStackCloudException as e:
failures.append(ironic_port.uuid)
LOG.warning("Could not create neutron port for node's "
@@ -1061,12 +1080,15 @@ def wait_for_host_agent(client, host_id, target_state='up'):
stop=tenacity.stop_after_attempt(CONF.agent.neutron_agent_max_attempts),
wait=tenacity.wait_fixed(CONF.agent.neutron_agent_status_retry_interval),
reraise=True)
def wait_for_port_status(client, port_id, status):
def wait_for_port_status(client, port_id, status,
fail_on_binding_failure=None):
"""Wait for port status to be the desired status
:param client: A Neutron client object.
:param port_id: Neutron port_id
:param status: Port's target status, can be ACTIVE, DOWN ... etc.
:param fail_on_binding_failure: Whether to raise exception if port
binding fails.
:returns: boolean indicates that the port status matches the
required value passed by param status.
:raises: InvalidParameterValue if the port does not exist.
@@ -1080,6 +1102,14 @@ def wait_for_port_status(client, port_id, status):
{'port_id': port_id, 'status': port.status})
if port.status == status:
return True
# fail early on port binding failure
if port.get('binding:vif_type') == 'binding_failed':
msg = "Binding failed for neutron port %s" % port_id
if fail_on_binding_failure:
LOG.error(msg)
raise openstack_exc.OpenStackCloudException(msg)
LOG.warning(msg)
raise exception.NetworkError(
'Port %(port_id)s failed to reach status %(status)s' % {
'port_id': port_id, 'status': status})

View File

@@ -26,6 +26,10 @@ opts = [
mutable=True,
help=_('Delay value to wait for Neutron agents to setup '
'sufficient DHCP configuration for port.')),
cfg.BoolOpt('fail_on_port_binding_failure',
default=True,
help=_('Whether to fail or continue deployment if neutron '
'port binding fails.')),
cfg.IntOpt('retries',
default=3,
mutable=True,

View File

@@ -294,8 +294,27 @@ def plug_port_to_tenant_network(task, port_like_obj, client=None):
try:
neutron.update_neutron_port(task.context, vif_id, port_attrs)
if is_smart_nic:
neutron.wait_for_port_status(client, vif_id, 'ACTIVE')
binding_fail_fatal = False
is_neutron_iface = node.network_interface == 'neutron'
if is_neutron_iface:
binding_fail_fatal = getattr(
CONF.neutron, 'fail_on_port_binding_failure', False)
default_failure_behavior = is_smart_nic or binding_fail_fatal
fail_on_binding_failure = node.driver_info.get(
'fail_on_binding_failure', default_failure_behavior)
# NOTE(cid): Only check port status if it's a smart NIC or if we're
# configured to fail on binding failures. This avoids unnecessary
# failures when using network interfaces where binding may fail but
# we want to continue anyway (like in the case of flat networks).
if fail_on_binding_failure:
neutron.wait_for_port_status(
client, vif_id, 'ACTIVE',
fail_on_binding_failure=fail_on_binding_failure)
except openstack_exc.OpenStackCloudException as e:
msg = (_('Could not add public network VIF %(vif)s '
'to node %(node)s, possible network issue. %(exc)s') %

View File

@@ -1026,7 +1026,8 @@ class TestNeutronNetworkActions(db_base.DbTestCase):
wait_agent_mock.assert_called_once_with(
self.client_mock, 'hostname')
wait_port_mock.assert_called_once_with(
self.client_mock, self.neutron_port.id, 'ACTIVE')
self.client_mock, self.neutron_port.id, 'ACTIVE',
fail_on_binding_failure=True)
@mock.patch.object(neutron, 'is_smartnic_port', autospec=True)
@mock.patch.object(neutron, 'wait_for_host_agent', autospec=True)

View File

@@ -428,8 +428,11 @@ class TestCommonFunctions(db_base.DbTestCase):
task, self.vif_id, {'physnet2'})
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
@mock.patch.object(neutron_common, 'get_client', autospec=True)
def test_plug_port_to_tenant_network_client(self, mock_gc, mock_update):
def test_plug_port_to_tenant_network_client(self, mock_gc,
wait_mock_status,
mock_update):
self.port.internal_info = {common.TENANT_VIF_KEY: self.vif_id}
self.port.save()
with task_manager.acquire(self.context, self.node.id) as task:
@@ -439,8 +442,11 @@ class TestCommonFunctions(db_base.DbTestCase):
self.assertTrue(mock_update.called)
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
@mock.patch.object(neutron_common, 'get_client', autospec=True)
def test_plug_port_to_tenant_network_no_client(self, mock_gc, mock_update):
def test_plug_port_to_tenant_network_no_client(self, mock_gc,
wait_mock_status,
mock_update):
self.port.internal_info = {common.TENANT_VIF_KEY: self.vif_id}
self.port.save()
with task_manager.acquire(self.context, self.node.id) as task:
@@ -480,7 +486,7 @@ class TestCommonFunctions(db_base.DbTestCase):
wait_agent_mock.assert_called_once_with(
nclient, 'hostname')
wait_port_mock.assert_called_once_with(
nclient, self.vif_id, 'ACTIVE')
nclient, self.vif_id, 'ACTIVE', fail_on_binding_failure=True)
self.assertTrue(mock_update.called)

View File

@@ -561,10 +561,12 @@ class NeutronInterfaceTestCase(db_base.DbTestCase):
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
@mock.patch.object(neutron_common, 'get_client', autospec=True)
@mock.patch.object(neutron, 'LOG', autospec=True)
def test_configure_tenant_networks_multiple_ports_one_vif_id(
self, log_mock, client_mock, update_mock, wait_agent_mock):
self, log_mock, client_mock, wait_mock_status, update_mock,
wait_agent_mock):
expected_attrs = {
'binding:vnic_type': 'baremetal',
'binding:host_id': self.node.uuid,
@@ -583,8 +585,10 @@ class NeutronInterfaceTestCase(db_base.DbTestCase):
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
@mock.patch.object(neutron_common, 'get_client', autospec=True)
def test_configure_tenant_networks_update_fail(self, client_mock,
wait_mock_status,
update_mock,
wait_agent_mock):
update_mock.side_effect = openstack_exc.OpenStackCloudException(
@@ -595,11 +599,26 @@ class NeutronInterfaceTestCase(db_base.DbTestCase):
self.interface.configure_tenant_networks, task)
client_mock.assert_called_once_with(context=task.context)
@mock.patch.object(neutron_common, '_get_port_by_uuid', autospec=True)
@mock.patch.object(neutron_common, 'get_client', autospec=True)
def test_configure_tenant_networks_update_binding_fail(self, client_mock,
mock_get_port):
self.config(fail_on_port_binding_failure=True, group='neutron')
port = mock.MagicMock()
port.get.return_value = 'binding_failed'
mock_get_port.return_value = port
with task_manager.acquire(self.context, self.node.id) as task:
self.assertRaisesRegex(
exception.NetworkError, 'Binding failed',
self.interface.configure_tenant_networks, task)
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
@mock.patch.object(neutron_common, 'get_client', autospec=True)
def _test_configure_tenant_networks(self, client_mock, update_mock,
wait_agent_mock,
def _test_configure_tenant_networks(self, client_mock, wait_mock_status,
update_mock, wait_agent_mock,
is_client_id=False):
# NOTE(TheJulia): Until we have a replacement for infiniband client-id
# storage, extra has to stay put. On a plus side, this would be
@@ -669,12 +688,13 @@ class NeutronInterfaceTestCase(db_base.DbTestCase):
@mock.patch.object(neutron_common, 'get_neutron_port_data', autospec=True)
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
@mock.patch.object(neutron_common, 'get_client', autospec=True)
@mock.patch.object(neutron_common, 'get_local_group_information',
autospec=True)
def test_configure_tenant_networks_with_portgroups(
self, glgi_mock, client_mock, update_mock, wait_agent_mock,
port_data_mock):
self, glgi_mock, client_mock, wait_mock_status, update_mock,
wait_agent_mock, port_data_mock):
pg = utils.create_test_portgroup(
self.context, node_id=self.node.id, address='ff:54:00:cf:2d:32',
internal_info={'tenant_vif_port_id': uuidutils.generate_uuid()})
@@ -730,12 +750,13 @@ class NeutronInterfaceTestCase(db_base.DbTestCase):
@mock.patch.object(neutron_common, 'get_neutron_port_data', autospec=True)
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
@mock.patch.object(neutron_common, 'get_client', autospec=True)
@mock.patch.object(neutron_common, 'get_local_group_information',
autospec=True)
def test_configure_tenant_networks_with_portgroups_no_address(
self, glgi_mock, client_mock, update_mock, wait_agent_mock,
port_data_mock):
self, glgi_mock, client_mock, wait_mock_status, update_mock,
wait_agent_mock, port_data_mock):
pg = utils.create_test_portgroup(
self.context, node_id=self.node.id, address=None,
internal_info={'tenant_vif_port_id': uuidutils.generate_uuid()})

View File

@@ -0,0 +1,10 @@
---
features:
- |
Adds a new boolean configuration option ``[neutron]fail_on_port_binding_failure``
and corresponding node ``driver_info`` setting ``fail_on_binding_failure``
to control whether deployment should fail or continue if Neutron port
binding fails. With a default of ``true``, if your network is not
configured properly, this will likely cause deployment failures.
To maintain the previous behavior, explicitly set this option to False in
your configuration.