Make port binding failure (configurably) fatal
When using drivers like `networking-generic-switch`, there can be issues with changing the VLAN the server is attached to. Currently, the instance will still continue to attempt to boot, in some cases the instance can go into ACTIVE (if the server happens to be on the provisioning network already), but the server is not attached to the correct VLAN, generally causing the network to fail to come up. If we look at the ports, they are marked as binding failed. This is something that Nova checks for after doing the port update to attach the binding information. Lets add that check into Ironic, such that the above build will fail with a good hint at where to look for problems (namely Neutron). Change-Id: I3811941a3dff0a9f968258d05cc020e1f52e3e40
This commit is contained in:
@@ -362,8 +362,27 @@ def add_ports_to_network(task, network_uuid, security_groups=None):
|
|||||||
update_port_attrs)
|
update_port_attrs)
|
||||||
if CONF.neutron.dhcpv6_stateful_address_count > 1:
|
if CONF.neutron.dhcpv6_stateful_address_count > 1:
|
||||||
_add_ip_addresses_for_ipv6_stateful(task.context, port, client)
|
_add_ip_addresses_for_ipv6_stateful(task.context, port, client)
|
||||||
if is_smart_nic:
|
|
||||||
wait_for_port_status(client, port.id, 'ACTIVE')
|
binding_fail_fatal = False
|
||||||
|
is_neutron_iface = node.network_interface == 'neutron'
|
||||||
|
|
||||||
|
if is_neutron_iface:
|
||||||
|
binding_fail_fatal = getattr(
|
||||||
|
CONF.neutron, 'fail_on_port_binding_failure', False)
|
||||||
|
|
||||||
|
default_failure_behavior = is_smart_nic or binding_fail_fatal
|
||||||
|
|
||||||
|
fail_on_binding_failure = node.driver_info.get(
|
||||||
|
'fail_on_binding_failure', default_failure_behavior)
|
||||||
|
|
||||||
|
# NOTE(cid): Only check port status if it's a smart NIC or if we're
|
||||||
|
# configured to fail on binding failures. This avoids unnecessary
|
||||||
|
# failures when using network interfaces where binding may fail but
|
||||||
|
# we want to continue anyway (like in the case of flat networks).
|
||||||
|
if fail_on_binding_failure:
|
||||||
|
wait_for_port_status(
|
||||||
|
client, port.id, 'ACTIVE',
|
||||||
|
fail_on_binding_failure=fail_on_binding_failure)
|
||||||
except openstack_exc.OpenStackCloudException as e:
|
except openstack_exc.OpenStackCloudException as e:
|
||||||
failures.append(ironic_port.uuid)
|
failures.append(ironic_port.uuid)
|
||||||
LOG.warning("Could not create neutron port for node's "
|
LOG.warning("Could not create neutron port for node's "
|
||||||
@@ -1016,12 +1035,15 @@ def wait_for_host_agent(client, host_id, target_state='up'):
|
|||||||
stop=tenacity.stop_after_attempt(CONF.agent.neutron_agent_max_attempts),
|
stop=tenacity.stop_after_attempt(CONF.agent.neutron_agent_max_attempts),
|
||||||
wait=tenacity.wait_fixed(CONF.agent.neutron_agent_status_retry_interval),
|
wait=tenacity.wait_fixed(CONF.agent.neutron_agent_status_retry_interval),
|
||||||
reraise=True)
|
reraise=True)
|
||||||
def wait_for_port_status(client, port_id, status):
|
def wait_for_port_status(client, port_id, status,
|
||||||
|
fail_on_binding_failure=None):
|
||||||
"""Wait for port status to be the desired status
|
"""Wait for port status to be the desired status
|
||||||
|
|
||||||
:param client: A Neutron client object.
|
:param client: A Neutron client object.
|
||||||
:param port_id: Neutron port_id
|
:param port_id: Neutron port_id
|
||||||
:param status: Port's target status, can be ACTIVE, DOWN ... etc.
|
:param status: Port's target status, can be ACTIVE, DOWN ... etc.
|
||||||
|
:param fail_on_binding_failure: Whether to raise exception if port
|
||||||
|
binding fails.
|
||||||
:returns: boolean indicates that the port status matches the
|
:returns: boolean indicates that the port status matches the
|
||||||
required value passed by param status.
|
required value passed by param status.
|
||||||
:raises: InvalidParameterValue if the port does not exist.
|
:raises: InvalidParameterValue if the port does not exist.
|
||||||
@@ -1035,6 +1057,14 @@ def wait_for_port_status(client, port_id, status):
|
|||||||
{'port_id': port_id, 'status': port.status})
|
{'port_id': port_id, 'status': port.status})
|
||||||
if port.status == status:
|
if port.status == status:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
# fail early on port binding failure
|
||||||
|
if port.get('binding:vif_type') == 'binding_failed':
|
||||||
|
msg = "Binding failed for neutron port %s" % port_id
|
||||||
|
if fail_on_binding_failure:
|
||||||
|
LOG.error(msg)
|
||||||
|
raise openstack_exc.OpenStackCloudException(msg)
|
||||||
|
LOG.warning(msg)
|
||||||
raise exception.NetworkError(
|
raise exception.NetworkError(
|
||||||
'Port %(port_id)s failed to reach status %(status)s' % {
|
'Port %(port_id)s failed to reach status %(status)s' % {
|
||||||
'port_id': port_id, 'status': status})
|
'port_id': port_id, 'status': status})
|
||||||
|
@@ -26,6 +26,10 @@ opts = [
|
|||||||
mutable=True,
|
mutable=True,
|
||||||
help=_('Delay value to wait for Neutron agents to setup '
|
help=_('Delay value to wait for Neutron agents to setup '
|
||||||
'sufficient DHCP configuration for port.')),
|
'sufficient DHCP configuration for port.')),
|
||||||
|
cfg.BoolOpt('fail_on_port_binding_failure',
|
||||||
|
default=True,
|
||||||
|
help=_('Whether to fail or continue deployment if neutron '
|
||||||
|
'port binding fails.')),
|
||||||
cfg.IntOpt('retries',
|
cfg.IntOpt('retries',
|
||||||
default=3,
|
default=3,
|
||||||
mutable=True,
|
mutable=True,
|
||||||
|
@@ -294,8 +294,27 @@ def plug_port_to_tenant_network(task, port_like_obj, client=None):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
neutron.update_neutron_port(task.context, vif_id, port_attrs)
|
neutron.update_neutron_port(task.context, vif_id, port_attrs)
|
||||||
if is_smart_nic:
|
|
||||||
neutron.wait_for_port_status(client, vif_id, 'ACTIVE')
|
binding_fail_fatal = False
|
||||||
|
is_neutron_iface = node.network_interface == 'neutron'
|
||||||
|
|
||||||
|
if is_neutron_iface:
|
||||||
|
binding_fail_fatal = getattr(
|
||||||
|
CONF.neutron, 'fail_on_port_binding_failure', False)
|
||||||
|
|
||||||
|
default_failure_behavior = is_smart_nic or binding_fail_fatal
|
||||||
|
|
||||||
|
fail_on_binding_failure = node.driver_info.get(
|
||||||
|
'fail_on_binding_failure', default_failure_behavior)
|
||||||
|
|
||||||
|
# NOTE(cid): Only check port status if it's a smart NIC or if we're
|
||||||
|
# configured to fail on binding failures. This avoids unnecessary
|
||||||
|
# failures when using network interfaces where binding may fail but
|
||||||
|
# we want to continue anyway (like in the case of flat networks).
|
||||||
|
if fail_on_binding_failure:
|
||||||
|
neutron.wait_for_port_status(
|
||||||
|
client, vif_id, 'ACTIVE',
|
||||||
|
fail_on_binding_failure=fail_on_binding_failure)
|
||||||
except openstack_exc.OpenStackCloudException as e:
|
except openstack_exc.OpenStackCloudException as e:
|
||||||
msg = (_('Could not add public network VIF %(vif)s '
|
msg = (_('Could not add public network VIF %(vif)s '
|
||||||
'to node %(node)s, possible network issue. %(exc)s') %
|
'to node %(node)s, possible network issue. %(exc)s') %
|
||||||
|
@@ -1026,7 +1026,8 @@ class TestNeutronNetworkActions(db_base.DbTestCase):
|
|||||||
wait_agent_mock.assert_called_once_with(
|
wait_agent_mock.assert_called_once_with(
|
||||||
self.client_mock, 'hostname')
|
self.client_mock, 'hostname')
|
||||||
wait_port_mock.assert_called_once_with(
|
wait_port_mock.assert_called_once_with(
|
||||||
self.client_mock, self.neutron_port.id, 'ACTIVE')
|
self.client_mock, self.neutron_port.id, 'ACTIVE',
|
||||||
|
fail_on_binding_failure=True)
|
||||||
|
|
||||||
@mock.patch.object(neutron, 'is_smartnic_port', autospec=True)
|
@mock.patch.object(neutron, 'is_smartnic_port', autospec=True)
|
||||||
@mock.patch.object(neutron, 'wait_for_host_agent', autospec=True)
|
@mock.patch.object(neutron, 'wait_for_host_agent', autospec=True)
|
||||||
|
@@ -428,8 +428,11 @@ class TestCommonFunctions(db_base.DbTestCase):
|
|||||||
task, self.vif_id, {'physnet2'})
|
task, self.vif_id, {'physnet2'})
|
||||||
|
|
||||||
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
||||||
|
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
||||||
def test_plug_port_to_tenant_network_client(self, mock_gc, mock_update):
|
def test_plug_port_to_tenant_network_client(self, mock_gc,
|
||||||
|
wait_mock_status,
|
||||||
|
mock_update):
|
||||||
self.port.internal_info = {common.TENANT_VIF_KEY: self.vif_id}
|
self.port.internal_info = {common.TENANT_VIF_KEY: self.vif_id}
|
||||||
self.port.save()
|
self.port.save()
|
||||||
with task_manager.acquire(self.context, self.node.id) as task:
|
with task_manager.acquire(self.context, self.node.id) as task:
|
||||||
@@ -439,8 +442,11 @@ class TestCommonFunctions(db_base.DbTestCase):
|
|||||||
self.assertTrue(mock_update.called)
|
self.assertTrue(mock_update.called)
|
||||||
|
|
||||||
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
||||||
|
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
||||||
def test_plug_port_to_tenant_network_no_client(self, mock_gc, mock_update):
|
def test_plug_port_to_tenant_network_no_client(self, mock_gc,
|
||||||
|
wait_mock_status,
|
||||||
|
mock_update):
|
||||||
self.port.internal_info = {common.TENANT_VIF_KEY: self.vif_id}
|
self.port.internal_info = {common.TENANT_VIF_KEY: self.vif_id}
|
||||||
self.port.save()
|
self.port.save()
|
||||||
with task_manager.acquire(self.context, self.node.id) as task:
|
with task_manager.acquire(self.context, self.node.id) as task:
|
||||||
@@ -480,7 +486,7 @@ class TestCommonFunctions(db_base.DbTestCase):
|
|||||||
wait_agent_mock.assert_called_once_with(
|
wait_agent_mock.assert_called_once_with(
|
||||||
nclient, 'hostname')
|
nclient, 'hostname')
|
||||||
wait_port_mock.assert_called_once_with(
|
wait_port_mock.assert_called_once_with(
|
||||||
nclient, self.vif_id, 'ACTIVE')
|
nclient, self.vif_id, 'ACTIVE', fail_on_binding_failure=True)
|
||||||
self.assertTrue(mock_update.called)
|
self.assertTrue(mock_update.called)
|
||||||
|
|
||||||
|
|
||||||
|
@@ -560,10 +560,12 @@ class NeutronInterfaceTestCase(db_base.DbTestCase):
|
|||||||
|
|
||||||
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
|
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
||||||
|
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
||||||
@mock.patch.object(neutron, 'LOG', autospec=True)
|
@mock.patch.object(neutron, 'LOG', autospec=True)
|
||||||
def test_configure_tenant_networks_multiple_ports_one_vif_id(
|
def test_configure_tenant_networks_multiple_ports_one_vif_id(
|
||||||
self, log_mock, client_mock, update_mock, wait_agent_mock):
|
self, log_mock, client_mock, wait_mock_status, update_mock,
|
||||||
|
wait_agent_mock):
|
||||||
expected_attrs = {
|
expected_attrs = {
|
||||||
'binding:vnic_type': 'baremetal',
|
'binding:vnic_type': 'baremetal',
|
||||||
'binding:host_id': self.node.uuid,
|
'binding:host_id': self.node.uuid,
|
||||||
@@ -582,8 +584,10 @@ class NeutronInterfaceTestCase(db_base.DbTestCase):
|
|||||||
|
|
||||||
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
|
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
||||||
|
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
||||||
def test_configure_tenant_networks_update_fail(self, client_mock,
|
def test_configure_tenant_networks_update_fail(self, client_mock,
|
||||||
|
wait_mock_status,
|
||||||
update_mock,
|
update_mock,
|
||||||
wait_agent_mock):
|
wait_agent_mock):
|
||||||
update_mock.side_effect = openstack_exc.OpenStackCloudException(
|
update_mock.side_effect = openstack_exc.OpenStackCloudException(
|
||||||
@@ -594,11 +598,26 @@ class NeutronInterfaceTestCase(db_base.DbTestCase):
|
|||||||
self.interface.configure_tenant_networks, task)
|
self.interface.configure_tenant_networks, task)
|
||||||
client_mock.assert_called_once_with(context=task.context)
|
client_mock.assert_called_once_with(context=task.context)
|
||||||
|
|
||||||
|
@mock.patch.object(neutron_common, '_get_port_by_uuid', autospec=True)
|
||||||
|
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
||||||
|
def test_configure_tenant_networks_update_binding_fail(self, client_mock,
|
||||||
|
mock_get_port):
|
||||||
|
self.config(fail_on_port_binding_failure=True, group='neutron')
|
||||||
|
port = mock.MagicMock()
|
||||||
|
port.get.return_value = 'binding_failed'
|
||||||
|
mock_get_port.return_value = port
|
||||||
|
|
||||||
|
with task_manager.acquire(self.context, self.node.id) as task:
|
||||||
|
self.assertRaisesRegex(
|
||||||
|
exception.NetworkError, 'Binding failed',
|
||||||
|
self.interface.configure_tenant_networks, task)
|
||||||
|
|
||||||
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
|
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
||||||
|
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
||||||
def _test_configure_tenant_networks(self, client_mock, update_mock,
|
def _test_configure_tenant_networks(self, client_mock, wait_mock_status,
|
||||||
wait_agent_mock,
|
update_mock, wait_agent_mock,
|
||||||
is_client_id=False):
|
is_client_id=False):
|
||||||
# NOTE(TheJulia): Until we have a replacement for infiniband client-id
|
# NOTE(TheJulia): Until we have a replacement for infiniband client-id
|
||||||
# storage, extra has to stay put. On a plus side, this would be
|
# storage, extra has to stay put. On a plus side, this would be
|
||||||
@@ -668,12 +687,13 @@ class NeutronInterfaceTestCase(db_base.DbTestCase):
|
|||||||
@mock.patch.object(neutron_common, 'get_neutron_port_data', autospec=True)
|
@mock.patch.object(neutron_common, 'get_neutron_port_data', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
|
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
||||||
|
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'get_local_group_information',
|
@mock.patch.object(neutron_common, 'get_local_group_information',
|
||||||
autospec=True)
|
autospec=True)
|
||||||
def test_configure_tenant_networks_with_portgroups(
|
def test_configure_tenant_networks_with_portgroups(
|
||||||
self, glgi_mock, client_mock, update_mock, wait_agent_mock,
|
self, glgi_mock, client_mock, wait_mock_status, update_mock,
|
||||||
port_data_mock):
|
wait_agent_mock, port_data_mock):
|
||||||
pg = utils.create_test_portgroup(
|
pg = utils.create_test_portgroup(
|
||||||
self.context, node_id=self.node.id, address='ff:54:00:cf:2d:32',
|
self.context, node_id=self.node.id, address='ff:54:00:cf:2d:32',
|
||||||
internal_info={'tenant_vif_port_id': uuidutils.generate_uuid()})
|
internal_info={'tenant_vif_port_id': uuidutils.generate_uuid()})
|
||||||
@@ -729,12 +749,13 @@ class NeutronInterfaceTestCase(db_base.DbTestCase):
|
|||||||
@mock.patch.object(neutron_common, 'get_neutron_port_data', autospec=True)
|
@mock.patch.object(neutron_common, 'get_neutron_port_data', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
|
@mock.patch.object(neutron_common, 'wait_for_host_agent', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
@mock.patch.object(neutron_common, 'update_neutron_port', autospec=True)
|
||||||
|
@mock.patch.object(neutron_common, 'wait_for_port_status', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
@mock.patch.object(neutron_common, 'get_client', autospec=True)
|
||||||
@mock.patch.object(neutron_common, 'get_local_group_information',
|
@mock.patch.object(neutron_common, 'get_local_group_information',
|
||||||
autospec=True)
|
autospec=True)
|
||||||
def test_configure_tenant_networks_with_portgroups_no_address(
|
def test_configure_tenant_networks_with_portgroups_no_address(
|
||||||
self, glgi_mock, client_mock, update_mock, wait_agent_mock,
|
self, glgi_mock, client_mock, wait_mock_status, update_mock,
|
||||||
port_data_mock):
|
wait_agent_mock, port_data_mock):
|
||||||
pg = utils.create_test_portgroup(
|
pg = utils.create_test_portgroup(
|
||||||
self.context, node_id=self.node.id, address=None,
|
self.context, node_id=self.node.id, address=None,
|
||||||
internal_info={'tenant_vif_port_id': uuidutils.generate_uuid()})
|
internal_info={'tenant_vif_port_id': uuidutils.generate_uuid()})
|
||||||
|
@@ -0,0 +1,10 @@
|
|||||||
|
---
|
||||||
|
features:
|
||||||
|
- |
|
||||||
|
Adds a new boolean configuration option ``[neutron]fail_on_port_binding_failure``
|
||||||
|
and corresponding node ``driver_info`` setting ``fail_on_binding_failure``
|
||||||
|
to control whether deployment should fail or continue if Neutron port
|
||||||
|
binding fails. With a default of ``true``, if your network is not
|
||||||
|
configured properly, this will likely cause deployment failures.
|
||||||
|
To maintain the previous behavior, explicitly set this option to False in
|
||||||
|
your configuration.
|
Reference in New Issue
Block a user