From 91f0864dc0ccf0f67be7162f011706dbc6383cb3 Mon Sep 17 00:00:00 2001 From: Rodolfo Alonso Hernandez Date: Tue, 30 Aug 2022 18:09:34 +0200 Subject: [PATCH] Add an active wait during the port provisioning event In ML2/OVN, during a live-migration process, it could happend that the port provisioning event is received before the port binding has been updated. That means the port has been created in the destination host and the event received (this event will remove any pending provisioning block). But the Nova port binding request has not arrived yet, updating the port binding registers. Because the port is considered "not bound" (yet), the port provisioning doesn't set the port status to ACTIVE. This patch creates an active wait during the port provisioning event method. If the port binding is still "unbound", the method retries the port retrieval several times, giving some time to the port binding request from Nova to arrive. Closes-Bug: #1988199 Change-Id: I50091c84e67c172c94ce9140f23235421599185c --- neutron/plugins/ml2/plugin.py | 69 ++++++++++++------- neutron/tests/unit/plugins/ml2/test_plugin.py | 27 ++++++++ ...t-provisioning-retry-8edf16a258b164a0.yaml | 8 +++ 3 files changed, 79 insertions(+), 25 deletions(-) create mode 100644 releasenotes/notes/add-port-provisioning-retry-8edf16a258b164a0.yaml diff --git a/neutron/plugins/ml2/plugin.py b/neutron/plugins/ml2/plugin.py index c4ce9061b5a..ec7adfd6155 100644 --- a/neutron/plugins/ml2/plugin.py +++ b/neutron/plugins/ml2/plugin.py @@ -147,6 +147,7 @@ from neutron.services.segments import plugin as segments_plugin LOG = log.getLogger(__name__) MAX_BIND_TRIES = 10 +MAX_PROVISIONING_TRIES = MAX_BIND_TRIES SERVICE_PLUGINS_REQUIRED_DRIVERS = { @@ -334,37 +335,55 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2, [provisioning_blocks.PROVISIONING_COMPLETE]) def _port_provisioned(self, rtype, event, trigger, payload=None): port_id = payload.resource_id - port = db.get_port(payload.context, port_id) - port_binding = p_utils.get_port_binding_by_status_and_host( - getattr(port, 'port_bindings', []), const.ACTIVE) - if not port or not port_binding: - LOG.debug("Port %s was deleted so its status cannot be updated.", - port_id) - return - if port_binding.vif_type in (portbindings.VIF_TYPE_BINDING_FAILED, - portbindings.VIF_TYPE_UNBOUND): - # NOTE(kevinbenton): we hit here when a port is created without - # a host ID and the dhcp agent notifies that its wiring is done - LOG.debug("Port %s cannot update to ACTIVE because it " - "is not bound.", port_id) - return - else: - # port is bound, but we have to check for new provisioning blocks - # one last time to detect the case where we were triggered by an - # unbound port and the port became bound with new provisioning - # blocks before 'get_port' was called above - if provisioning_blocks.is_object_blocked(payload.context, port_id, - resources.PORT): - LOG.debug("Port %s had new provisioning blocks added so it " - "will not transition to active.", port_id) + for count in range(1, MAX_PROVISIONING_TRIES + 1): + LOG.info('Attempt %(count)s to provision port %(port)s', + {'count': count, 'port': port_id}) + port = db.get_port(payload.context, port_id) + port_bindings = getattr(port, 'port_bindings', []) + port_binding = p_utils.get_port_binding_by_status_and_host( + port_bindings, const.ACTIVE) + + if not port or not port_binding: + LOG.debug("Port %s was deleted so its status cannot be " + "updated.", port_id) return + + if port_binding.vif_type == portbindings.VIF_TYPE_BINDING_FAILED: + LOG.debug('Port %s cannot update to ACTIVE because it failed.', + port_id) + return + + if port_binding.vif_type == portbindings.VIF_TYPE_UNBOUND: + # NOTE(kevinbenton): we hit here when a port is created without + # a host ID and the dhcp agent notifies that its wiring is done + LOG.debug('Port %s cannot update to ACTIVE because it ' + 'is not bound.', port_id) + if count == MAX_PROVISIONING_TRIES: + return + + # Wait 0.5 seconds before checking again if the port is bound. + # We could hit this during a live-migration. + greenthread.sleep(0.5) + continue + + break + + # port is bound, but we have to check for new provisioning blocks + # one last time to detect the case where we were triggered by an + # unbound port and the port became bound with new provisioning + # blocks before 'get_port' was called above + if provisioning_blocks.is_object_blocked(payload.context, port_id, + resources.PORT): + LOG.debug("Port %s had new provisioning blocks added so it " + "will not transition to active.", port_id) + return + if not port.admin_state_up: LOG.debug("Port %s is administratively disabled so it will " "not transition to active.", port_id) return - host_migrating = agent_rpc.migrating_to_host( - getattr(port, 'port_bindings', [])) + host_migrating = agent_rpc.migrating_to_host(port_bindings) if host_migrating and self.nova_notifier: send_nova_event = bool(trigger == provisioning_blocks.L2_AGENT_ENTITY) diff --git a/neutron/tests/unit/plugins/ml2/test_plugin.py b/neutron/tests/unit/plugins/ml2/test_plugin.py index 143b8dc04b4..7b028fba6bd 100644 --- a/neutron/tests/unit/plugins/ml2/test_plugin.py +++ b/neutron/tests/unit/plugins/ml2/test_plugin.py @@ -47,6 +47,7 @@ import testtools import webob from neutron._i18n import _ +from neutron.agent import rpc as agent_rpc from neutron.common import utils from neutron.db import agents_db from neutron.db import provisioning_blocks @@ -1113,6 +1114,32 @@ class TestMl2PortsV2(test_plugin.TestPortsV2, Ml2PluginV2TestCase): self.context, resource_id=port_id)) self.assertFalse(ups.called) + @staticmethod + def _set_max_provisioning_tries(): + ml2_plugin.MAX_PROVISIONING_TRIES = ml2_plugin.MAX_BIND_TRIES + + @mock.patch.object(agent_rpc, 'migrating_to_host', return_value=None) + @mock.patch('neutron.plugins.ml2.plugin.db.get_port') + @mock.patch.object(p_utils, 'get_port_binding_by_status_and_host') + def test__port_provisioned_port_retry_port_binding_unbound( + self, mock_get_pb, mock_get_port, *args): + self.addCleanup(self._set_max_provisioning_tries) + ml2_plugin.MAX_PROVISIONING_TRIES = 2 + plugin = directory.get_plugin() + port_id = 'fake_port_id' + port = mock.Mock(id=port_id, admin_state_up=True) + mock_get_port.return_value = port + with mock.patch.object(plugin, 'update_port_status') as mock_pstatus: + pb1 = mock.MagicMock(vif_type=portbindings.VIF_TYPE_UNBOUND) + pb2 = mock.MagicMock(vif_type=portbindings.VIF_TYPE_OVS) + pb2.__iter__.return_value = [] + mock_get_pb.side_effect = [pb1, pb2] + plugin._port_provisioned('port', 'evt', 'trigger', + payload=events.DBEventPayload( + self.context, resource_id=port_id)) + mock_pstatus.assert_called_once_with(self.context, port_id, + constants.PORT_STATUS_ACTIVE) + def test_port_after_create_outside_transaction(self): self.tx_open = True diff --git a/releasenotes/notes/add-port-provisioning-retry-8edf16a258b164a0.yaml b/releasenotes/notes/add-port-provisioning-retry-8edf16a258b164a0.yaml new file mode 100644 index 00000000000..a40ddb7e924 --- /dev/null +++ b/releasenotes/notes/add-port-provisioning-retry-8edf16a258b164a0.yaml @@ -0,0 +1,8 @@ +--- +features: + - | + After the port is considered as provisioned, the Nova port binding update + could have not been received, leaving the port as not bound. Now the + port provisioning method has an active wait that will retry several times, + waiting for the port binding update. If received, the port status will be + set as active if the admin state flag is set.