From c874f6dadaa983ed4f338808907c8829a7f86031 Mon Sep 17 00:00:00 2001 From: Swaminathan Vasudevan Date: Wed, 30 Sep 2015 11:15:52 -0700 Subject: [PATCH] Split the FIP Namespace delete in L3 agent for DVR Right now we are seeing a race condition in the l3 agent for DVR routers when a floatingip is deleted and created. The agent tries to delete the floatingip namespace and while it tries to delete there is another call to add a namespace. There is a timing window in between these two calls where sometimes the call to create a namespace succeeds but, when tried to execute any commands in the namespace it fails, since the namespace was deleted concurrently. Since the fip namespace is associated with an external net and each node has only one fip namespace for an external net, we would like to only delete the fip namespace when the external net is deleted. The first step is to split the delete functionality into two. The call to fip_ns.cleanup will only remove the dependency that the fipnamespace has with the router namespace such as fpr and rfp veth pairs. The call to fip_ns.delete will actually delete the the fip namespace and the fg device. Partial-Bug: #1501873 Change-Id: Ic94625d5a968f554af70c274b2b2c20ab64e2487 --- neutron/agent/l3/dvr_local_router.py | 24 +++-------------- .../tests/functional/agent/test_l3_agent.py | 27 +------------------ .../unit/agent/l3/test_dvr_local_router.py | 17 +++++------- 3 files changed, 12 insertions(+), 56 deletions(-) diff --git a/neutron/agent/l3/dvr_local_router.py b/neutron/agent/l3/dvr_local_router.py index 43acca555b5..e02ae7362ee 100644 --- a/neutron/agent/l3/dvr_local_router.py +++ b/neutron/agent/l3/dvr_local_router.py @@ -136,26 +136,10 @@ class DvrLocalRouter(dvr_router_base.DvrRouterBase): self.fip_ns.local_subnets.release(self.router_id) self.rtr_fip_subnet = None ns_ip.del_veth(fip_2_rtr_name) - is_last = self.fip_ns.unsubscribe(self.router_id) - if is_last: - # TODO(Carl) I can't help but think that another router could - # come in and want to start using this namespace while this is - # destroying it. The two could end up conflicting on - # creating/destroying interfaces and such. I think I'd like a - # semaphore to sync creation/deletion of this namespace. - - # NOTE (Swami): Since we are deleting the namespace here we - # should be able to delete the floatingip agent gateway port - # for the provided external net since we don't need it anymore. - if self.fip_ns.agent_gateway_port: - LOG.debug('Removed last floatingip, so requesting the ' - 'server to delete Floatingip Agent Gateway port:' - '%s', self.fip_ns.agent_gateway_port) - self.agent.plugin_rpc.delete_agent_gateway_port( - self.agent.context, - self.fip_ns.agent_gateway_port['network_id']) - self.fip_ns.delete() - self.fip_ns = None + self.fip_ns.unsubscribe(self.router_id) + # NOTE (Swami): The fg interface and the namespace will be deleted + # when the external gateway port is removed. This will be + # initiated from the server through an RPC call. def add_floating_ip(self, fip, interface_name, device): if not self._add_fip_addr_to_device(fip, device): diff --git a/neutron/tests/functional/agent/test_l3_agent.py b/neutron/tests/functional/agent/test_l3_agent.py index 8be8ffe5e22..6b98bc6076e 100644 --- a/neutron/tests/functional/agent/test_l3_agent.py +++ b/neutron/tests/functional/agent/test_l3_agent.py @@ -1154,7 +1154,6 @@ class TestDvrRouter(L3AgentTestFramework): router, ip_versions, snat_ns_name) self._delete_router(self.agent, router.router_id) - self._assert_interfaces_deleted_from_ovs() self._assert_router_does_not_exist(router) self._assert_snat_namespace_does_not_exist(router) @@ -1362,7 +1361,7 @@ class TestDvrRouter(L3AgentTestFramework): router1.router[l3_constants.FLOATINGIP_KEY] = [] self.manage_router(restarted_agent, router1.router) self._assert_dvr_snat_gateway(router1) - self.assertFalse(self._namespace_exists(fip_ns)) + self.assertTrue(self._namespace_exists(fip_ns)) def test_dvr_router_add_fips_on_restarted_agent(self): self.agent.conf.agent_mode = 'dvr' @@ -1521,30 +1520,6 @@ class TestDvrRouter(L3AgentTestFramework): self.assertFalse(sg_device) self.assertTrue(qg_device) - def test_dvr_router_calls_delete_agent_gateway_if_last_fip(self): - """Test to validate delete fip if it is last fip managed by agent.""" - self.agent.conf.agent_mode = 'dvr_snat' - router_info = self.generate_dvr_router_info(enable_snat=True) - router1 = self.manage_router(self.agent, router_info) - floating_agent_gw_port = ( - router1.router[l3_constants.FLOATINGIP_AGENT_INTF_KEY]) - self.assertTrue(floating_agent_gw_port) - fip_ns = router1.fip_ns.get_name() - router1.fip_ns.agent_gw_port = floating_agent_gw_port - self.assertTrue(self._namespace_exists(router1.ns_name)) - self.assertTrue(self._namespace_exists(fip_ns)) - self._assert_dvr_floating_ips(router1) - self._assert_dvr_snat_gateway(router1) - router1.router[l3_constants.FLOATINGIP_KEY] = [] - rpc_mock = mock.patch.object( - self.agent.plugin_rpc, 'delete_agent_gateway_port').start() - self.agent._process_updated_router(router1.router) - self.assertTrue(rpc_mock.called) - rpc_mock.assert_called_once_with( - self.agent.context, - floating_agent_gw_port[0]['network_id']) - self.assertFalse(self._namespace_exists(fip_ns)) - def _mocked_dvr_ha_router(self, agent): r_info = self.generate_dvr_router_info(enable_ha=True, enable_snat=True, diff --git a/neutron/tests/unit/agent/l3/test_dvr_local_router.py b/neutron/tests/unit/agent/l3/test_dvr_local_router.py index e3a505148c5..8cb1fb429ff 100644 --- a/neutron/tests/unit/agent/l3/test_dvr_local_router.py +++ b/neutron/tests/unit/agent/l3/test_dvr_local_router.py @@ -242,16 +242,13 @@ class TestDvrRouterOperations(base.BaseTestCase): ri.rtr_fip_subnet = lla.LinkLocalAddressPair('15.1.2.3/32') _, fip_to_rtr = ri.rtr_fip_subnet.get_pair() fip_ns = ri.fip_ns - with mock.patch.object(self.plugin_api, - 'delete_agent_gateway_port') as del_fip_gw: - ri.floating_ip_removed_dist(fip_cidr) - self.assertTrue(del_fip_gw.called) - self.assertTrue(fip_ns.destroyed) - mIPWrapper().del_veth.assert_called_once_with( - fip_ns.get_int_device_name(router['id'])) - mIPDevice().route.delete_gateway.assert_called_once_with( - str(fip_to_rtr.ip), table=16) - fip_ns.unsubscribe.assert_called_once_with(ri.router_id) + ri.floating_ip_removed_dist(fip_cidr) + self.assertTrue(fip_ns.destroyed) + mIPWrapper().del_veth.assert_called_once_with( + fip_ns.get_int_device_name(router['id'])) + mIPDevice().route.delete_gateway.assert_called_once_with( + str(fip_to_rtr.ip), table=16) + fip_ns.unsubscribe.assert_called_once_with(ri.router_id) def _test_add_floating_ip(self, ri, fip, is_failure): ri._add_fip_addr_to_device = mock.Mock(return_value=is_failure)