From 0ddca284542aed89df4a22607a2da03f193f083c Mon Sep 17 00:00:00 2001 From: Oleg Bondarev Date: Tue, 1 Feb 2022 18:56:02 +0300 Subject: [PATCH] Make sure "dead vlan" ports cannot transmit packets https://review.opendev.org/c/openstack/neutron/+/820897 added a dead vlan flow that pushes the dead vlan tag onto frames belonging to dead ports before these ports are reassigned to their proper vlans. However add_flow and delete_flows race and delete_flows may run before add_flow, in this case deleting 0 flows but not giving us a chance to detect this: neither does it throw an error nor does it return the number of deleted flows. This leads to port staying inaccessible forever and hence breaks corresponding DHCP or router. Current patch suggests another approach to make sure no packets are leaked from newly plugged ports: setting their "vlan_mode" attribute to "trunk" and "trunks"=[4095] (along with assigning dead VLAN tag). With this OVS normal pipeline will allow only packets tagged with 4095 from such ports [1], which normally not happens, but even if it does - default rule in br-int will drop them anyway. Thus untagged packets from such ports will also be dropped until ovs agent sets proper VLAN tag and clears vlan_mode to default ("access"). This approach avoids the race between dhcp/l3 and ovs agents because dhcp/l3 agents no longer modify flow table. This partially reverts commit 7aae31c9f9ed938760ca0be3c461826b598c7004 [1] https://docs.openvswitch.org/en/latest/ref/ovs-actions.7/?highlight=ovs-actions#the-ovs-normal-pipeline Closes-Bug: #1930414 Closes-Bug: #1959564 Change-Id: I0391dd24224f8656a09ddb002e7dae8783ba37a4 --- neutron/agent/common/ovs_lib.py | 45 +++++++++---------- .../openvswitch/agent/ovs_neutron_agent.py | 26 +++++------ neutron/tests/common/conn_testers.py | 8 ---- neutron/tests/common/net_helpers.py | 3 ++ neutron/tests/functional/agent/l2/base.py | 17 ++++--- .../tests/functional/agent/l3/framework.py | 2 + .../tests/functional/agent/test_dhcp_agent.py | 38 +++++----------- .../tests/functional/agent/test_firewall.py | 3 ++ .../tests/functional/agent/test_ovs_lib.py | 4 ++ .../openvswitch/agent/test_trunk_manager.py | 2 + 10 files changed, 71 insertions(+), 77 deletions(-) diff --git a/neutron/agent/common/ovs_lib.py b/neutron/agent/common/ovs_lib.py index 6d7015a90b1..f35e1343ad1 100644 --- a/neutron/agent/common/ovs_lib.py +++ b/neutron/agent/common/ovs_lib.py @@ -371,13 +371,7 @@ class OVSBridge(BaseOVS): with self.ovsdb.transaction() as txn: txn.add(self.ovsdb.add_port(self.br_name, port_name, may_exist=False)) - # NOTE(mangelajo): Port is added to dead vlan (4095) by default - # until it's handled by the neutron-openvswitch-agent. Otherwise it - # becomes a trunk port on br-int (receiving traffic for all vlans), - # and also triggers issues on ovs-vswitchd related to the - # datapath flow revalidator thread, see lp#1767422 - txn.add(self.ovsdb.db_set( - 'Port', port_name, ('tag', constants.DEAD_VLAN_TAG))) + self._set_port_dead(port_name, txn) # TODO(mangelajo): We could accept attr tuples for the Port too # but, that could potentially break usage of this function in @@ -388,22 +382,27 @@ class OVSBridge(BaseOVS): txn.add(self.ovsdb.db_set('Interface', port_name, *interface_attr_tuples)) - # NOTE(bence romsics): We are after the ovsdb transaction therefore - # there's still a short time window between the port created and - # the flow added in which the dead vlan tag is not pushed onto the - # frames arriving at these ports and because of that those frames may - # get through. However before the transaction we cannot create the - # flow because we don't have the ofport. And I'm not aware of a - # combined ovsdb+openflow transaction to do it inside the transaction. - if (self.br_name == cfg.CONF.OVS.integration_bridge): - self.add_flow( - table=constants.LOCAL_SWITCHING, - priority=constants.OPENFLOW_MAX_PRIORITY - 1, - in_port=self.get_port_ofport(port_name), - actions='mod_vlan_vid:{:d},' - 'resubmit(,{:d})'.format( - constants.DEAD_VLAN_TAG, - constants.LOCAL_SWITCHING)) + def _set_port_dead(self, port_name, txn): + # NOTE(mangelajo): Port is added to dead vlan (4095) by default + # until it's handled by the neutron-openvswitch-agent. Otherwise it + # may trigger issues on ovs-vswitchd related to the + # datapath flow revalidator thread, see lp#1767422 + txn.add(self.ovsdb.db_set( + 'Port', port_name, ('tag', constants.DEAD_VLAN_TAG))) + # Just setting 'tag' to 4095 is not enough to prevent any traffic + # to/from new port because "access" ports do not have 802.1Q header + # and hence are not matched by default 4095-dropping rule. + # So we also set "vlan_mode" attribute to "trunk" and "trunks"=[4095] + # With this OVS normal pipeline will allow only packets tagged with + # 4095 from such ports, which normally not happens, + # but even if it does - default rule in br-int will drop them anyway. + # Thus untagged packets from such ports will also be dropped until + # ovs agent sets proper VLAN tag and clears vlan_mode to default + # ("access"). See lp#1930414 for details. + txn.add(self.ovsdb.db_set( + 'Port', port_name, ('vlan_mode', 'trunk'))) + txn.add(self.ovsdb.db_set( + 'Port', port_name, ('trunks', constants.DEAD_VLAN_TAG))) def delete_port(self, port_name): self.ovsdb.del_port(port_name, self.br_name).execute() diff --git a/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py b/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py index d880721e6eb..9351499d8da 100644 --- a/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py +++ b/neutron/plugins/ml2/drivers/openvswitch/agent/ovs_neutron_agent.py @@ -1228,19 +1228,19 @@ class OVSNeutronAgent(l2population_rpc.L2populationRpcCallBackTunnelMixin, self.setup_arp_spoofing_protection(self.int_br, port, port_detail) if cur_tag != lvm.vlan: - self.int_br.set_db_attribute( - "Port", port.port_name, "tag", lvm.vlan) - # When changing the port's tag from DEAD_VLAN_TAG to - # something else, also delete the previously dead port's - # push DEAD_VLAN_TAG flow which we installed from - # ovs_lib.OVSBridge.replace_port(). - if (cur_tag == constants.DEAD_VLAN_TAG and port.ofport != -1): - self.int_br.delete_flows( - cookie=ovs_lib.COOKIE_ANY, - table=constants.LOCAL_SWITCHING, - priority=constants.OPENFLOW_MAX_PRIORITY - 1, - in_port=port.ofport, - strict=True) + ovsdb = self.int_br.ovsdb + with ovsdb.transaction() as txn: + txn.add(ovsdb.db_set( + 'Port', port.port_name, ('tag', lvm.vlan))) + # When changing the port's tag from DEAD_VLAN_TAG to + # something else, also clear port's vlan_mode and trunks, + # which were set to make sure all packets are dropped. + if (cur_tag == constants.DEAD_VLAN_TAG and + port.ofport != ovs_lib.INVALID_OFPORT): + txn.add(ovsdb.db_clear( + 'Port', port.port_name, 'vlan_mode')) + txn.add(ovsdb.db_clear( + 'Port', port.port_name, 'trunks')) # update plugin about port status # FIXME(salv-orlando): Failures while updating device status diff --git a/neutron/tests/common/conn_testers.py b/neutron/tests/common/conn_testers.py index 033949075f5..414a5fa7db6 100644 --- a/neutron/tests/common/conn_testers.py +++ b/neutron/tests/common/conn_testers.py @@ -20,7 +20,6 @@ from neutron_lib import constants from oslo_config import cfg from oslo_utils import uuidutils -from neutron.agent.common import ovs_lib from neutron.common import utils as common_utils from neutron.plugins.ml2.drivers.openvswitch.agent.common import ( constants as ovs_consts) @@ -386,13 +385,6 @@ class OVSBaseConnectionTester(ConnectionTester): txn.add( ovsdb.db_add( 'Port', port_name, 'other_config', {'tag': str(tag)})) - bridge.delete_flows( - cookie=ovs_lib.COOKIE_ANY, - table=ovs_consts.LOCAL_SWITCHING, - priority=ovs_consts.OPENFLOW_MAX_PRIORITY - 1, - in_port=bridge.get_port_ofport(port_name), - strict=True, - ) class OVSConnectionTester(OVSBaseConnectionTester): diff --git a/neutron/tests/common/net_helpers.py b/neutron/tests/common/net_helpers.py index 61fcbc0eb91..c462d1c8dca 100644 --- a/neutron/tests/common/net_helpers.py +++ b/neutron/tests/common/net_helpers.py @@ -858,6 +858,9 @@ class OVSPortFixture(PortFixture): # on ports that we intend to use as fake vm interfaces, they # need to be flat. This is related to lp#1767422 self.bridge.clear_db_attribute("Port", port_name, "tag") + # Clear vlan_mode that is added for each new port. lp#1930414 + self.bridge.clear_db_attribute("Port", port_name, "vlan_mode") + self.bridge.clear_db_attribute("Port", port_name, "trunks") self.addCleanup(self.bridge.delete_port, port_name) self.port = ip_lib.IPDevice(port_name, self.namespace) diff --git a/neutron/tests/functional/agent/l2/base.py b/neutron/tests/functional/agent/l2/base.py index ef4d6906f6a..47bff66cce7 100644 --- a/neutron/tests/functional/agent/l2/base.py +++ b/neutron/tests/functional/agent/l2/base.py @@ -465,13 +465,16 @@ class OVSAgentTestFramework(base.BaseOVSLinuxTestCase, OVSOFControllerHelper): self._plug_ports(network, ports, self.agent, bridge=phys_br, namespace=namespace) - if network_type == 'flat': - # NOTE(slaweq): for OVS implementations remove the DEAD VLAN tag - # on ports that belongs to flat network. DEAD VLAN tag is added - # to each newly created port. This is related to lp#1767422 - for port in ports: + for port in ports: + if network_type == 'flat': + # NOTE(slaweq): for OVS implementations remove the DEAD VLAN + # tag on ports that belongs to flat network. DEAD VLAN tag is + # added to each newly created port. + # This is related to lp#1767422 phys_br.clear_db_attribute("Port", port['vif_name'], "tag") - elif phys_segmentation_id and network_type == 'vlan': - for port in ports: + elif phys_segmentation_id and network_type == 'vlan': phys_br.set_db_attribute( "Port", port['vif_name'], "tag", phys_segmentation_id) + # Clear vlan_mode that is added for each new port. lp#1930414 + phys_br.clear_db_attribute("Port", port['vif_name'], "vlan_mode") + phys_br.clear_db_attribute("Port", port['vif_name'], "trunks") diff --git a/neutron/tests/functional/agent/l3/framework.py b/neutron/tests/functional/agent/l3/framework.py index 98c966ed735..af476b7085a 100644 --- a/neutron/tests/functional/agent/l3/framework.py +++ b/neutron/tests/functional/agent/l3/framework.py @@ -98,6 +98,8 @@ class L3AgentTestFramework(base.BaseSudoTestCase): self.mock_plugin_api = mock.patch( 'neutron.agent.l3.agent.L3PluginApi').start().return_value mock.patch('neutron.agent.rpc.PluginReportStateAPI').start() + mock.patch('neutron.agent.common.ovs_lib.' + 'OVSBridge._set_port_dead').start() l3_config.register_l3_agent_config_opts(l3_config.OPTS, cfg.CONF) self.conf = self._configure_agent('agent1') self.agent = neutron_l3_agent.L3NATAgentWithStateReport('agent1', diff --git a/neutron/tests/functional/agent/test_dhcp_agent.py b/neutron/tests/functional/agent/test_dhcp_agent.py index d20d7f04f62..995af6c0144 100644 --- a/neutron/tests/functional/agent/test_dhcp_agent.py +++ b/neutron/tests/functional/agent/test_dhcp_agent.py @@ -35,7 +35,6 @@ from neutron.agent.linux import utils from neutron.agent.metadata import driver as metadata_driver from neutron.common import utils as common_utils from neutron.conf.agent import common as config -from neutron.plugins.ml2.drivers.openvswitch.agent.common import constants from neutron.tests.common import net_helpers from neutron.tests.functional.agent.linux import helpers from neutron.tests.functional import base @@ -74,9 +73,8 @@ class DHCPAgentOVSTestFramework(base.BaseSudoTestCase): 'interface_driver', 'neutron.agent.linux.interface.OVSInterfaceDriver') self.conf.set_override('report_interval', 0, 'AGENT') - self.br_int = self.useFixture(net_helpers.OVSBridgeFixture()).bridge - self.conf.set_override( - 'integration_bridge', self.br_int.br_name, 'OVS') + br_int = self.useFixture(net_helpers.OVSBridgeFixture()).bridge + self.conf.set_override('integration_bridge', br_int.br_name, 'OVS') self.mock_plugin_api = mock.patch( 'neutron.agent.dhcp.agent.DhcpPluginApi').start().return_value @@ -87,6 +85,9 @@ class DHCPAgentOVSTestFramework(base.BaseSudoTestCase): self.conf.set_override('check_child_processes_interval', 1, 'AGENT') + mock.patch('neutron.agent.common.ovs_lib.' + 'OVSBridge._set_port_dead').start() + def network_dict_for_dhcp(self, dhcp_enabled=True, ip_version=lib_const.IP_VERSION_4, prefix_override=None): @@ -293,6 +294,13 @@ class DHCPAgentOVSTestCase(DHCPAgentOVSTestFramework): dev = ip_lib.IPDevice(iface_name, network.namespace) self.assertEqual(789, dev.link.mtu) + def test_good_address_allocation(self): + network, port = self._get_network_port_for_allocation_test() + network.ports.append(port) + self.configure_dhcp_for_network(network=network) + self._plug_port_for_dhcp_request(network, port) + self.assert_good_allocation_for_port(network, port) + def test_bad_address_allocation(self): network, port = self._get_network_port_for_allocation_test() network.ports.append(port) @@ -443,25 +451,3 @@ class DHCPAgentOVSTestCase(DHCPAgentOVSTestFramework): else: self.assertEqual(agent.DHCP_PROCESS_GREENLET_MAX, self.agent._pool.size) - - -class DHCPAgentOVSTestCaseOwnBridge(DHCPAgentOVSTestFramework): - """Class for a single test that needs its own bridge. - - This way the ofport numbers are predictable and we can fake ovs-agent's - flow deletions with hardcoded ofport numbers. - """ - - def test_good_address_allocation(self): - network, port = self._get_network_port_for_allocation_test() - network.ports.append(port) - self.configure_dhcp_for_network(network=network) - self._plug_port_for_dhcp_request(network, port) - for ofport in (1, 2): - self.br_int.delete_flows( - cookie=ovs_lib.COOKIE_ANY, - table=constants.LOCAL_SWITCHING, - priority=constants.OPENFLOW_MAX_PRIORITY - 1, - in_port=ofport, - strict=True) - self.assert_good_allocation_for_port(network, port) diff --git a/neutron/tests/functional/agent/test_firewall.py b/neutron/tests/functional/agent/test_firewall.py index fa42a910241..f1da09e75a5 100644 --- a/neutron/tests/functional/agent/test_firewall.py +++ b/neutron/tests/functional/agent/test_firewall.py @@ -19,6 +19,7 @@ import copy import functools +from unittest import mock import netaddr from neutron_lib import constants @@ -139,6 +140,8 @@ class BaseFirewallTestCase(linux_base.BaseOVSLinuxTestCase): conn_testers.OVSConnectionTester(self.ip_cidr, self.of_helper.br_int_cls)) firewall_drv = openvswitch_firewall.OVSFirewallDriver(tester.bridge) + mock.patch('neutron.agent.common.ovs_lib.' + 'OVSBridge._set_port_dead').start() return tester, firewall_drv def assign_vlan_to_peers(self): diff --git a/neutron/tests/functional/agent/test_ovs_lib.py b/neutron/tests/functional/agent/test_ovs_lib.py index 22d185c3651..ecdfa996387 100644 --- a/neutron/tests/functional/agent/test_ovs_lib.py +++ b/neutron/tests/functional/agent/test_ovs_lib.py @@ -92,6 +92,10 @@ class OVSBridgeTestCase(OVSBridgeTestBase): 'external_ids')['test']) self.assertEqual(agent_const.DEAD_VLAN_TAG, self.br.db_get_val('Port', port_name, 'tag')) + self.assertEqual("trunk", + self.br.db_get_val('Port', port_name, 'vlan_mode')) + self.assertEqual(4095, + self.br.db_get_val('Port', port_name, 'trunks')) def test_attribute_lifecycle(self): (port_name, ofport) = self.create_ovs_port() diff --git a/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_trunk_manager.py b/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_trunk_manager.py index 3842739644e..01fd692c8a0 100644 --- a/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_trunk_manager.py +++ b/neutron/tests/functional/services/trunk/drivers/openvswitch/agent/test_trunk_manager.py @@ -163,6 +163,8 @@ class TrunkManagerTestCase(base.BaseSudoTestCase): self.tester.bridge) self.trunk = trunk_manager.TrunkParentPort( trunk_id, uuidutils.generate_uuid()) + mock.patch('neutron.agent.common.ovs_lib.' + 'OVSBridge._set_port_dead').start() def test_connectivity(self): """Test connectivity with trunk and sub ports.