diff --git a/neutron/agent/l3/ha_router.py b/neutron/agent/l3/ha_router.py index 771d651eea3..8e0527ba044 100644 --- a/neutron/agent/l3/ha_router.py +++ b/neutron/agent/l3/ha_router.py @@ -46,9 +46,13 @@ class HaRouterNamespace(namespaces.RouterNamespace): This namespace sets the ip_nonlocal_bind to 0 for HA router namespaces. It does so to prevent sending gratuitous ARPs for interfaces that got VIP removed in the middle of processing. + It also disables ipv6 forwarding by default. Forwarding will be + enabled during router configuration processing only for the master node. + It has to be disabled on all other nodes to avoid sending MLD packets + which cause lost connectivity to Floating IPs. """ def create(self): - super(HaRouterNamespace, self).create() + super(HaRouterNamespace, self).create(ipv6_forwarding=False) # HA router namespaces should not have ip_nonlocal_bind enabled ip_lib.set_ip_nonlocal_bind_for_namespace(self.name) diff --git a/neutron/agent/l3/namespaces.py b/neutron/agent/l3/namespaces.py index 71e8cbcf35b..58b0deb6075 100644 --- a/neutron/agent/l3/namespaces.py +++ b/neutron/agent/l3/namespaces.py @@ -88,7 +88,7 @@ class Namespace(object): self.driver = driver self.use_ipv6 = use_ipv6 - def create(self): + def create(self, ipv6_forwarding=True): # See networking (netdev) tree, file # Documentation/networking/ip-sysctl.txt for an explanation of # these sysctl values. @@ -103,7 +103,8 @@ class Namespace(object): cmd = ['sysctl', '-w', 'net.ipv4.conf.all.arp_announce=2'] ip_wrapper.netns.execute(cmd) if self.use_ipv6: - cmd = ['sysctl', '-w', 'net.ipv6.conf.all.forwarding=1'] + cmd = ['sysctl', '-w', + 'net.ipv6.conf.all.forwarding=%d' % int(ipv6_forwarding)] ip_wrapper.netns.execute(cmd) def delete(self): diff --git a/neutron/agent/linux/ip_lib.py b/neutron/agent/linux/ip_lib.py index 290a76fc04a..67e0572463f 100644 --- a/neutron/agent/linux/ip_lib.py +++ b/neutron/agent/linux/ip_lib.py @@ -1302,3 +1302,10 @@ def set_ip_nonlocal_bind_for_namespace(namespace): "different network node, and the peer side getting a " "populated ARP cache for a given floating IP address.", IP_NONLOCAL_BIND) + + +def get_ipv6_forwarding(device, namespace=None): + """Get kernel value of IPv6 forwarding for device in given namespace.""" + cmd = ['sysctl', '-b', "net.ipv6.conf.%s.forwarding" % device] + ip_wrapper = IPWrapper(namespace) + return int(ip_wrapper.netns.execute(cmd, run_as_root=True)) diff --git a/neutron/tests/fullstack/base.py b/neutron/tests/fullstack/base.py index cf84b506cc1..8adae386c81 100644 --- a/neutron/tests/fullstack/base.py +++ b/neutron/tests/fullstack/base.py @@ -12,13 +12,17 @@ # License for the specific language governing permissions and limitations # under the License. +from concurrent import futures import os from oslo_config import cfg +from oslo_log import log as logging +from neutron.common import utils as common_utils from neutron.conf.agent import common as config from neutron.tests import base as tests_base from neutron.tests.common import helpers +from neutron.tests.common import net_helpers from neutron.tests.fullstack.resources import client as client_resource from neutron.tests import tools from neutron.tests.unit import testlib_api @@ -29,6 +33,8 @@ DEFAULT_LOG_DIR = os.path.join(helpers.get_test_log_path(), 'dsvm-fullstack-logs') ROOTDIR = os.path.dirname(__file__) +LOG = logging.getLogger(__name__) + class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin, testlib_api.SqlTestCase): @@ -73,3 +79,27 @@ class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin, def get_name(self): class_name, test_name = self.id().split(".")[-2:] return "%s.%s" % (class_name, test_name) + + def _assert_ping_during_agents_restart( + self, agents, src_namespace, ips, restart_timeout=10, + ping_timeout=1, count=10): + with net_helpers.async_ping( + src_namespace, ips, timeout=ping_timeout, + count=count) as done: + LOG.debug("Restarting agents") + executor = futures.ThreadPoolExecutor(max_workers=len(agents)) + restarts = [agent.restart(executor=executor) + for agent in agents] + + futures.wait(restarts, timeout=restart_timeout) + + self.assertTrue(all([r.done() for r in restarts])) + LOG.debug("Restarting agents - done") + + # It is necessary to give agents time to initialize + # because some crucial steps (e.g. setting up bridge flows) + # happen only after RPC is established + common_utils.wait_until_true( + done, + exception=RuntimeError("Could not ping the other VM, L2 agent " + "restart leads to network disruption")) diff --git a/neutron/tests/fullstack/test_connectivity.py b/neutron/tests/fullstack/test_connectivity.py index c8650b0053d..3f498e88cfe 100644 --- a/neutron/tests/fullstack/test_connectivity.py +++ b/neutron/tests/fullstack/test_connectivity.py @@ -12,7 +12,6 @@ # License for the specific language governing permissions and limitations # under the License. -from concurrent import futures import signal from neutron_lib import constants @@ -246,22 +245,6 @@ class TestUninterruptedConnectivityOnL2AgentRestart( # Restart agents on all nodes simultaneously while pinging across # the hosts. The ping has to cross int and phys bridges and travels # via central bridge as the vms are on separate hosts. - with net_helpers.async_ping(ns0, [ip1], timeout=2, - count=agent_restart_timeout) as done: - LOG.debug("Restarting agents") - executor = futures.ThreadPoolExecutor(max_workers=len(agents)) - restarts = [agent.restart(executor=executor) - for agent in agents] - - futures.wait(restarts, timeout=agent_restart_timeout) - - self.assertTrue(all([r.done() for r in restarts])) - LOG.debug("Restarting agents - done") - - # It is necessary to give agents time to initialize - # because some crucial steps (e.g. setting up bridge flows) - # happen only after RPC is established - common_utils.wait_until_true( - done, - exception=RuntimeError("Could not ping the other VM, L2 agent " - "restart leads to network disruption")) + self._assert_ping_during_agents_restart( + agents, ns0, [ip1], restart_timeout=agent_restart_timeout, + ping_timeout=2, count=agent_restart_timeout) diff --git a/neutron/tests/fullstack/test_l3_agent.py b/neutron/tests/fullstack/test_l3_agent.py index f67be5953d5..1cc299ad0a7 100644 --- a/neutron/tests/fullstack/test_l3_agent.py +++ b/neutron/tests/fullstack/test_l3_agent.py @@ -321,3 +321,33 @@ class TestHAL3Agent(TestL3Agent): self.assertEqual( "master", self._get_keepalived_state(keepalived_state_file)) + + def test_ha_router_restart_agents_no_packet_lost(self): + tenant_id = uuidutils.generate_uuid() + ext_net, ext_sub = self._create_external_network_and_subnet(tenant_id) + router = self.safe_client.create_router(tenant_id, ha=True, + external_network=ext_net['id']) + + external_vm = self.useFixture( + machine_fixtures.FakeMachine( + self.environment.central_external_bridge, + common_utils.ip_to_cidr(ext_sub['gateway_ip'], 24))) + + common_utils.wait_until_true( + lambda: + len(self.client.list_l3_agent_hosting_routers( + router['id'])['agents']) == 2, + timeout=90) + + common_utils.wait_until_true( + functools.partial( + self._is_ha_router_active_on_one_agent, + router['id']), + timeout=90) + + router_ip = router['external_gateway_info'][ + 'external_fixed_ips'][0]['ip_address'] + l3_agents = [host.agents['l3'] for host in self.environment.hosts] + + self._assert_ping_during_agents_restart( + l3_agents, external_vm.namespace, [router_ip], count=60) diff --git a/neutron/tests/functional/agent/l3/test_ha_router.py b/neutron/tests/functional/agent/l3/test_ha_router.py index 413aea88f7c..fe03766f1e2 100644 --- a/neutron/tests/functional/agent/l3/test_ha_router.py +++ b/neutron/tests/functional/agent/l3/test_ha_router.py @@ -335,6 +335,28 @@ class L3HATestCase(framework.L3AgentTestFramework): raise self.assertEqual(0, ip_nonlocal_bind_value) + def test_ha_router_namespace_has_ipv6_forwarding_disabled(self): + router_info = self.generate_router_info(enable_ha=True) + router_info[constants.HA_INTERFACE_KEY]['status'] = ( + constants.PORT_STATUS_DOWN) + router = self.manage_router(self.agent, router_info) + external_port = router.get_ex_gw_port() + external_device_name = router.get_external_device_name( + external_port['id']) + + common_utils.wait_until_true(lambda: router.ha_state == 'backup') + self.assertEqual( + 0, ip_lib.get_ipv6_forwarding(device=external_device_name, + namespace=router.ns_name)) + + router.router[constants.HA_INTERFACE_KEY]['status'] = ( + constants.PORT_STATUS_ACTIVE) + self.agent._process_updated_router(router.router) + common_utils.wait_until_true(lambda: router.ha_state == 'master') + self.assertEqual( + 1, ip_lib.get_ipv6_forwarding(device=external_device_name, + namespace=router.ns_name)) + class L3HATestFailover(framework.L3AgentTestFramework): diff --git a/tools/configure_for_func_testing.sh b/tools/configure_for_func_testing.sh index 2db7a6831bd..e8324e8e679 100755 --- a/tools/configure_for_func_testing.sh +++ b/tools/configure_for_func_testing.sh @@ -278,6 +278,11 @@ function _configure_iptables_rules { } +function _enable_ipv6 { + sudo sysctl -w net.ipv6.conf.all.disable_ipv6=0 +} + + function configure_host_for_func_testing { echo_summary "Configuring host for functional testing" @@ -305,6 +310,7 @@ if [[ "$IS_GATE" != "True" ]]; then fi if [[ "$VENV" =~ "dsvm-fullstack" ]]; then + _enable_ipv6 _configure_iptables_rules # This module only exists on older kernels, built-in otherwise modinfo ip_conntrack_proto_sctp 1> /dev/null 2>&1 && sudo modprobe ip_conntrack_proto_sctp