Disable IPv6 forwarding by default on HA routers
In case of HA routers, IPv6 forwarding is now disabled by default and then enabled only on the master node. Before this patch it was done the opposite way: forwarding was enabled by default and then disabled on backup nodes. When forwarding is enabled/disabled for the qg- port, MLDv2 packets are sent, and that might lead to temporary packet loss, as packets to the FIP were sent to this backup node instead of the master one. Related-Bug: #1771841 Change-Id: Ia6b772e91c1f94612ca29d7082eca999372e60d6
This commit is contained in:
parent
fab6bcbdcd
commit
3e9e2a5b4b
@ -46,9 +46,13 @@ class HaRouterNamespace(namespaces.RouterNamespace):
|
||||
This namespace sets the ip_nonlocal_bind to 0 for HA router namespaces.
|
||||
It does so to prevent sending gratuitous ARPs for interfaces that got VIP
|
||||
removed in the middle of processing.
|
||||
It also disables ipv6 forwarding by default. Forwarding will be
|
||||
enabled during router configuration processing only for the master node.
|
||||
It has to be disabled on all other nodes to avoid sending MLD packets
|
||||
which cause lost connectivity to Floating IPs.
|
||||
"""
|
||||
def create(self):
|
||||
super(HaRouterNamespace, self).create()
|
||||
super(HaRouterNamespace, self).create(ipv6_forwarding=False)
|
||||
# HA router namespaces should not have ip_nonlocal_bind enabled
|
||||
ip_lib.set_ip_nonlocal_bind_for_namespace(self.name)
|
||||
|
||||
|
@ -88,7 +88,7 @@ class Namespace(object):
|
||||
self.driver = driver
|
||||
self.use_ipv6 = use_ipv6
|
||||
|
||||
def create(self):
|
||||
def create(self, ipv6_forwarding=True):
|
||||
# See networking (netdev) tree, file
|
||||
# Documentation/networking/ip-sysctl.txt for an explanation of
|
||||
# these sysctl values.
|
||||
@ -103,7 +103,8 @@ class Namespace(object):
|
||||
cmd = ['sysctl', '-w', 'net.ipv4.conf.all.arp_announce=2']
|
||||
ip_wrapper.netns.execute(cmd)
|
||||
if self.use_ipv6:
|
||||
cmd = ['sysctl', '-w', 'net.ipv6.conf.all.forwarding=1']
|
||||
cmd = ['sysctl', '-w',
|
||||
'net.ipv6.conf.all.forwarding=%d' % int(ipv6_forwarding)]
|
||||
ip_wrapper.netns.execute(cmd)
|
||||
|
||||
def delete(self):
|
||||
|
@ -1302,3 +1302,10 @@ def set_ip_nonlocal_bind_for_namespace(namespace):
|
||||
"different network node, and the peer side getting a "
|
||||
"populated ARP cache for a given floating IP address.",
|
||||
IP_NONLOCAL_BIND)
|
||||
|
||||
|
||||
def get_ipv6_forwarding(device, namespace=None):
    """Return the kernel IPv6 forwarding value of a device as an int.

    Runs ``sysctl -b net.ipv6.conf.<device>.forwarding`` as root through
    an IPWrapper created for ``namespace`` and converts the command output
    with int().

    :param device: interface name substituted into the sysctl key
    :param namespace: passed to IPWrapper; defaults to None
    :returns: the sysctl output converted to int
    """
    sysctl_key = "net.ipv6.conf.%s.forwarding" % device
    cmd = ['sysctl', '-b', sysctl_key]
    wrapper = IPWrapper(namespace)
    raw_value = wrapper.netns.execute(cmd, run_as_root=True)
    return int(raw_value)
|
||||
|
@ -12,13 +12,17 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from concurrent import futures
|
||||
import os
|
||||
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
|
||||
from neutron.common import utils as common_utils
|
||||
from neutron.conf.agent import common as config
|
||||
from neutron.tests import base as tests_base
|
||||
from neutron.tests.common import helpers
|
||||
from neutron.tests.common import net_helpers
|
||||
from neutron.tests.fullstack.resources import client as client_resource
|
||||
from neutron.tests import tools
|
||||
from neutron.tests.unit import testlib_api
|
||||
@ -29,6 +33,8 @@ DEFAULT_LOG_DIR = os.path.join(helpers.get_test_log_path(),
|
||||
'dsvm-fullstack-logs')
|
||||
ROOTDIR = os.path.dirname(__file__)
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin,
|
||||
testlib_api.SqlTestCase):
|
||||
@ -73,3 +79,27 @@ class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin,
|
||||
def get_name(self):
    """Return "<class name>.<test name>" built from self.id().

    Only the last two dot-separated components of the id are used.
    """
    id_parts = self.id().split(".")
    class_name, test_name = id_parts[-2:]
    return "%s.%s" % (class_name, test_name)
|
||||
|
||||
def _assert_ping_during_agents_restart(
        self, agents, src_namespace, ips, restart_timeout=10,
        ping_timeout=1, count=10):
    """Restart all ``agents`` at once while a ping to ``ips`` is running.

    The ping is started with net_helpers.async_ping from ``src_namespace``;
    every agent's restart() is then handed the same thread pool, sized to
    the number of agents. The assertion fails if any restart has not
    finished once futures.wait() returns, and wait_until_true() is given a
    RuntimeError to raise if the ``done`` callable yielded by async_ping
    does not become true.

    :param agents: objects exposing restart(executor=...)
    :param src_namespace: namespace handed to async_ping as ping source
    :param ips: addresses handed to async_ping
    :param restart_timeout: timeout passed to futures.wait()
    :param ping_timeout: ``timeout`` argument of async_ping
    :param count: ``count`` argument of async_ping
    """
    with net_helpers.async_ping(src_namespace, ips,
                                timeout=ping_timeout,
                                count=count) as done:
        LOG.debug("Restarting agents")
        pool = futures.ThreadPoolExecutor(max_workers=len(agents))
        pending = []
        for agent in agents:
            pending.append(agent.restart(executor=pool))

        futures.wait(pending, timeout=restart_timeout)

        all_restarted = all(restart.done() for restart in pending)
        self.assertTrue(all_restarted)
        LOG.debug("Restarting agents - done")

        # It is necessary to give agents time to initialize
        # because some crucial steps (e.g. setting up bridge flows)
        # happen only after RPC is established
        ping_failure = RuntimeError(
            "Could not ping the other VM, L2 agent "
            "restart leads to network disruption")
        common_utils.wait_until_true(done, exception=ping_failure)
|
||||
|
@ -12,7 +12,6 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from concurrent import futures
|
||||
import signal
|
||||
|
||||
from neutron_lib import constants
|
||||
@ -246,22 +245,6 @@ class TestUninterruptedConnectivityOnL2AgentRestart(
|
||||
# Restart agents on all nodes simultaneously while pinging across
|
||||
# the hosts. The ping has to cross int and phys bridges and travels
|
||||
# via central bridge as the vms are on separate hosts.
|
||||
with net_helpers.async_ping(ns0, [ip1], timeout=2,
|
||||
count=agent_restart_timeout) as done:
|
||||
LOG.debug("Restarting agents")
|
||||
executor = futures.ThreadPoolExecutor(max_workers=len(agents))
|
||||
restarts = [agent.restart(executor=executor)
|
||||
for agent in agents]
|
||||
|
||||
futures.wait(restarts, timeout=agent_restart_timeout)
|
||||
|
||||
self.assertTrue(all([r.done() for r in restarts]))
|
||||
LOG.debug("Restarting agents - done")
|
||||
|
||||
# It is necessary to give agents time to initialize
|
||||
# because some crucial steps (e.g. setting up bridge flows)
|
||||
# happen only after RPC is established
|
||||
common_utils.wait_until_true(
|
||||
done,
|
||||
exception=RuntimeError("Could not ping the other VM, L2 agent "
|
||||
"restart leads to network disruption"))
|
||||
self._assert_ping_during_agents_restart(
|
||||
agents, ns0, [ip1], restart_timeout=agent_restart_timeout,
|
||||
ping_timeout=2, count=agent_restart_timeout)
|
||||
|
@ -321,3 +321,33 @@ class TestHAL3Agent(TestL3Agent):
|
||||
self.assertEqual(
|
||||
"master",
|
||||
self._get_keepalived_state(keepalived_state_file))
|
||||
|
||||
def test_ha_router_restart_agents_no_packet_lost(self):
    """Ping an HA router's gateway IP while all L3 agents restart.

    Creates an HA router on a new external network, attaches a fake
    machine to the central external bridge using the subnet's gateway IP,
    waits until the router is hosted by two L3 agents and is active on
    one, then restarts the L3 agents of every host while pinging the
    router's external address 60 times from the fake machine.
    """
    tenant_id = uuidutils.generate_uuid()
    ext_net, ext_sub = self._create_external_network_and_subnet(tenant_id)
    router = self.safe_client.create_router(
        tenant_id, ha=True, external_network=ext_net['id'])

    external_bridge = self.environment.central_external_bridge
    gateway_cidr = common_utils.ip_to_cidr(ext_sub['gateway_ip'], 24)
    external_vm = self.useFixture(
        machine_fixtures.FakeMachine(external_bridge, gateway_cidr))

    def _hosted_by_two_agents():
        hosting = self.client.list_l3_agent_hosting_routers(router['id'])
        return len(hosting['agents']) == 2

    common_utils.wait_until_true(_hosted_by_two_agents, timeout=90)

    common_utils.wait_until_true(
        lambda: self._is_ha_router_active_on_one_agent(router['id']),
        timeout=90)

    gateway_info = router['external_gateway_info']
    router_ip = gateway_info['external_fixed_ips'][0]['ip_address']
    l3_agents = []
    for host in self.environment.hosts:
        l3_agents.append(host.agents['l3'])

    self._assert_ping_during_agents_restart(
        l3_agents, external_vm.namespace, [router_ip], count=60)
|
||||
|
@ -335,6 +335,28 @@ class L3HATestCase(framework.L3AgentTestFramework):
|
||||
raise
|
||||
self.assertEqual(0, ip_nonlocal_bind_value)
|
||||
|
||||
def test_ha_router_namespace_has_ipv6_forwarding_disabled(self):
    """Check IPv6 forwarding on the external device follows the HA state.

    The router is first managed with its HA interface marked DOWN; once
    ha_state is 'backup' the forwarding value read for the external device
    must be 0. The HA interface is then marked ACTIVE and the router is
    re-processed; once ha_state is 'master' the value must be 1.
    """
    router_info = self.generate_router_info(enable_ha=True)
    ha_interface = router_info[constants.HA_INTERFACE_KEY]
    ha_interface['status'] = constants.PORT_STATUS_DOWN
    router = self.manage_router(self.agent, router_info)
    external_port = router.get_ex_gw_port()
    external_device_name = router.get_external_device_name(
        external_port['id'])

    def _current_forwarding():
        # Read the value live from the router namespace on every call.
        return ip_lib.get_ipv6_forwarding(device=external_device_name,
                                          namespace=router.ns_name)

    common_utils.wait_until_true(lambda: router.ha_state == 'backup')
    self.assertEqual(0, _current_forwarding())

    router.router[constants.HA_INTERFACE_KEY]['status'] = (
        constants.PORT_STATUS_ACTIVE)
    self.agent._process_updated_router(router.router)
    common_utils.wait_until_true(lambda: router.ha_state == 'master')
    self.assertEqual(1, _current_forwarding())
|
||||
|
||||
|
||||
class L3HATestFailover(framework.L3AgentTestFramework):
|
||||
|
||||
|
@ -278,6 +278,11 @@ function _configure_iptables_rules {
|
||||
}
|
||||
|
||||
|
||||
# Set the net.ipv6.conf.all.disable_ipv6 sysctl to 0 (run through sudo).
function _enable_ipv6 {
    sudo sysctl -w "net.ipv6.conf.all.disable_ipv6=0"
}
|
||||
|
||||
|
||||
function configure_host_for_func_testing {
|
||||
echo_summary "Configuring host for functional testing"
|
||||
|
||||
@ -305,6 +310,7 @@ if [[ "$IS_GATE" != "True" ]]; then
|
||||
fi
|
||||
|
||||
if [[ "$VENV" =~ "dsvm-fullstack" ]]; then
|
||||
_enable_ipv6
|
||||
_configure_iptables_rules
|
||||
# This module only exists on older kernels, built-in otherwise
|
||||
modinfo ip_conntrack_proto_sctp 1> /dev/null 2>&1 && sudo modprobe ip_conntrack_proto_sctp
|
||||
|
Loading…
Reference in New Issue
Block a user