Merge "Add support for Keepalived VRRP health check"
This commit is contained in:
commit
b90ec94dc3
@ -129,7 +129,10 @@ class HaRouter(router.RouterInfo):
|
||||
ha_port_cidrs,
|
||||
nopreempt=True,
|
||||
advert_int=self.agent_conf.ha_vrrp_advert_int,
|
||||
priority=self.ha_priority)
|
||||
priority=self.ha_priority,
|
||||
vrrp_health_check_interval=(
|
||||
self.agent_conf.ha_vrrp_health_check_interval),
|
||||
ha_conf_dir=self.keepalived_manager.get_conf_dir())
|
||||
instance.track_interfaces.append(interface_name)
|
||||
|
||||
if self.agent_conf.ha_vrrp_auth_password:
|
||||
|
@ -15,6 +15,7 @@
|
||||
import errno
|
||||
import itertools
|
||||
import os
|
||||
import six
|
||||
|
||||
import netaddr
|
||||
from neutron_lib import exceptions
|
||||
@ -35,6 +36,7 @@ KEEPALIVED_SERVICE_NAME = 'keepalived'
|
||||
KEEPALIVED_EMAIL_FROM = 'neutron@openstack.local'
|
||||
KEEPALIVED_ROUTER_ID = 'neutron'
|
||||
GARP_MASTER_DELAY = 60
|
||||
HEALTH_CHECK_NAME = 'ha_health_check'
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
@ -160,7 +162,9 @@ class KeepalivedInstance(object):
|
||||
def __init__(self, state, interface, vrouter_id, ha_cidrs,
|
||||
priority=HA_DEFAULT_PRIORITY, advert_int=None,
|
||||
mcast_src_ip=None, nopreempt=False,
|
||||
garp_master_delay=GARP_MASTER_DELAY):
|
||||
garp_master_delay=GARP_MASTER_DELAY,
|
||||
vrrp_health_check_interval=0,
|
||||
ha_conf_dir=None):
|
||||
self.name = 'VR_%s' % vrouter_id
|
||||
|
||||
if state not in VALID_STATES:
|
||||
@ -178,12 +182,17 @@ class KeepalivedInstance(object):
|
||||
self.vips = []
|
||||
self.virtual_routes = KeepalivedInstanceRoutes()
|
||||
self.authentication = None
|
||||
self.track_script = None
|
||||
self.primary_vip_range = get_free_range(
|
||||
parent_range=constants.PRIVATE_CIDR_RANGE,
|
||||
excluded_ranges=[constants.METADATA_CIDR,
|
||||
constants.DVR_FIP_LL_CIDR] + ha_cidrs,
|
||||
size=PRIMARY_VIP_RANGE_SIZE)
|
||||
|
||||
if vrrp_health_check_interval > 0:
|
||||
self.track_script = KeepalivedTrackScript(
|
||||
vrrp_health_check_interval, ha_conf_dir, self.vrouter_id)
|
||||
|
||||
def set_authentication(self, auth_type, password):
|
||||
if auth_type not in VALID_AUTH_TYPES:
|
||||
raise InvalidAuthenticationTypeException(auth_type=auth_type)
|
||||
@ -267,12 +276,19 @@ class KeepalivedInstance(object):
|
||||
[' }'])
|
||||
|
||||
def build_config(self):
|
||||
config = ['vrrp_instance %s {' % self.name,
|
||||
' state %s' % self.state,
|
||||
' interface %s' % self.interface,
|
||||
' virtual_router_id %s' % self.vrouter_id,
|
||||
' priority %s' % self.priority,
|
||||
' garp_master_delay %s' % self.garp_master_delay]
|
||||
if self.track_script:
|
||||
config = self.track_script.build_config_preamble()
|
||||
self.track_script.routes = self.virtual_routes.gateway_routes
|
||||
self.track_script.vips = self.vips
|
||||
else:
|
||||
config = []
|
||||
|
||||
config.extend(['vrrp_instance %s {' % self.name,
|
||||
' state %s' % self.state,
|
||||
' interface %s' % self.interface,
|
||||
' virtual_router_id %s' % self.vrouter_id,
|
||||
' priority %s' % self.priority,
|
||||
' garp_master_delay %s' % self.garp_master_delay])
|
||||
|
||||
if self.nopreempt:
|
||||
config.append(' nopreempt')
|
||||
@ -299,6 +315,9 @@ class KeepalivedInstance(object):
|
||||
if len(self.virtual_routes):
|
||||
config.extend(self.virtual_routes.build_config())
|
||||
|
||||
if self.track_script:
|
||||
config.extend(self.track_script.build_config())
|
||||
|
||||
config.append('}')
|
||||
|
||||
return config
|
||||
@ -406,6 +425,10 @@ class KeepalivedManager(object):
|
||||
|
||||
keepalived_pm.enable(reload_cfg=True)
|
||||
|
||||
for key, instance in six.iteritems(self.config.instances):
|
||||
if instance.track_script:
|
||||
instance.track_script.write_check_script()
|
||||
|
||||
self.process_monitor.register(uuid=self.resource_id,
|
||||
service_name=KEEPALIVED_SERVICE_NAME,
|
||||
monitored_process=keepalived_pm)
|
||||
@ -453,3 +476,81 @@ class KeepalivedManager(object):
|
||||
return cmd
|
||||
|
||||
return callback
|
||||
|
||||
|
||||
class KeepalivedTrackScript(KeepalivedConf):
    """Track script generator for Keepalived.

    Generates three artifacts for one VRRP instance:

    * the ``vrrp_script`` definition (``build_config_preamble``),
    * the ``track_script`` stanza that references it (``build_config``),
    * the bash health check script written to ``conf_dir``
      (``write_check_script``).

    ``routes`` and ``vips`` start empty and are expected to be assigned by
    the owner of this object before any config or script is generated.
    """

    def __init__(self, interval, conf_dir, vr_id):
        """Store the parameters used to render the config and the script.

        :param interval: value emitted for the ``interval`` keyword of the
                         ``vrrp_script`` block
        :param conf_dir: directory in which the check script is written
        :param vr_id: identifier used in the ``vrrp_script`` name and in
                      the check script file name
        """
        self.interval = interval
        self.conf_dir = conf_dir
        self.vr_id = vr_id
        self.routes = []
        self.vips = []

    def build_config_preamble(self):
        """Return the ``vrrp_script`` definition as a list of lines.

        The block is surrounded by empty strings so that it is separated
        from neighbouring config by blank lines once the lines are joined.

        :return: list of config lines
        """
        script_name = '%s_%s' % (HEALTH_CHECK_NAME, self.vr_id)
        return ['',
                'vrrp_script %s {' % script_name,
                ' script "%s"' % self._get_script_location(),
                ' interval %s' % self.interval,
                ' fall 2',
                ' rise 2',
                '}',
                '']

    def _is_needed(self):
        """Check if track script is needed by checking amount of routes.

        :return: True/False
        """
        return bool(self.routes)

    def build_config(self):
        """Return the ``track_script`` stanza as a list of lines.

        An empty list is returned when no routes are set, so callers can
        always ``extend()`` or join the result without special casing.

        :return: list of config lines (possibly empty)
        """
        if not self._is_needed():
            # Always hand back a list; previously this branch returned ''
            # while the other branch returned a list.
            return []

        return [' track_script {',
                ' %s_%s' % (HEALTH_CHECK_NAME, self.vr_id),
                ' }']

    def build_script(self):
        """Return an iterator over the lines of the bash check script.

        Order of lines: shebang, the VIP presence check (an empty string
        when no VIPs are set), then one ping command per route that has a
        nexthop.

        :return: iterator of script lines
        """
        ping_lines = (self._add_ip_addr(route.nexthop)
                      for route in self.routes if route.nexthop)
        return itertools.chain(['#!/bin/bash -eu'],
                               [self._check_ip_assigned()],
                               ping_lines)

    def _add_ip_addr(self, ip_addr):
        """Return the shell line that pings ``ip_addr`` once.

        ``ping`` is used for IPv4 addresses and ``ping6`` for IPv6; the
        generated line makes the script exit with status 1 when the ping
        fails.

        :param ip_addr: address to ping
        :return: shell command string
        """
        cmd = {
            4: 'ping',
            6: 'ping6',
        }.get(netaddr.IPAddress(ip_addr).version)

        return '%s -c 1 -w 1 %s 1>/dev/null || exit 1' % (cmd, ip_addr)

    def _check_ip_assigned(self):
        """Return the shell line that checks for the first VIP.

        The generated line greps the output of ``ip a`` for the address of
        ``self.vips[0]`` and exits the script with status 0 when it is not
        found, so the ping lines that follow only run when the address is
        present.

        :return: shell command string, or '' when no VIPs are set
        """
        if not len(self.vips):
            return ''

        # NOTE(review): the address is passed to grep as an unanchored
        # pattern, so e.g. 10.0.0.1 would also match 10.0.0.10 - confirm
        # whether a stricter match is wanted.
        cmd = 'ip a | grep %s || exit 0'
        return cmd % netaddr.IPNetwork(self.vips[0].ip_address).ip

    def _get_script_str(self):
        """Generates and returns bash script to verify connectivity.

        :return: Bash script code
        """
        return '\n'.join(self.build_script())

    def _get_script_location(self):
        """Return the path of the check script inside ``conf_dir``."""
        return os.path.join(self.conf_dir,
                            'ha_check_script_%s.sh' % self.vr_id)

    def write_check_script(self):
        """Write the check script to disk, unless no routes are set."""
        if not self._is_needed():
            return

        # NOTE(review): 0o520 is r-x for the owner and -w- for the group;
        # confirm this is the intended mode for an executable script.
        file_utils.replace_file(
            self._get_script_location(), self._get_script_str(), 0o520)
|
||||
|
@ -43,6 +43,18 @@ OPTS = [
|
||||
'keepalived server connection requests. '
|
||||
'More threads create a higher CPU load '
|
||||
'on the agent node.')),
|
||||
cfg.IntOpt('ha_vrrp_health_check_interval',
|
||||
default=0,
|
||||
help=_('The VRRP health check interval in seconds. Values > 0 '
|
||||
'enable VRRP health checks. Setting it to 0 disables '
|
||||
'VRRP health checks. Recommended value is 5. '
|
||||
'This will cause pings to be sent to the gateway '
|
||||
'IP address(es) - requires ICMP_ECHO_REQUEST '
|
||||
'to be enabled on the gateway. '
|
||||
'If gateway fails, all routers will be reported '
|
||||
'as master, and master election will be repeated '
|
||||
'in round-robin fashion, until one of the router '
|
||||
'restore the gateway connection.')),
|
||||
]
|
||||
|
||||
|
||||
|
@ -591,6 +591,16 @@ class L3AgentTestFramework(base.BaseSudoTestCase):
|
||||
ha_device = ip_lib.IPDevice(device_name, router.ha_namespace)
|
||||
ha_device.link.set_down()
|
||||
|
||||
@staticmethod
def fail_gw_router_port(router):
    """Set the link of the router's external network bridge down.

    The bridge name is read from
    ``router.driver.conf.external_network_bridge``.
    """
    bridge_name = router.driver.conf.external_network_bridge
    ip_lib.IPDevice(bridge_name).link.set_down()
|
||||
|
||||
@staticmethod
def restore_gw_router_port(router):
    """Set the link of the router's external network bridge back up.

    The bridge name is read from
    ``router.driver.conf.external_network_bridge``.
    """
    bridge_name = router.driver.conf.external_network_bridge
    ip_lib.IPDevice(bridge_name).link.set_up()
|
||||
|
||||
@classmethod
|
||||
def _get_addresses_on_device(cls, namespace, interface):
|
||||
return [address['cidr'] for address in
|
||||
|
@ -336,6 +336,54 @@ class L3HATestFailover(framework.L3AgentTestFramework):
|
||||
self.assertEqual(master_router, new_slave)
|
||||
self.assertEqual(slave_router, new_master)
|
||||
|
||||
def test_ha_router_lost_gw_connection(self):
    """The master role moves to the other router when the master's
    gateway bridge goes down and health checks are enabled.
    """
    # Enable the VRRP health check on both agents before the routers
    # are created.
    for l3_agent in (self.agent, self.failover_agent):
        l3_agent.conf.set_override('ha_vrrp_health_check_interval', 5)

    first, second = self.create_ha_routers()
    master_router, slave_router = self._get_master_and_slave_routers(
        first, second)

    self.fail_gw_router_port(master_router)

    # NOTE: the former slave is passed first because it is the router
    # expected to have become master after the failure.
    roles_after_failure = self._get_master_and_slave_routers(
        slave_router, master_router)
    new_master, new_slave = roles_after_failure

    self.assertEqual(master_router, new_slave)
    self.assertEqual(slave_router, new_master)
|
||||
|
||||
def test_both_ha_router_lost_gw_connection(self):
    """Both routers report master while both gateways are down; the
    original roles are expected again once the original master's
    gateway bridge is restored.
    """
    # Enable the VRRP health check on both agents before the routers
    # are created.
    for l3_agent in (self.agent, self.failover_agent):
        l3_agent.conf.set_override('ha_vrrp_health_check_interval', 5)

    first, second = self.create_ha_routers()
    master_router, slave_router = self._get_master_and_slave_routers(
        first, second)

    # Take the gateway bridge down for both routers, master first.
    for ha_router in (master_router, slave_router):
        self.fail_gw_router_port(ha_router)

    # Wait for each router in turn to report itself as master.
    common_utils.wait_until_true(
        lambda: master_router.ha_state == 'master')
    common_utils.wait_until_true(
        lambda: slave_router.ha_state == 'master')

    self.restore_gw_router_port(master_router)

    roles_after_restore = self._get_master_and_slave_routers(
        master_router, slave_router)
    new_master, new_slave = roles_after_restore

    self.assertEqual(master_router, new_master)
    self.assertEqual(slave_router, new_slave)
|
||||
|
||||
|
||||
class LinuxBridgeL3HATestCase(L3HATestCase):
|
||||
INTERFACE_DRIVER = 'neutron.agent.linux.interface.BridgeInterfaceDriver'
|
||||
|
@ -11,11 +11,16 @@
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
from neutron_lib import constants as n_consts
|
||||
import os
|
||||
|
||||
import mock
|
||||
import testtools
|
||||
import textwrap
|
||||
|
||||
from neutron_lib import constants as n_consts
|
||||
|
||||
from neutron.agent.linux import keepalived
|
||||
from neutron.tests import base
|
||||
|
||||
@ -29,6 +34,8 @@ KEEPALIVED_GLOBAL_CONFIG = textwrap.dedent("""\
|
||||
}""") % dict(
|
||||
email_from=keepalived.KEEPALIVED_EMAIL_FROM,
|
||||
router_id=keepalived.KEEPALIVED_ROUTER_ID)
|
||||
VRRP_ID = 1
|
||||
VRRP_INTERVAL = 5
|
||||
|
||||
|
||||
class KeepalivedGetFreeRangeTestCase(base.BaseTestCase):
|
||||
@ -316,7 +323,32 @@ class KeepalivedInstanceTestCase(base.BaseTestCase,
|
||||
}
|
||||
}""")
|
||||
instance = keepalived.KeepalivedInstance(
|
||||
'MASTER', 'eth0', 1, ['169.254.192.0/18'])
|
||||
'MASTER', 'eth0', VRRP_ID, ['169.254.192.0/18'])
|
||||
self.assertEqual(expected, os.linesep.join(instance.build_config()))
|
||||
|
||||
def test_build_config_no_vips_track_script(self):
|
||||
expected = """
|
||||
vrrp_script ha_health_check_1 {
|
||||
script "/etc/ha_confs/qrouter-x/ha_check_script_1.sh"
|
||||
interval 5
|
||||
fall 2
|
||||
rise 2
|
||||
}
|
||||
|
||||
vrrp_instance VR_1 {
|
||||
state MASTER
|
||||
interface eth0
|
||||
virtual_router_id 1
|
||||
priority 50
|
||||
garp_master_delay 60
|
||||
virtual_ipaddress {
|
||||
169.254.0.1/24 dev eth0
|
||||
}
|
||||
}"""
|
||||
instance = keepalived.KeepalivedInstance(
|
||||
'MASTER', 'eth0', VRRP_ID, ['169.254.192.0/18'])
|
||||
instance.track_script = keepalived.KeepalivedTrackScript(
|
||||
VRRP_INTERVAL, '/etc/ha_confs/qrouter-x', VRRP_ID)
|
||||
self.assertEqual(expected, '\n'.join(instance.build_config()))
|
||||
|
||||
|
||||
@ -346,3 +378,74 @@ class KeepalivedVirtualRouteTestCase(base.BaseTestCase):
|
||||
def test_virtual_route_without_dev(self):
|
||||
route = keepalived.KeepalivedVirtualRoute('50.0.0.0/8', '1.2.3.4')
|
||||
self.assertEqual('50.0.0.0/8 via 1.2.3.4', route.build_config())
|
||||
|
||||
|
||||
class KeepalivedTrackScriptTestCase(base.BaseTestCase):
|
||||
|
||||
def test_build_config_preamble(self):
|
||||
exp_conf = [
|
||||
'',
|
||||
'vrrp_script ha_health_check_1 {',
|
||||
' script "/etc/ha_confs/qrouter-x/ha_check_script_1.sh"',
|
||||
' interval 5',
|
||||
' fall 2',
|
||||
' rise 2',
|
||||
'}',
|
||||
'']
|
||||
ts = keepalived.KeepalivedTrackScript(
|
||||
VRRP_INTERVAL, '/etc/ha_confs/qrouter-x', VRRP_ID)
|
||||
self.assertEqual(exp_conf, ts.build_config_preamble())
|
||||
|
||||
def test_get_config_str(self):
|
||||
ts = keepalived.KeepalivedTrackScript(
|
||||
VRRP_INTERVAL, '/etc/ha_confs/qrouter-x', VRRP_ID)
|
||||
ts.routes = [
|
||||
keepalived.KeepalivedVirtualRoute('12.0.0.0/24', '10.0.0.1'), ]
|
||||
self.assertEqual(''' track_script {
|
||||
ha_health_check_1
|
||||
}''',
|
||||
ts.get_config_str())
|
||||
|
||||
def test_get_script_str(self):
|
||||
ts = keepalived.KeepalivedTrackScript(
|
||||
VRRP_INTERVAL, '/etc/ha_confs/qrouter-x', VRRP_ID)
|
||||
ts.routes = [
|
||||
keepalived.KeepalivedVirtualRoute('12.0.0.0/24', '10.0.0.1'), ]
|
||||
ts.vips = [
|
||||
keepalived.KeepalivedVipAddress('192.168.0.3/18', 'ha-xxx'), ]
|
||||
|
||||
self.assertEqual("""#!/bin/bash -eu
|
||||
ip a | grep 192.168.0.3 || exit 0
|
||||
ping -c 1 -w 1 10.0.0.1 1>/dev/null || exit 1""",
|
||||
ts._get_script_str())
|
||||
|
||||
def test_get_script_str_no_routes(self):
|
||||
ts = keepalived.KeepalivedTrackScript(
|
||||
VRRP_INTERVAL, '/etc/ha_confs/qrouter-x', VRRP_ID)
|
||||
|
||||
self.assertEqual('#!/bin/bash -eu\n', ts._get_script_str())
|
||||
|
||||
def test_write_check_script(self):
|
||||
conf_dir = '/etc/ha_confs/qrouter-x'
|
||||
ts = keepalived.KeepalivedTrackScript(VRRP_INTERVAL, conf_dir, VRRP_ID)
|
||||
ts.routes = [
|
||||
keepalived.KeepalivedVirtualRoute('12.0.0.0/24', '10.0.0.1'),
|
||||
keepalived.KeepalivedVirtualRoute('2001:db8::1', '2001:db8::1'), ]
|
||||
with mock.patch.object(keepalived, 'file_utils') as patched_utils:
|
||||
ts.write_check_script()
|
||||
patched_utils.replace_file.assert_called_with(
|
||||
os.path.join(conf_dir, 'ha_check_script_1.sh'),
|
||||
"""#!/bin/bash -eu
|
||||
|
||||
ping -c 1 -w 1 10.0.0.1 1>/dev/null || exit 1
|
||||
ping6 -c 1 -w 1 2001:db8::1 1>/dev/null || exit 1""",
|
||||
0o520
|
||||
)
|
||||
|
||||
def test_write_check_script_no_routes(self):
|
||||
conf_dir = '/etc/ha_confs/qrouter-x'
|
||||
ts = keepalived.KeepalivedTrackScript(
|
||||
VRRP_INTERVAL, conf_dir, VRRP_ID)
|
||||
with mock.patch.object(keepalived, 'file_utils') as patched_utils:
|
||||
ts.write_check_script()
|
||||
patched_utils.replace_file.assert_not_called()
|
||||
|
@ -0,0 +1,11 @@
|
||||
---
|
||||
prelude: >
|
||||
Keepalived VRRP health check functionality to enable verification of
|
||||
connectivity from the "master" router to all gateways.
|
||||
features:
|
||||
- Activation of this feature enables gateway connectivity validation and
|
||||
rescheduling of the "master" router to another node when connectivity
|
||||
is lost. If all routers lose connectivity to the gateways, the election
|
||||
process will be repeated round-robin until one of the routers restores
|
||||
its gateway connection. In the meantime, all of the routers will be
|
||||
reported as "master".
|
Loading…
Reference in New Issue
Block a user