Wait for HA router to be active on at least one agent

Due to the related bug, it may happen that an HA router becomes active on
one of the L3 agents only a few minutes after it was created. During
that time, while the router is still in "standby" mode, a VM can be spawned;
it will try to get metadata, but haproxy in the router namespace is not yet
running, so no metadata will be available.
That will end up with an SSH authentication error, as there will be no
proper SSH key configured in the VM.

Originally this patch was intended just as a workaround for the related
bug, but I think it's worth keeping even if that bug is solved on
Neutron's side, as it may give us clearer information about the
real problem in the future if something similar happens and an HA
router is not active on any of the L3 agents.

Related-Bug: #1923633
Change-Id: I8c8b7c11c63ffeee4f776695f32ae686793299b3
This commit is contained in:
Slawek Kaplonski 2021-04-21 10:34:02 +02:00
parent d4988c4f32
commit edf3cba046
7 changed files with 29 additions and 4 deletions

View File

@ -206,8 +206,29 @@ class BaseTempestTestCase(base_api.BaseNetworkTest):
else:
router = cls.create_admin_router(**kwargs)
LOG.debug("Created router %s", router['name'])
cls._wait_for_router_ha_active(router['id'])
return router
@classmethod
def _wait_for_router_ha_active(cls, router_id):
    """Wait until an HA router is reported "active" by some L3 agent.

    The router is fetched with the admin network client first; when its
    ``ha`` attribute is missing or falsy there is nothing to wait for and
    the method returns immediately.

    :param router_id: ID of the router to look up and wait for.
    :raises lib_exc.TimeoutException: this exception instance is handed to
        ``utils.wait_until_true`` as its ``exception`` argument
        (presumably raised once the timeout expires - confirm against
        the helper).
    """
    router_details = cls.os_admin.network_client.show_router(router_id)
    if not router_details['router'].get('ha'):
        return

    def _router_active_on_l3_agent():
        # Ask which L3 agents host the router and collect the HA state
        # each of them reports for it.
        hosting = cls.os_admin.network_client.list_l3_agents_hosting_router(
            router_id)
        ha_states = [l3_agent['ha_state'] for l3_agent in hosting['agents']]
        return "active" in ha_states

    timeout_error = lib_exc.TimeoutException(
        "Router %s is not active on any of the L3 agents" % router_id)
    # NOTE(slaweq): the timeout should be lower for sure, but because of
    # the bug https://launchpad.net/bugs/1923633 we wait up to 10 minutes
    # for the router to become active on one of the L3 agents.
    utils.wait_until_true(
        _router_active_on_l3_agent,
        timeout=600,
        sleep=5,
        exception=timeout_error)
@classmethod
def skip_if_no_extension_enabled_in_l3_agents(cls, extension):
l3_agents = cls.os_admin.network_client.list_agents(

View File

@ -24,7 +24,7 @@ CONF = config.CONF
class NetworkBasicTest(base.BaseTempestTestCase):
credentials = ['primary']
credentials = ['primary', 'admin']
force_tenant_isolation = False
# Default to ipv4.

View File

@ -88,6 +88,8 @@ class NetworkConnectivityTest(base.BaseTempestTestCase):
ap2_rt = self.create_router(
router_name=data_utils.rand_name("ap2_rt"),
admin_state_up=True)
self._wait_for_router_ha_active(ap1_rt['id'])
self._wait_for_router_ha_active(ap2_rt['id'])
ap1_internal_port = self.create_port(
ap1_net, security_groups=[self.secgroup['id']])
@ -140,6 +142,7 @@ class NetworkConnectivityTest(base.BaseTempestTestCase):
router_name=data_utils.rand_name("east_west_traffic_router"),
admin_state_up=True,
external_network_id=CONF.network.public_network_id)
self._wait_for_router_ha_active(router['id'])
internal_port_1 = self.create_port(
net_1, security_groups=[self.secgroup['id']])

View File

@ -24,6 +24,7 @@ CONF = config.CONF
class InternalDNSTest(base.BaseTempestTestCase):
credentials = ['primary', 'admin']
@utils.requires_ext(extension="dns-integration", service="network")
@decorators.idempotent_id('988347de-07af-471a-abfa-65aea9f452a6')

View File

@ -122,7 +122,7 @@ tcpdump -i %(interface)s host %(group)s -vvneA -s0 -l -c1 &> %(result_file)s &
class BaseMulticastTest(object):
credentials = ['primary']
credentials = ['primary', 'admin']
force_tenant_isolation = False
# Import configuration options

View File

@ -28,7 +28,7 @@ CONF = config.CONF
class PortsTest(base.BaseTempestTestCase):
credentials = ['primary']
credentials = ['primary', 'admin']
@classmethod
def resource_setup(cls):

View File

@ -21,7 +21,7 @@ CONF = config.CONF
class PortSecurityTest(base.BaseTempestTestCase):
credentials = ['primary']
credentials = ['primary', 'admin']
required_extensions = ['port-security']
@decorators.idempotent_id('61ab176e-d48b-42b7-b38a-1ba571ecc033')