Fix netns_cleanup interrupted on rootwrap-daemon I/O
Functional tests for netns_cleanup have been failing a few times
in the gate lately. After thorough tests we've seen that the issue was
related to using rootwrap-daemon inside a wait_until_true loop. When
the timeout fired while utils.execute() was reading from rootwrap-daemon,
the read got interrupted and the output of the last command was not read.
Therefore, subsequent calls to utils.execute() would read the output of
the previous command rather than their own, leading to unexpected
results.
This fix will poll existing processes in the namespace without making
use of the wait_until_true loop. Instead, it will check elapsed time
and raise the exception if timeout is exceeded.
Also, I'm removing the debug traces introduced in
327f7fc4d5
which helped find the root
cause of this bug.
Change-Id: Ie233261e4be36eecaf6ec6d0532f0f5e2e996cd2
Closes-Bug: #1654287
This commit is contained in:
parent
ada4237905
commit
3f9f740d81
@ -131,14 +131,10 @@ class IPWrapper(SubProcessBase):
|
||||
cmd = ['ip', 'netns', 'exec', self.namespace,
|
||||
'find', SYS_NET_PATH, '-maxdepth', '1',
|
||||
'-type', 'l', '-printf', '%f ']
|
||||
output_str = utils.execute(
|
||||
output = utils.execute(
|
||||
cmd,
|
||||
run_as_root=True,
|
||||
log_fail_as_error=self.log_fail_as_error)
|
||||
# NOTE(dalvarez): Logging the output of this call due to
|
||||
# bug1654287.
|
||||
LOG.debug('get_devices(): %s', output_str)
|
||||
output = output_str.split()
|
||||
log_fail_as_error=self.log_fail_as_error).split()
|
||||
except RuntimeError:
|
||||
# We could be racing with a cron job deleting namespaces.
|
||||
# Just return an empty list if the namespace is deleted.
|
||||
|
@ -35,7 +35,6 @@ from neutron.agent.linux import interface
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.agent.linux import utils
|
||||
from neutron.common import config
|
||||
from neutron.common import utils as common_utils
|
||||
from neutron.conf.agent import cmd
|
||||
from neutron.conf.agent import dhcp as dhcp_config
|
||||
|
||||
@ -162,14 +161,19 @@ def wait_until_no_listen_pids_namespace(namespace, timeout=SIGTERM_WAITTIME):
|
||||
If after timeout seconds, there are remaining processes in the namespace,
|
||||
then a PidsInNamespaceException will be thrown.
|
||||
"""
|
||||
# Would be better to handle an eventlet.timeout.Timeout exception
|
||||
# but currently there's a problem importing eventlet since it's
|
||||
# doing a local import from cmd/eventlet which doesn't have a
|
||||
# timeout module
|
||||
common_utils.wait_until_true(
|
||||
lambda: not find_listen_pids_namespace(namespace),
|
||||
timeout=SIGTERM_WAITTIME,
|
||||
exception=PidsInNamespaceException)
|
||||
# NOTE(dalvarez): This function can block forever if
|
||||
# find_listen_pids_namespace never returns, which is really unlikely. We
|
||||
# can't use wait_until_true because we might get interrupted by eventlet
|
||||
# Timeout during our I/O with rootwrap daemon and that will lead to errors
|
||||
# in subsequent calls to utils.execute grabbing always the output of the
|
||||
# previous command
|
||||
start = end = time.time()
|
||||
while end - start < timeout:
|
||||
if not find_listen_pids_namespace(namespace):
|
||||
return
|
||||
time.sleep(1)
|
||||
end = time.time()
|
||||
raise PidsInNamespaceException
|
||||
|
||||
|
||||
def _kill_listen_processes(namespace, force=False):
|
||||
|
@ -274,11 +274,9 @@ class TestNetnsCleanup(base.BaseTestCase):
|
||||
def test_kill_listen_processes(self):
|
||||
with mock.patch.object(util, '_kill_listen_processes',
|
||||
return_value=1) as mock_kill_listen:
|
||||
with mock.patch('neutron.common.utils.wait_until_true')\
|
||||
as wait_until_true_mock:
|
||||
wait_until_true_mock.side_effect = [
|
||||
util.PidsInNamespaceException,
|
||||
None]
|
||||
with mock.patch.object(util, 'wait_until_no_listen_pids_namespace',
|
||||
side_effect=[util.PidsInNamespaceException,
|
||||
None]):
|
||||
namespace = mock.ANY
|
||||
util.kill_listen_processes(namespace)
|
||||
mock_kill_listen.assert_has_calls(
|
||||
@ -288,10 +286,8 @@ class TestNetnsCleanup(base.BaseTestCase):
|
||||
def test_kill_listen_processes_still_procs(self):
|
||||
with mock.patch.object(util, '_kill_listen_processes',
|
||||
return_value=1):
|
||||
with mock.patch('neutron.common.utils.wait_until_true')\
|
||||
as wait_until_true_mock:
|
||||
wait_until_true_mock.side_effect = (
|
||||
util.PidsInNamespaceException)
|
||||
with mock.patch.object(util, 'wait_until_no_listen_pids_namespace',
|
||||
side_effect=util.PidsInNamespaceException):
|
||||
namespace = mock.ANY
|
||||
with testtools.ExpectedException(
|
||||
util.PidsInNamespaceException):
|
||||
@ -300,13 +296,14 @@ class TestNetnsCleanup(base.BaseTestCase):
|
||||
def test_kill_listen_processes_no_procs(self):
|
||||
with mock.patch.object(util, '_kill_listen_processes',
|
||||
return_value=0) as mock_kill_listen:
|
||||
with mock.patch('neutron.common.utils.wait_until_true')\
|
||||
as wait_until_true_mock:
|
||||
with mock.patch.object(util,
|
||||
'wait_until_no_listen_pids_namespace')\
|
||||
as wait_until_mock:
|
||||
namespace = mock.ANY
|
||||
util.kill_listen_processes(namespace)
|
||||
mock_kill_listen.assert_called_once_with(namespace,
|
||||
force=False)
|
||||
self.assertFalse(wait_until_true_mock.called)
|
||||
self.assertFalse(wait_until_mock.called)
|
||||
|
||||
def _test_destroy_namespace_helper(self, force, num_devices):
|
||||
ns = 'qrouter-6e322ac7-ab50-4f53-9cdc-d1d3c1164b6d'
|
||||
|
Loading…
Reference in New Issue
Block a user