Revert "Implement "kill" method using os.kill()"

This reverts commit 4b21111eb1.

Reason for revert: This method is unstable and prone to timeouts

Change-Id: I6064d60e4d63b085046aace7683d766a79dd22da
This commit is contained in:
Rodolfo Alonso 2021-03-25 22:05:58 +00:00
parent 4b21111eb1
commit 19eb12bd29
14 changed files with 88 additions and 152 deletions

View File

@ -440,12 +440,12 @@ class HaRouter(router.RouterInfo):
pm = self._get_state_change_monitor_process_manager() pm = self._get_state_change_monitor_process_manager()
process_monitor.unregister( process_monitor.unregister(
self.router_id, IP_MONITOR_PROCESS_SERVICE) self.router_id, IP_MONITOR_PROCESS_SERVICE)
pm.disable(sig=signal.SIGTERM) pm.disable(sig=str(int(signal.SIGTERM)))
try: try:
common_utils.wait_until_true(lambda: not pm.active, common_utils.wait_until_true(lambda: not pm.active,
timeout=SIGTERM_TIMEOUT) timeout=SIGTERM_TIMEOUT)
except common_utils.WaitTimeout: except common_utils.WaitTimeout:
pm.disable(sig=signal.SIGKILL) pm.disable(sig=str(int(signal.SIGKILL)))
@staticmethod @staticmethod
def _gateway_ports_equal(port1, port2): def _gateway_ports_equal(port1, port2):

View File

@ -15,7 +15,6 @@
import abc import abc
import collections import collections
import os.path import os.path
import signal
import eventlet import eventlet
from oslo_concurrency import lockutils from oslo_concurrency import lockutils
@ -97,10 +96,9 @@ class ProcessManager(MonitoredProcess):
if self.custom_reload_callback: if self.custom_reload_callback:
self.disable(get_stop_command=self.custom_reload_callback) self.disable(get_stop_command=self.custom_reload_callback)
else: else:
self.disable(signal.SIGHUP) self.disable('HUP')
def disable(self, sig=signal.SIGKILL, get_stop_command=None): def disable(self, sig='9', get_stop_command=None):
sig = int(sig)
pid = self.pid pid = self.pid
if self.active: if self.active:
@ -111,9 +109,11 @@ class ProcessManager(MonitoredProcess):
run_as_root=self.run_as_root, run_as_root=self.run_as_root,
privsep_exec=True) privsep_exec=True)
else: else:
self._kill_process(sig, pid) cmd = self.get_kill_cmd(sig, pid)
utils.execute(cmd, run_as_root=self.run_as_root,
privsep_exec=True)
# In the case of shutting down, remove the pid file # In the case of shutting down, remove the pid file
if sig == signal.SIGKILL: if sig == '9':
utils.delete_if_exists(self.get_pid_file_name(), utils.delete_if_exists(self.get_pid_file_name(),
run_as_root=self.run_as_root) run_as_root=self.run_as_root)
elif pid: elif pid:
@ -125,14 +125,12 @@ class ProcessManager(MonitoredProcess):
LOG.debug('No %(service)s process started for %(uuid)s', LOG.debug('No %(service)s process started for %(uuid)s',
{'service': self.service, 'uuid': self.uuid}) {'service': self.service, 'uuid': self.uuid})
def _kill_process(self, sig, pid): def get_kill_cmd(self, sig, pid):
if self.kill_scripts_path: if self.kill_scripts_path:
kill_file = "%s-kill" % self.service kill_file = "%s-kill" % self.service
if os.path.isfile(os.path.join(self.kill_scripts_path, kill_file)): if os.path.isfile(os.path.join(self.kill_scripts_path, kill_file)):
utils.execute([kill_file, sig, pid], return [kill_file, sig, pid]
run_as_root=self.run_as_root) return ['kill', '-%s' % (sig), pid]
return
utils.kill_process(pid, sig, run_as_root=self.run_as_root)
def get_pid_file_name(self): def get_pid_file_name(self):
"""Returns the file name for a given kind of config file.""" """Returns the file name for a given kind of config file."""

View File

@ -479,7 +479,7 @@ class KeepalivedManager(object):
service_name=KEEPALIVED_SERVICE_NAME) service_name=KEEPALIVED_SERVICE_NAME)
pm = self.get_process() pm = self.get_process()
pm.disable(sig=signal.SIGTERM) pm.disable(sig=str(int(signal.SIGTERM)))
try: try:
utils.wait_until_true(lambda: not pm.active, utils.wait_until_true(lambda: not pm.active,
timeout=SIGTERM_TIMEOUT) timeout=SIGTERM_TIMEOUT)
@ -487,7 +487,7 @@ class KeepalivedManager(object):
LOG.warning('Keepalived process %s did not finish after SIGTERM ' LOG.warning('Keepalived process %s did not finish after SIGTERM '
'signal in %s seconds, sending SIGKILL signal', 'signal in %s seconds, sending SIGKILL signal',
pm.pid, SIGTERM_TIMEOUT) pm.pid, SIGTERM_TIMEOUT)
pm.disable(sig=signal.SIGKILL) pm.disable(sig=str(int(signal.SIGKILL)))
def check_processes(self): def check_processes(self):
keepalived_pm = self.get_process() keepalived_pm = self.get_process()

View File

@ -214,33 +214,14 @@ def find_fork_top_parent(pid):
return pid return pid
def kill_process(pid, signal, run_as_root=False, extra_ok_codes=None, def kill_process(pid, signal, run_as_root=False):
raise_exception=True): """Kill the process with the given pid using the given signal."""
"""Kill the process with the given pid using the given signal.
:param pid: (str, int) process PID
:param signal: (str, int) signal to send
:param run_as_root: (bool) execute the command as root user
:param extra_ok_codes: (list of int) list of "errno" codes
:param raise_exception: (bool) if True, if an exception occurs, it is
raised
:return: 0 if OK, "errno" code in case of muted exception (see
"extra_ok_codes")
"""
try: try:
LOG.debug("Start killing process %s, signal %s", pid, signal) execute(['kill', '-%d' % signal, pid], run_as_root=run_as_root,
if run_as_root: privsep_exec=True)
priv_utils.kill_process(pid, signal) except exceptions.ProcessExecutionError:
else: if process_is_running(pid):
os.kill(int(pid), int(signal)) raise
LOG.debug("Finish killing process %s, signal %s", pid, signal)
except OSError as exc:
with excutils.save_and_reraise_exception() as ctxt:
extra_ok_codes = extra_ok_codes or []
if exc.errno in extra_ok_codes or not raise_exception:
ctxt.reraise = False
return exc.errno
return 0
def _get_conf_base(cfg_root, uuid, ensure_conf_dir): def _get_conf_base(cfg_root, uuid, ensure_conf_dir):

View File

@ -26,8 +26,7 @@ default = priv_context.PrivContext(
caps.CAP_NET_ADMIN, caps.CAP_NET_ADMIN,
caps.CAP_DAC_OVERRIDE, caps.CAP_DAC_OVERRIDE,
caps.CAP_DAC_READ_SEARCH, caps.CAP_DAC_READ_SEARCH,
caps.CAP_SYS_PTRACE, caps.CAP_SYS_PTRACE],
caps.CAP_KILL],
) )

View File

@ -82,9 +82,3 @@ def _create_process(cmd, addl_env=None):
obj = subprocess.Popen(cmd, shell=False, stdin=subprocess.PIPE, obj = subprocess.Popen(cmd, shell=False, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout=subprocess.PIPE, stderr=subprocess.PIPE)
return obj, cmd return obj, cmd
@privileged.default.entrypoint
def kill_process(pid, signal):
"""Kill the process with the given pid using the given signal."""
os.kill(int(pid), int(signal))

View File

@ -16,7 +16,6 @@
import abc import abc
from concurrent import futures from concurrent import futures
import contextlib import contextlib
import errno
import os import os
import random import random
import re import re
@ -308,10 +307,7 @@ class RootHelperProcess(subprocess.Popen):
def kill(self, sig=signal.SIGKILL): def kill(self, sig=signal.SIGKILL):
pid = self.child_pid or str(self.pid) pid = self.child_pid or str(self.pid)
if utils.kill_process(pid, sig, run_as_root=True, utils.execute(['kill', '-%d' % sig, pid], run_as_root=True)
extra_ok_codes=[errno.ESRCH]):
LOG.debug('Process %s did not exists already so it could not be '
'killed', pid)
def read_stdout(self, timeout=None): def read_stdout(self, timeout=None):
return self._read_stream(self.stdout, timeout) return self._read_stream(self.stdout, timeout)
@ -629,8 +625,7 @@ class NamespaceFixture(fixtures.Fixture):
if self.ip_wrapper.netns.exists(self.name): if self.ip_wrapper.netns.exists(self.name):
for pid in ip_lib.list_namespace_pids(self.name): for pid in ip_lib.list_namespace_pids(self.name):
utils.kill_process(pid, signal.SIGKILL, utils.kill_process(pid, signal.SIGKILL,
run_as_root=True, run_as_root=True)
extra_ok_codes=[errno.ESRCH])
self.ip_wrapper.netns.delete(self.name) self.ip_wrapper.netns.delete(self.name)
except helpers.TestTimerTimeout: except helpers.TestTimerTimeout:
LOG.warning('Namespace %s was not deleted due to a timeout.', LOG.warning('Namespace %s was not deleted due to a timeout.',

View File

@ -12,8 +12,6 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import signal
import eventlet import eventlet
from neutron._i18n import _ from neutron._i18n import _
@ -73,7 +71,7 @@ class TestAsyncProcess(AsyncProcessTestFramework):
# Ensure that the same output is read twice # Ensure that the same output is read twice
self._check_stdout(proc) self._check_stdout(proc)
pid = proc.pid pid = proc.pid
utils.kill_process(pid, signal.SIGKILL) utils.execute(['kill', '-9', pid])
common_utils.wait_until_true( common_utils.wait_until_true(
lambda: proc.is_active() and pid != proc.pid, lambda: proc.is_active() and pid != proc.pid,
timeout=5, timeout=5,

View File

@ -12,7 +12,6 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import errno
import functools import functools
import os import os
import signal import signal
@ -42,8 +41,10 @@ class TestGetRootHelperChildPid(functional_base.BaseSudoTestCase):
sleep_pid = utils.execute( sleep_pid = utils.execute(
['ps', '--ppid', parent_pid, '-o', 'pid=']).strip() ['ps', '--ppid', parent_pid, '-o', 'pid=']).strip()
self.addCleanup( self.addCleanup(
utils.kill_process, sleep_pid, signal.SIGKILL, run_as_root=True, utils.execute,
raise_exception=False) ['kill', '-9', sleep_pid],
check_exit_code=False,
run_as_root=True)
def test_get_root_helper_child_pid_returns_first_child(self): def test_get_root_helper_child_pid_returns_first_child(self):
"""Test that the first child, not lowest child pid is returned. """Test that the first child, not lowest child pid is returned.
@ -171,55 +172,3 @@ class TestFindChildPids(functional_base.BaseSudoTestCase):
with open('/proc/sys/kernel/pid_max', 'r') as fd: with open('/proc/sys/kernel/pid_max', 'r') as fd:
pid_max = int(fd.readline().strip()) pid_max = int(fd.readline().strip())
self.assertEqual([], utils.find_child_pids(pid_max)) self.assertEqual([], utils.find_child_pids(pid_max))
class TestKillProcess(functional_base.BaseSudoTestCase):
# NOTE(ralonsoh): in 64bit systems, PID_MAX_LIMIT is 2^22
NON_EXISTING_PID = 2**22 + 1
def _test_exception(self, pid, error_no, run_as_root=False):
try:
utils.kill_process(pid, signal.SIGUSR1, run_as_root=run_as_root)
except OSError as exc:
self.assertEqual(error_no, exc.errno)
def test_root_no_such_process(self):
self._test_exception(self.NON_EXISTING_PID, errno.ESRCH,
run_as_root=True)
def test_root_no_such_proces_hidden(self):
self.assertEqual(
errno.ESRCH,
utils.kill_process(self.NON_EXISTING_PID, signal.SIGUSR1,
run_as_root=True, extra_ok_codes=[errno.ESRCH]))
def test_root_exception_not_risen(self):
self.assertEqual(
errno.ESRCH,
utils.kill_process(self.NON_EXISTING_PID, signal.SIGUSR1,
run_as_root=True, raise_exception=False))
def test_non_root_no_such_process(self):
self._test_exception(self.NON_EXISTING_PID, errno.ESRCH)
def test_non_root_no_such_process_hidden(self):
self.assertEqual(
errno.ESRCH,
utils.kill_process(self.NON_EXISTING_PID, signal.SIGUSR1,
extra_ok_codes=[errno.ESRCH]))
def test_non_root_operation_not_permitted(self):
self._test_exception(1, errno.EPERM)
def test_non_root_operation_not_permitted_hidden(self):
self.assertEqual(
errno.EPERM,
utils.kill_process(1, signal.SIGUSR1,
extra_ok_codes=[errno.EPERM]))
def test_non_root_exception_not_risen(self):
self.assertEqual(
errno.ESRCH,
utils.kill_process(self.NON_EXISTING_PID, signal.SIGUSR1,
raise_exception=False))

View File

@ -15,7 +15,6 @@
import copy import copy
import os.path import os.path
import signal
from unittest import mock from unittest import mock
import eventlet import eventlet
@ -326,7 +325,7 @@ class DHCPAgentOVSTestCase(DHCPAgentOVSTestFramework):
pm, network = self._spawn_network_metadata_proxy() pm, network = self._spawn_network_metadata_proxy()
old_pid = pm.pid old_pid = pm.pid
utils.kill_process(old_pid, signal.SIGKILL, run_as_root=True) utils.execute(['kill', '-9', old_pid], run_as_root=True)
common_utils.wait_until_true( common_utils.wait_until_true(
lambda: pm.active and pm.pid != old_pid, lambda: pm.active and pm.pid != old_pid,
timeout=5, timeout=5,

View File

@ -109,7 +109,8 @@ class TestBasicRouterOperations(base.BaseTestCase):
mock_pm.active = False mock_pm.active = False
ri.destroy_state_change_monitor(mock_pm) ri.destroy_state_change_monitor(mock_pm)
mock_pm.disable.assert_called_once_with(sig=signal.SIGTERM) mock_pm.disable.assert_called_once_with(
sig=str(int(signal.SIGTERM)))
def test_destroy_state_change_monitor_force(self): def test_destroy_state_change_monitor_force(self):
ri = self._create_router(mock.MagicMock()) ri = self._create_router(mock.MagicMock())

View File

@ -13,7 +13,6 @@
# under the License. # under the License.
import os.path import os.path
import signal
from unittest import mock from unittest import mock
from neutron_lib import fixture as lib_fixtures from neutron_lib import fixture as lib_fixtures
@ -194,7 +193,7 @@ class TestProcessManager(base.BaseTestCase):
with mock.patch.object(ep.ProcessManager, 'disable') as disable: with mock.patch.object(ep.ProcessManager, 'disable') as disable:
manager = ep.ProcessManager(self.conf, 'uuid', namespace='ns') manager = ep.ProcessManager(self.conf, 'uuid', namespace='ns')
manager.reload_cfg() manager.reload_cfg()
disable.assert_called_once_with(signal.SIGHUP) disable.assert_called_once_with('HUP')
def test_reload_cfg_with_custom_reload_callback(self): def test_reload_cfg_with_custom_reload_callback(self):
reload_callback = mock.sentinel.callback reload_callback = mock.sentinel.callback
@ -228,8 +227,10 @@ class TestProcessManager(base.BaseTestCase):
with mock.patch.object(ep, 'utils') as utils: with mock.patch.object(ep, 'utils') as utils:
manager.disable() manager.disable()
utils.kill_process.assert_has_calls([ utils.assert_has_calls([
mock.call(4, int(signal.SIGKILL), run_as_root=False)]) mock.call.execute(['kill', '-9', 4],
run_as_root=False,
privsep_exec=True)])
def test_disable_namespace(self): def test_disable_namespace(self):
with mock.patch.object(ep.ProcessManager, 'pid') as pid: with mock.patch.object(ep.ProcessManager, 'pid') as pid:
@ -241,23 +242,10 @@ class TestProcessManager(base.BaseTestCase):
with mock.patch.object(ep, 'utils') as utils: with mock.patch.object(ep, 'utils') as utils:
manager.disable() manager.disable()
utils.kill_process.assert_has_calls([ utils.assert_has_calls([
mock.call(4, int(signal.SIGKILL), run_as_root=True)]) mock.call.execute(['kill', '-9', 4],
run_as_root=True,
def test_disable_with_and_without_namespace(self): privsep_exec=True)])
namespaces = ['ns', None]
for namespace in namespaces:
with mock.patch.object(ep.ProcessManager, 'pid') as pid:
pid.__get__ = mock.Mock(return_value=4)
with mock.patch.object(ep.ProcessManager, 'active') as active:
active.__get__ = mock.Mock(return_value=True)
manager = ep.ProcessManager(self.conf, 'uuid',
namespace=namespace)
with mock.patch.object(ep, 'utils') as utils:
manager.disable()
call = mock.call(4, int(signal.SIGKILL),
run_as_root=bool(namespace))
utils.kill_process.assert_has_calls([call])
def test_disable_not_active(self): def test_disable_not_active(self):
with mock.patch.object(ep.ProcessManager, 'pid') as pid: with mock.patch.object(ep.ProcessManager, 'pid') as pid:
@ -282,10 +270,13 @@ class TestProcessManager(base.BaseTestCase):
def _test_disable_custom_kill_script(self, kill_script_exists, namespace, def _test_disable_custom_kill_script(self, kill_script_exists, namespace,
kill_scripts_path='test-path/'): kill_scripts_path='test-path/'):
cfg.CONF.set_override("kill_scripts_path", kill_scripts_path, "AGENT") cfg.CONF.set_override("kill_scripts_path", kill_scripts_path, "AGENT")
process_pid = 4 if kill_script_exists:
expected_cmd = ['test-service-kill', '9', 4]
else:
expected_cmd = ['kill', '-9', 4]
with mock.patch.object(ep.ProcessManager, 'pid') as pid: with mock.patch.object(ep.ProcessManager, 'pid') as pid:
pid.__get__ = mock.Mock(return_value=process_pid) pid.__get__ = mock.Mock(return_value=4)
with mock.patch.object(ep.ProcessManager, 'active') as active: with mock.patch.object(ep.ProcessManager, 'active') as active:
active.__get__ = mock.Mock(return_value=True) active.__get__ = mock.Mock(return_value=True)
manager = ep.ProcessManager( manager = ep.ProcessManager(
@ -295,15 +286,9 @@ class TestProcessManager(base.BaseTestCase):
mock.patch.object(os.path, 'isfile', mock.patch.object(os.path, 'isfile',
return_value=kill_script_exists): return_value=kill_script_exists):
manager.disable() manager.disable()
if kill_script_exists: utils.execute.assert_called_with(
expected_cmd = ['test-service-kill', expected_cmd, run_as_root=bool(namespace),
signal.SIGKILL, process_pid] privsep_exec=True)
utils.execute.assert_called_with(
expected_cmd, run_as_root=bool(namespace))
else:
utils.kill_process.assert_called_once_with(
process_pid, signal.SIGKILL,
run_as_root=bool(namespace))
def test_disable_custom_kill_script_no_namespace(self): def test_disable_custom_kill_script_no_namespace(self):
self._test_disable_custom_kill_script( self._test_disable_custom_kill_script(

View File

@ -634,7 +634,8 @@ class KeepalivedManagerTestCase(base.BaseTestCase):
mock_get_process.return_value = process mock_get_process.return_value = process
process.active = False process.active = False
self.keepalived_manager.disable() self.keepalived_manager.disable()
process.disable.assert_called_once_with(sig=int(signal.SIGTERM)) process.disable.assert_called_once_with(
sig=str(int(signal.SIGTERM)))
def test_destroy_force(self): def test_destroy_force(self):
mock_get_process = self.mock_get_process.start() mock_get_process = self.mock_get_process.start()
@ -644,5 +645,5 @@ class KeepalivedManagerTestCase(base.BaseTestCase):
process.active = True process.active = True
self.keepalived_manager.disable() self.keepalived_manager.disable()
process.disable.assert_has_calls([ process.disable.assert_has_calls([
mock.call(sig=signal.SIGTERM), mock.call(sig=str(int(signal.SIGTERM))),
mock.call(sig=signal.SIGKILL)]) mock.call(sig=str(int(signal.SIGKILL)))])

View File

@ -13,9 +13,12 @@
# under the License. # under the License.
import copy import copy
import signal
import socket import socket
from unittest import mock from unittest import mock
import testtools
from neutron_lib import exceptions from neutron_lib import exceptions
from neutron_lib import fixture as lib_fixtures from neutron_lib import fixture as lib_fixtures
from oslo_config import cfg from oslo_config import cfg
@ -224,6 +227,39 @@ class TestFindForkTopParent(base.BaseTestCase):
pid_invoked_with_cmdline_retvals=[True, True, True]) pid_invoked_with_cmdline_retvals=[True, True, True])
class TestKillProcess(base.BaseTestCase):
def _test_kill_process(self, pid, raise_exception=False,
kill_signal=signal.SIGKILL, pid_killed=True):
if raise_exception:
exc = exceptions.ProcessExecutionError('', returncode=0)
else:
exc = None
with mock.patch.object(utils, 'execute',
side_effect=exc) as mock_execute:
with mock.patch.object(utils, 'process_is_running',
return_value=not pid_killed):
utils.kill_process(pid, kill_signal, run_as_root=True)
mock_execute.assert_called_with(['kill', '-%d' % kill_signal, pid],
run_as_root=True, privsep_exec=True)
def test_kill_process_returns_none_for_valid_pid(self):
self._test_kill_process('1')
def test_kill_process_returns_none_for_stale_pid(self):
self._test_kill_process('1', raise_exception=True)
def test_kill_process_raises_exception_for_execute_exception(self):
with testtools.ExpectedException(exceptions.ProcessExecutionError):
# Simulate that the process is running after trying to kill due to
# any reason such as, for example, Permission denied
self._test_kill_process('1', raise_exception=True,
pid_killed=False)
def test_kill_process_with_different_signal(self):
self._test_kill_process('1', kill_signal=signal.SIGTERM)
class TestGetCmdlineFromPid(base.BaseTestCase): class TestGetCmdlineFromPid(base.BaseTestCase):
def setUp(self): def setUp(self):