diff --git a/doc/source/admin/config-services-agent.rst b/doc/source/admin/config-services-agent.rst index 22fdc4aebff..f3b36f1fe4c 100644 --- a/doc/source/admin/config-services-agent.rst +++ b/doc/source/admin/config-services-agent.rst @@ -43,3 +43,36 @@ vary between hosts in a neutron deployment such as the ``local_ip`` for an L2 agent. If any agent requires access to additional external services beyond the neutron RPC, those endpoints should be defined in the agent-specific configuration file (for example, nova metadata for metadata agent). + +External processes run by agents +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Some neutron agents, like DHCP, Metadata or L3, often run external +processes to provide some of their functionalities. It may be keepalived, +dnsmasq, haproxy or some other process. +Neutron agents are responsible for spawning and killing such processes when +necessary. By default, to kill such processes, agents use a simple ``kill`` +command, but in some cases, like for example when those additional services +are running inside containers, it may not be a good solution. +To address this problem, operators should use the ``AGENT`` config group option +``kill_scripts_path`` to configure a path to where ``kill scripts`` for such +processes live. By default, it is set to ``/etc/neutron/kill_scripts/``. +If option ``kill_scripts_path`` is changed in the config to a different +location, ``exec_dirs`` in ``/etc/rootwrap.conf`` should be changed accordingly. +If ``kill_scripts_path`` is set, every time neutron has to kill a process, +for example ``dnsmasq``, it will look in this directory for a file with the name +``<process_name>-kill``. So for the ``dnsmasq`` process it will look for a +``dnsmasq-kill`` script. If such a file exists there, it will be called +instead of using the ``kill`` command. + +Kill scripts are called with two parameters: + +.. 
code-block:: + + <process_name>-kill <sig> <pid> + +where: ``<sig>`` is the signal, same as with the ``kill`` command, for example +``9`` or ``SIGKILL``; and ``<pid>`` is the pid of the process to kill. + +This external script should then handle killing of the given process as neutron +will not call the ``kill`` command for it anymore. diff --git a/etc/neutron/rootwrap.d/dhcp.filters b/etc/neutron/rootwrap.d/dhcp.filters index 2b2d9a7d21f..0502eb63fdf 100644 --- a/etc/neutron/rootwrap.d/dhcp.filters +++ b/etc/neutron/rootwrap.d/dhcp.filters @@ -15,6 +15,8 @@ dnsmasq: CommandFilter, dnsmasq, root # neutron/agent/linux/dhcp.py kill_dnsmasq: KillFilter, root, /sbin/dnsmasq, -9, -HUP, -15 kill_dnsmasq_usr: KillFilter, root, /usr/sbin/dnsmasq, -9, -HUP, -15 +# dnsmasq kill script filter +kill_dnsmasq_script: CommandFilter, dnsmasq-kill, root ovs-vsctl: CommandFilter, ovs-vsctl, root mm-ctl: CommandFilter, mm-ctl, root diff --git a/etc/neutron/rootwrap.d/dibbler.filters b/etc/neutron/rootwrap.d/dibbler.filters index 7ba7015c25e..18d20782604 100644 --- a/etc/neutron/rootwrap.d/dibbler.filters +++ b/etc/neutron/rootwrap.d/dibbler.filters @@ -15,3 +15,7 @@ # prefix_delegation_agent dibbler-client: CommandFilter, dibbler-client, root kill_dibbler-client: KillFilter, root, dibbler-client, -9 +# dibbler kill script filter +kill_dibbler_script: CommandFilter, dibbler-kill, root +# dibbler-client kill script filter +kill_dibbler-client_script: CommandFilter, dibbler-client-kill, root diff --git a/etc/neutron/rootwrap.d/l3.filters b/etc/neutron/rootwrap.d/l3.filters index 0382abaec72..cdfc85bb9c1 100644 --- a/etc/neutron/rootwrap.d/l3.filters +++ b/etc/neutron/rootwrap.d/l3.filters @@ -19,6 +19,8 @@ radvd: CommandFilter, radvd, root # haproxy haproxy: RegExpFilter, haproxy, root, haproxy, -f, .* kill_haproxy: KillFilter, root, haproxy, -15, -9, -HUP +# haproxy kill script filter +kill_haproxy_script: CommandFilter, haproxy-kill, root kill_radvd_usr: KillFilter, root, /usr/sbin/radvd, -15, -9, -HUP kill_radvd: 
KillFilter, root, /sbin/radvd, -15, -9, -HUP @@ -52,6 +54,8 @@ ip6tables-restore: CommandFilter, ip6tables-restore, root # Keepalived keepalived: CommandFilter, keepalived, root kill_keepalived: KillFilter, root, keepalived, -HUP, -15, -9 +# keepalived kill script filter +kill_keepalived_script: CommandFilter, keepalived-kill, root # l3 agent to delete floatingip's conntrack state conntrack: CommandFilter, conntrack, root @@ -75,3 +79,5 @@ kill_keepalived_monitor_py37: KillFilter, root, python3.7, -15 # absolute path kill_keepalived_monitor_platform_py: KillFilter, root, /usr/libexec/platform-python, -15 kill_keepalived_monitor_platform_py36: KillFilter, root, /usr/libexec/platform-python3.6, -15 +# neutron-keepalived-state-change-monitor kill script filter +kill_neutron-keepalived-state-change-monitor_script: CommandFilter, neutron-keepalived-state-change-monitor-kill, root diff --git a/etc/rootwrap.conf b/etc/rootwrap.conf index 3a6b11f44dc..3b56b6a3e49 100644 --- a/etc/rootwrap.conf +++ b/etc/rootwrap.conf @@ -10,7 +10,7 @@ filters_path=/etc/neutron/rootwrap.d,/usr/share/neutron/rootwrap # explicitely specify a full path (separated by ',') # If not specified, defaults to system PATH environment variable. # These directories MUST all be only writeable by root ! -exec_dirs=/sbin,/usr/sbin,/bin,/usr/bin,/usr/local/bin,/usr/local/sbin +exec_dirs=/sbin,/usr/sbin,/bin,/usr/bin,/usr/local/bin,/usr/local/sbin,/etc/neutron/kill_scripts # Enable logging to syslog # Default value is False diff --git a/neutron/agent/l3/ha_router.py b/neutron/agent/l3/ha_router.py index dd2084a8507..a748c6c355d 100644 --- a/neutron/agent/l3/ha_router.py +++ b/neutron/agent/l3/ha_router.py @@ -35,6 +35,8 @@ LOG = logging.getLogger(__name__) HA_DEV_PREFIX = 'ha-' IP_MONITOR_PROCESS_SERVICE = 'ip_monitor' SIGTERM_TIMEOUT = 10 +KEEPALIVED_STATE_CHANGE_MONITOR_SERVICE_NAME = ( + "neutron-keepalived-state-change-monitor") # TODO(liuyulong): move to neutron-lib? 
STATE_CHANGE_PROC_NAME = 'neutron-keepalived-state-change' @@ -360,6 +362,7 @@ class HaRouter(router.RouterInfo): self.agent_conf, '%s.monitor' % self.router_id, self.ha_namespace, + service=KEEPALIVED_STATE_CHANGE_MONITOR_SERVICE_NAME, default_cmd_callback=self._get_state_change_monitor_callback()) def _get_state_change_monitor_callback(self): diff --git a/neutron/agent/linux/dhcp.py b/neutron/agent/linux/dhcp.py index 327c4e8ffdd..1cb3313b5b4 100644 --- a/neutron/agent/linux/dhcp.py +++ b/neutron/agent/linux/dhcp.py @@ -242,6 +242,7 @@ class DhcpLocalProcess(DhcpBase): conf=self.conf, uuid=self.network.id, namespace=self.network.namespace, + service=DNSMASQ_SERVICE_NAME, default_cmd_callback=cmd_callback, pid_file=self.get_conf_file_name('pid'), run_as_root=True) diff --git a/neutron/agent/linux/external_process.py b/neutron/agent/linux/external_process.py index b1d9c3cbc7e..36c8e8e95c4 100644 --- a/neutron/agent/linux/external_process.py +++ b/neutron/agent/linux/external_process.py @@ -66,6 +66,7 @@ class ProcessManager(MonitoredProcess): self.pid_file = pid_file self.run_as_root = run_as_root or self.namespace is not None self.custom_reload_callback = custom_reload_callback + self.kill_scripts_path = cfg.CONF.AGENT.kill_scripts_path if service: self.service_pid_fname = 'pid.' 
+ service @@ -105,7 +106,7 @@ class ProcessManager(MonitoredProcess): ip_wrapper.netns.execute(cmd, addl_env=self.cmd_addl_env, run_as_root=self.run_as_root) else: - cmd = ['kill', '-%s' % (sig), pid] + cmd = self.get_kill_cmd(sig, pid) utils.execute(cmd, run_as_root=self.run_as_root) # In the case of shutting down, remove the pid file if sig == '9': @@ -117,6 +118,13 @@ class ProcessManager(MonitoredProcess): else: LOG.debug('No process started for %s', self.uuid) + def get_kill_cmd(self, sig, pid): + if self.kill_scripts_path: + kill_file = "%s-kill" % self.service + if os.path.isfile(os.path.join(self.kill_scripts_path, kill_file)): + return [kill_file, sig, pid] + return ['kill', '-%s' % (sig), pid] + def get_pid_file_name(self): """Returns the file name for a given kind of config file.""" if self.pid_file: diff --git a/neutron/agent/linux/keepalived.py b/neutron/agent/linux/keepalived.py index edcd837fb28..44dd054a16d 100644 --- a/neutron/agent/linux/keepalived.py +++ b/neutron/agent/linux/keepalived.py @@ -457,6 +457,7 @@ class KeepalivedManager(object): cfg.CONF, self.resource_id, self.namespace, + service=KEEPALIVED_SERVICE_NAME, pids_path=self.conf_path) def _get_vrrp_process(self, pid_file): diff --git a/neutron/agent/metadata/driver.py b/neutron/agent/metadata/driver.py index 16e78a91da2..93f78b199b1 100644 --- a/neutron/agent/metadata/driver.py +++ b/neutron/agent/metadata/driver.py @@ -35,6 +35,7 @@ from neutron.agent.linux import external_process LOG = logging.getLogger(__name__) METADATA_SERVICE_NAME = 'metadata-proxy' +HAPROXY_SERVICE = 'haproxy' PROXY_CONFIG_DIR = "ns-metadata-proxy" _HAPROXY_CONFIG_TEMPLATE = """ @@ -220,7 +221,7 @@ class MetadataDriver(object): conf.state_path, pid_file) haproxy.create_config_file() - proxy_cmd = ['haproxy', + proxy_cmd = [HAPROXY_SERVICE, '-f', haproxy.cfg_path] return proxy_cmd @@ -260,6 +261,7 @@ class MetadataDriver(object): conf=conf, uuid=router_id, namespace=ns_name, + service=HAPROXY_SERVICE, 
default_cmd_callback=callback) diff --git a/neutron/conf/agent/common.py b/neutron/conf/agent/common.py index fd31fb0a39e..84f9f5446c4 100644 --- a/neutron/conf/agent/common.py +++ b/neutron/conf/agent/common.py @@ -145,6 +145,15 @@ PROCESS_MONITOR_OPTS = [ cfg.IntOpt('check_child_processes_interval', default=60, help=_('Interval between checks of child process liveness ' '(seconds), use 0 to disable')), + cfg.StrOpt('kill_scripts_path', default='/etc/neutron/kill_scripts/', + help=_('Location of scripts used to kill external processes. ' + 'Names of scripts here must follow the pattern: ' + '"-kill" where is name of ' + 'the process which should be killed using this script. ' + 'For example, kill script for dnsmasq process should be ' + 'named "dnsmasq-kill". ' + 'If path is set to None, then default "kill" command ' + 'will be used to stop processes.')), ] AVAILABILITY_ZONE_OPTS = [ diff --git a/neutron/tests/functional/agent/l3/framework.py b/neutron/tests/functional/agent/l3/framework.py index e91b6801a8f..ee9f74fa8bb 100644 --- a/neutron/tests/functional/agent/l3/framework.py +++ b/neutron/tests/functional/agent/l3/framework.py @@ -34,6 +34,7 @@ from neutron.agent.linux import external_process from neutron.agent.linux import interface from neutron.agent.linux import ip_lib from neutron.agent.linux import keepalived +from neutron.agent.metadata import driver as metadata_driver from neutron.common import utils as common_utils from neutron.conf.agent import common as agent_config from neutron.conf import common as common_config @@ -398,7 +399,8 @@ class L3AgentTestFramework(base.BaseSudoTestCase): pm = external_process.ProcessManager( conf, router.router_id, - router.ns_name) + router.ns_name, + service=metadata_driver.HAPROXY_SERVICE) return pm.active def device_exists_with_ips_and_mac(self, expected_device, name_getter, diff --git a/neutron/tests/functional/agent/test_dhcp_agent.py b/neutron/tests/functional/agent/test_dhcp_agent.py index 
98b5ae3df51..fc03a8cc84f 100644 --- a/neutron/tests/functional/agent/test_dhcp_agent.py +++ b/neutron/tests/functional/agent/test_dhcp_agent.py @@ -32,6 +32,7 @@ from neutron.agent.linux import external_process from neutron.agent.linux import interface from neutron.agent.linux import ip_lib from neutron.agent.linux import utils +from neutron.agent.metadata import driver as metadata_driver from neutron.common import utils as common_utils from neutron.conf.agent import common as config from neutron.tests.common import net_helpers @@ -256,7 +257,8 @@ class DHCPAgentOVSTestFramework(base.BaseSudoTestCase): return external_process.ProcessManager( self.conf, network.id, - network.namespace) + network.namespace, + service=metadata_driver.HAPROXY_SERVICE) class DHCPAgentOVSTestCase(DHCPAgentOVSTestFramework): diff --git a/neutron/tests/unit/agent/dhcp/test_agent.py b/neutron/tests/unit/agent/dhcp/test_agent.py index a6db52d63a7..0a5f9c9f86c 100644 --- a/neutron/tests/unit/agent/dhcp/test_agent.py +++ b/neutron/tests/unit/agent/dhcp/test_agent.py @@ -693,6 +693,7 @@ class TestDhcpAgentEventHandler(base.BaseTestCase): return mock.call(conf=cfg.CONF, uuid=FAKE_NETWORK_UUID, namespace=ns, + service='haproxy', default_cmd_callback=mock.ANY) def _enable_dhcp_helper(self, network, enable_isolated_metadata=False, diff --git a/neutron/tests/unit/agent/linux/test_external_process.py b/neutron/tests/unit/agent/linux/test_external_process.py index 866a284514b..732e9e52340 100644 --- a/neutron/tests/unit/agent/linux/test_external_process.py +++ b/neutron/tests/unit/agent/linux/test_external_process.py @@ -15,6 +15,7 @@ import os.path import mock +from oslo_config import cfg from oslo_utils import fileutils import psutil @@ -244,6 +245,48 @@ class TestProcessManager(base.BaseTestCase): manager.disable() debug.assert_called_once_with(mock.ANY, mock.ANY) + def _test_disable_custom_kill_script(self, kill_script_exists, namespace, + kill_scripts_path='test-path/'): + 
cfg.CONF.set_override("kill_scripts_path", kill_scripts_path, "AGENT") + if kill_script_exists: + expected_cmd = ['test-service-kill', '9', 4] + else: + expected_cmd = ['kill', '-9', 4] + + with mock.patch.object(ep.ProcessManager, 'pid') as pid: + pid.__get__ = mock.Mock(return_value=4) + with mock.patch.object(ep.ProcessManager, 'active') as active: + active.__get__ = mock.Mock(return_value=True) + manager = ep.ProcessManager( + self.conf, 'uuid', namespace=namespace, + service='test-service') + with mock.patch.object(ep, 'utils') as utils, \ + mock.patch.object(os.path, 'isfile', + return_value=kill_script_exists): + manager.disable() + utils.execute.assert_called_with( + expected_cmd, run_as_root=bool(namespace)) + + def test_disable_custom_kill_script_no_namespace(self): + self._test_disable_custom_kill_script( + kill_script_exists=True, namespace=None) + + def test_disable_custom_kill_script_namespace(self): + self._test_disable_custom_kill_script( + kill_script_exists=True, namespace="ns") + + def test_disable_custom_kill_script_no_kill_script_no_namespace(self): + self._test_disable_custom_kill_script( + kill_script_exists=False, namespace=None) + + def test_disable_custom_kill_script_no_kill_script_namespace(self): + self._test_disable_custom_kill_script( + kill_script_exists=False, namespace="ns") + + def test_disable_custom_kill_script_namespace_no_path(self): + self._test_disable_custom_kill_script( + kill_script_exists=False, namespace="ns", kill_scripts_path=None) + def test_get_pid_file_name_default(self): manager = ep.ProcessManager(self.conf, 'uuid') retval = manager.get_pid_file_name() diff --git a/releasenotes/notes/add-custom-kill-scripts-af405ba49142d59c.yaml b/releasenotes/notes/add-custom-kill-scripts-af405ba49142d59c.yaml new file mode 100644 index 00000000000..2165e1414f6 --- /dev/null +++ b/releasenotes/notes/add-custom-kill-scripts-af405ba49142d59c.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + Added support for custom scripts used to kill 
external processes managed by + neutron agents, such as ``dnsmasq`` or ``keepalived``. Such custom scripts, + if defined, will be used instead of the default ``kill`` command to kill such + external processes.