Add a swift-reload command

Previously, WSGI server systemd unit files might have used something like

   ExecReload=kill -USR1 $MAINPID

This was risky; in the related change, reloads were made safer, but
required more than one ExecReload line. Meanwhile, systemd docs
(https://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecReload=)
say

> It is strongly recommended to set ExecReload= to a command that
> not only triggers a configuration reload of the daemon, but also
> synchronously waits for it to complete.

which *neither* set of ExecReloads would do.

Now, add a new swift-reload command which, given a pid,

   * validates that the PID seems to belong to a Swift WSGI server
     manager process,
   * checks that the config used by that PID is still valid,
   * signals the PID to perform a seamless reload, and
   * waits for the reload to complete by monitoring the PID's children.

As a result, WSGI server systemd unit files can now use something like

   ExecReload=swift-reload $MAINPID

to follow systemd recommendations.

Change-Id: Ifcadd2f8427f107aae1921cdd311f7973b0312e1
Related-Change: I9e5e158ce8be92535430b9cabf040063f5188bf4
This commit is contained in:
Tim Burke 2022-03-10 11:44:39 -08:00
parent 9191a32e2e
commit 212525118c
4 changed files with 374 additions and 3 deletions

View File

@ -91,6 +91,7 @@ keystone =
console_scripts =
swift-manage-shard-ranges = swift.cli.manage_shard_ranges:main
swift-container-deleter = swift.cli.container_deleter:main
swift-reload = swift.cli.reload:main
paste.app_factory =
proxy = swift.proxy.server:app_factory

141
swift/cli/reload.py Executable file
View File

@ -0,0 +1,141 @@
# Copyright (c) 2022 NVIDIA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Safely reload WSGI servers while minimizing client downtime and errors by
* validating that the process is a Swift WSGI server manager,
* checking that the configuration file used is valid,
* sending the "seamless reload" signal, and
* waiting for the reload to complete.
"""
from __future__ import print_function
import argparse
import errno
import os
import os.path
import signal
import subprocess
import sys
import time
from swift.common.manager import get_child_pids
# Exit statuses used by this command.

# The PID could not be inspected or is not a supported Swift server manager.
EXIT_BAD_PID = 2  # similar to argparse exiting 2 on an unknown arg
# The config check failed, or the process's children could not be listed.
EXIT_RELOAD_FAILED = 1
# The reload was signalled but was not seen to complete within the timeout.
EXIT_RELOAD_TIMEOUT = 128 + errno.ETIMEDOUT
def validate_manager_pid(pid):
    """
    Check that ``pid`` looks like a Swift WSGI server manager process.

    The command line is read from ``/proc/<pid>/cmdline`` and the session
    id is obtained with ``os.getsid``. If any check fails, a message is
    written to stderr and the program exits with ``EXIT_BAD_PID``.

    :param pid: integer process id to inspect
    :returns: a ``(cmd, script)`` tuple, where ``cmd`` is the list of
              command-line arguments of the process and ``script`` is the
              basename of the swift script it is running (for example
              ``swift-proxy-server``)
    :raises SystemExit: if the process information cannot be read, the
                        command line does not identify exactly one
                        supported ``swift-*`` server script, or the
                        process's session id differs from its pid
    """
    try:
        with open('/proc/%d/cmdline' % pid, 'r') as fp:
            # Arguments are NUL-separated; drop leading/trailing NULs
            # before splitting so no empty entries are produced at the ends.
            cmd = fp.read().strip('\x00').split('\x00')
        sid = os.getsid(pid)
    except (IOError, OSError):
        print("Failed to get process information for %s" % pid,
              file=sys.stderr)
        # sys.exit rather than the site-provided exit(): the latter is an
        # interactive convenience that is not guaranteed to be defined.
        sys.exit(EXIT_BAD_PID)

    # Candidate scripts are arguments that live under a bin/ directory,
    # excluding the python interpreter itself.
    scripts = [os.path.basename(c) for c in cmd
               if '/bin/' in c and '/bin/python' not in c]

    if len(scripts) != 1 or not scripts[0].startswith("swift-"):
        print("Non-swift process: %r" % ' '.join(cmd), file=sys.stderr)
        sys.exit(EXIT_BAD_PID)

    if scripts[0] not in {"swift-proxy-server", "swift-account-server",
                          "swift-container-server", "swift-object-server"}:
        print("Process does not support config checks: %s" % scripts[0],
              file=sys.stderr)
        sys.exit(EXIT_BAD_PID)

    # A process whose session id is not its own pid is reported as a
    # worker; the session id is suggested as the PID the caller wanted.
    if sid != pid:
        print("Process appears to be a %s worker, not a manager. "
              "Did you mean %s?" % (scripts[0], sid), file=sys.stderr)
        sys.exit(EXIT_BAD_PID)

    return cmd, scripts[0]
def _send_reload_signal(pid, verbose):
    """Send SIGUSR1 to ``pid``, announcing it first when ``verbose``."""
    if verbose:
        print("Sending USR1 signal")
    os.kill(pid, signal.SIGUSR1)


def main(args=None):
    """
    Command-line entry point: validate a PID, check its config, reload it.

    The PID is validated with ``validate_manager_pid``, the process's own
    command line is re-run with ``--test-config`` appended, and SIGUSR1 is
    then sent. Unless ``--no-wait`` is given, the children of the PID are
    polled until a child that appeared after the signal has gone away
    again, or until the timeout elapses.

    :param args: list of command-line arguments; ``None`` makes argparse
                 read them from ``sys.argv``
    :raises SystemExit: with ``EXIT_RELOAD_FAILED`` if the config check
                        fails or the children cannot be listed, with
                        ``EXIT_RELOAD_TIMEOUT`` if the reload is not seen
                        to complete in time, or with whatever status
                        argparse / ``validate_manager_pid`` exit with
    """
    # NOTE(review): the first positional parameter of ArgumentParser is
    # ``prog``, so the module docstring ends up as the program name in the
    # usage text rather than as the description. Left untouched because
    # the emitted usage text is user-visible output.
    parser = argparse.ArgumentParser(__doc__)
    parser.add_argument("pid", type=int,
                        help="server PID which should be reloaded")
    wait_group = parser.add_mutually_exclusive_group()
    wait_group.add_argument("-t", "--timeout", type=float, default=300.0,
                            help="max time to wait for reload to complete")
    wait_group.add_argument("-w", "--no-wait",
                            action="store_false", dest="wait",
                            help="skip waiting for reload to complete")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="display more information as the process reloads")
    args = parser.parse_args(args)

    cmd, script = validate_manager_pid(args.pid)

    if args.verbose:
        print("Checking config for %s" % script)
    try:
        subprocess.check_call(cmd + ["--test-config"])
    except subprocess.CalledProcessError:
        print("Failed to validate config", file=sys.stderr)
        # sys.exit rather than the site-provided exit(): the latter is an
        # interactive convenience that is not guaranteed to be defined.
        sys.exit(EXIT_RELOAD_FAILED)

    if args.wait:
        try:
            # Snapshot the children *before* signalling so that anything
            # seen afterwards can be recognised as new.
            original_children = get_child_pids(args.pid)
            children_since_reload = set()

            _send_reload_signal(args.pid, args.verbose)

            start = time.time()
            while time.time() - start < args.timeout:
                children = get_child_pids(args.pid)
                new_children = (children - original_children
                                - children_since_reload)
                if new_children:
                    if args.verbose:
                        print("Found new children: %s" % ", ".join(
                            str(pid) for pid in new_children))
                    children_since_reload |= new_children
                if children_since_reload - children:
                    # At least one new child exited; presumably, it was
                    # the temporary child waiting to shutdown sockets
                    break
                # We want this to be fairly low, since the temporary child
                # may not hang around very long
                time.sleep(0.1)
            else:
                # Loop condition became false without a break: timed out
                print("Timed out reloading %s" % script, file=sys.stderr)
                sys.exit(EXIT_RELOAD_TIMEOUT)
        except subprocess.CalledProcessError:
            # This could pop during any of the calls to get_child_pids
            print("Process seems to have died!", file=sys.stderr)
            sys.exit(EXIT_RELOAD_FAILED)
    else:  # --no-wait
        _send_reload_signal(args.pid, args.verbose)

    print("Reloaded %s" % script)
# Allow this module to be executed directly as a script.
if __name__ == "__main__":
    main()

View File

@ -180,6 +180,17 @@ def kill_group(pid, sig):
os.kill(-pid, sig)
def get_child_pids(pid):
    """
    Look up the PIDs of the processes whose parent is ``pid``.

    The lookup is done by running ``ps --ppid <pid> --no-headers -o pid``
    and parsing its whitespace-separated output.

    :param pid: process id of the parent
    :returns: set of integer child PIDs
    :raises subprocess.CalledProcessError: if ``ps`` exits non-zero
    """
    ps_cmd = ["ps", "--ppid", str(pid), "--no-headers", "-o", "pid"]
    listing = subprocess.check_output(ps_cmd)
    return set(map(int, listing.split()))
def format_server_name(servername):
"""
Formats server name as swift compatible server names
@ -700,9 +711,7 @@ class Server(object):
print('Removing pid file %s with invalid pid' % pid_file)
remove_file(pid_file)
continue
ps_cmd = ['ps', '--ppid', str(pid), '--no-headers', '-o', 'pid']
for pid in subprocess.check_output(ps_cmd).split():
pid = int(pid)
for pid in get_child_pids(pid):
if self._signal_pid(sig, pid, pid_file, kwargs.get('verbose')):
pids[pid] = pid_file
return pids

View File

@ -0,0 +1,220 @@
# Copyright (c) 2022 NVIDIA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import mock
import signal
import six
import subprocess
import unittest
from six.moves import StringIO
from swift.cli import reload
@mock.patch('sys.stderr', new_callable=StringIO)
class TestValidateManagerPid(unittest.TestCase):
    # sys.stderr is replaced by a StringIO for every test method; the
    # replacement is handed to each test as ``mock_stderr``.

    @staticmethod
    def _cmdline_opener(argv):
        # Stand-in for open() whose read() returns ``argv`` NUL-separated
        # with a trailing NUL.
        return mock.mock_open(read_data='\x00'.join(argv) + '\x00')

    def _assert_bad_pid(self, caught, mock_stderr, expected_message):
        # Every rejection exits with EXIT_BAD_PID and a single message
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))
        self.assertEqual(mock_stderr.getvalue(), expected_message)

    def test_good(self, mock_stderr):
        cmd_args = [
            '/usr/local/bin/python3.9',
            '/usr/local/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
            'some',
            'extra',
            'args',
        ]
        fake_open = self._cmdline_opener(cmd_args)
        with mock.patch.object(reload, 'open', fake_open):
            with mock.patch('os.getsid', return_value=123):
                found = reload.validate_manager_pid(123)
        self.assertEqual(found, (cmd_args, 'swift-proxy-server'))
        self.assertEqual(fake_open.mock_calls[0],
                         mock.call('/proc/123/cmdline', 'r'))

    def test_open_error(self, mock_stderr):
        with mock.patch.object(reload, 'open', side_effect=OSError):
            with self.assertRaises(SystemExit) as caught:
                reload.validate_manager_pid(123)
        self._assert_bad_pid(
            caught, mock_stderr,
            'Failed to get process information for 123\n')

    def test_non_python(self, mock_stderr):
        fake_open = self._cmdline_opener(['/usr/bin/rsync'])
        with mock.patch.object(reload, 'open', fake_open), \
                mock.patch('os.getsid', return_value=56):
            with self.assertRaises(SystemExit) as caught:
                reload.validate_manager_pid(56)
        self._assert_bad_pid(
            caught, mock_stderr,
            "Non-swift process: '/usr/bin/rsync'\n")

    def test_non_swift(self, mock_stderr):
        fake_open = self._cmdline_opener(['/usr/bin/python', 'some-script'])
        with mock.patch.object(reload, 'open', fake_open), \
                mock.patch('os.getsid', return_value=123):
            with self.assertRaises(SystemExit) as caught:
                reload.validate_manager_pid(123)
        self._assert_bad_pid(
            caught, mock_stderr,
            "Non-swift process: '/usr/bin/python some-script'\n")

    def test_worker(self, mock_stderr):
        cmd_args = [
            '/usr/bin/python3.9',
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
        ]
        fake_open = self._cmdline_opener(cmd_args)
        # The session id (123) differs from the pid under test (56)
        with mock.patch.object(reload, 'open', fake_open), \
                mock.patch('os.getsid', return_value=123):
            with self.assertRaises(SystemExit) as caught:
                reload.validate_manager_pid(56)
        self._assert_bad_pid(
            caught, mock_stderr,
            'Process appears to be a swift-proxy-server worker, '
            'not a manager. Did you mean 123?\n')
        self.assertEqual(fake_open.mock_calls[0],
                         mock.call('/proc/56/cmdline', 'r'))

    def test_non_server(self, mock_stderr):
        cmd_args = [
            '/usr/bin/swift-ring-builder',
            '/etc/swift/object.builder',
            'rebalance',
        ]
        fake_open = self._cmdline_opener(cmd_args)
        with mock.patch.object(reload, 'open', fake_open), \
                mock.patch('os.getsid', return_value=123):
            with self.assertRaises(SystemExit) as caught:
                reload.validate_manager_pid(123)
        self._assert_bad_pid(
            caught, mock_stderr,
            'Process does not support config checks: '
            'swift-ring-builder\n')
        self.assertEqual(fake_open.mock_calls[0],
                         mock.call('/proc/123/cmdline', 'r'))
class TestMain(unittest.TestCase):
    # Exercises reload.main() with process validation, the config check,
    # child listing and signal delivery all replaced by mocks.

    def _start_patcher(self, patcher):
        # Start a patcher, arrange for it to be undone, return its mock
        started = patcher.start()
        self.addCleanup(patcher.stop)
        return started

    def setUp(self):
        self.mock_stderr = self._start_patcher(
            mock.patch('sys.stderr', new_callable=StringIO))
        self.mock_check_call = self._start_patcher(
            mock.patch('subprocess.check_call'))
        self.mock_validate = self._start_patcher(
            mock.patch.object(reload, 'validate_manager_pid'))
        self.mock_get_child_pids = self._start_patcher(
            mock.patch.object(reload, 'get_child_pids'))
        self.mock_kill = self._start_patcher(mock.patch('os.kill'))

    def test_good(self):
        cmd = [
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
        ]
        self.mock_validate.return_value = (cmd, 'swift-proxy-server')
        # Snapshot before the signal, then three polls; the last poll no
        # longer lists 'foster parent', which ends the wait.
        self.mock_get_child_pids.side_effect = [
            {'worker1', 'worker2'},
            {'worker1', 'worker2', 'foster parent'},
            {'worker1', 'worker2', 'foster parent', 'new worker'},
            {'worker1', 'worker2', 'new worker'},
        ]

        result = reload.main(['123'])

        self.assertIsNone(result)
        self.assertEqual(self.mock_check_call.mock_calls,
                         [mock.call(cmd + ['--test-config'])])
        self.assertEqual(self.mock_kill.mock_calls,
                         [mock.call(123, signal.SIGUSR1)])

    @mock.patch('time.time', side_effect=[1, 10, 100, 400])
    def test_timeout(self, mock_time):
        cmd = [
            '/usr/bin/python3',
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
        ]
        self.mock_validate.return_value = (cmd, 'swift-proxy-server')
        # No new child ever disappears, and the mocked clock jumps past
        # the default timeout on its fourth reading.
        self.mock_get_child_pids.side_effect = [
            {'worker1', 'worker2'},
            {'worker1', 'worker2', 'foster parent'},
            {'worker1', 'worker2', 'foster parent', 'new worker'},
        ]

        with self.assertRaises(SystemExit) as caught:
            reload.main(['123'])

        self.assertEqual(caught.exception.args,
                         (reload.EXIT_RELOAD_TIMEOUT,))
        self.assertEqual(self.mock_check_call.mock_calls,
                         [mock.call(cmd + ['--test-config'])])
        self.assertEqual(self.mock_kill.mock_calls,
                         [mock.call(123, signal.SIGUSR1)])
        self.assertEqual(self.mock_stderr.getvalue(),
                         'Timed out reloading swift-proxy-server\n')

    def test_check_failed(self):
        cmd = [
            '/usr/bin/python3',
            '/usr/bin/swift-object-server',
            '/etc/swift/object-server/1.conf',
        ]
        self.mock_validate.return_value = (cmd, 'swift-object-server')
        self.mock_check_call.side_effect = subprocess.CalledProcessError(
            2, 'swift-object-server')

        with self.assertRaises(SystemExit) as caught:
            reload.main(['123'])

        self.assertEqual(caught.exception.args,
                         (reload.EXIT_RELOAD_FAILED,))
        self.assertEqual(self.mock_check_call.mock_calls,
                         [mock.call(cmd + ['--test-config'])])
        # A failed config check must not result in any signal being sent
        self.assertEqual(self.mock_kill.mock_calls, [])

    def test_needs_pid(self):
        with self.assertRaises(SystemExit) as caught:
            reload.main([])
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))

        stderr_text = self.mock_stderr.getvalue()
        head = 'usage: \nSafely reload WSGI servers'
        self.assertEqual(stderr_text[:len(head)], head)
        # argparse words the missing-argument error differently on py2
        if six.PY2:
            tail = '\n: error: too few arguments\n'
        else:
            tail = '\n: error: the following arguments are required: pid\n'
        self.assertEqual(stderr_text[-len(tail):], tail)