Add a swift-reload command
Previously, WSGI server systemd unit files might have used something like

    ExecReload=kill -USR1 $MAINPID

This was risky; in the related change, reloads were made safer, but required more than one ExecReload line.

Meanwhile, the systemd docs (https://www.freedesktop.org/software/systemd/man/systemd.service.html#ExecReload=) say:

> It is strongly recommended to set ExecReload= to a command that
> not only triggers a configuration reload of the daemon, but also
> synchronously waits for it to complete.

*Neither* set of ExecReloads would do that.

Now, add a new swift-reload command which, given a PID,

* validates that the PID seems to belong to a Swift WSGI server manager process,
* checks that the config used by that PID is still valid,
* signals the PID to perform a seamless reload, and
* waits for the reload to complete by monitoring the PID's children.

As a result, WSGI server systemd unit files can now use something like

    ExecReload=swift-reload $MAINPID

to follow systemd recommendations.

Change-Id: Ifcadd2f8427f107aae1921cdd311f7973b0312e1
Related-Change: I9e5e158ce8be92535430b9cabf040063f5188bf4
This commit is contained in:
parent
9191a32e2e
commit
212525118c
@ -91,6 +91,7 @@ keystone =
|
||||
console_scripts =
|
||||
swift-manage-shard-ranges = swift.cli.manage_shard_ranges:main
|
||||
swift-container-deleter = swift.cli.container_deleter:main
|
||||
swift-reload = swift.cli.reload:main
|
||||
|
||||
paste.app_factory =
|
||||
proxy = swift.proxy.server:app_factory
|
||||
|
141
swift/cli/reload.py
Executable file
141
swift/cli/reload.py
Executable file
@ -0,0 +1,141 @@
|
||||
# Copyright (c) 2022 NVIDIA
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""
|
||||
Safely reload WSGI servers while minimizing client downtime and errors by
|
||||
|
||||
* validating that the process is a Swift WSGI server manager,
|
||||
* checking that the configuration file used is valid,
|
||||
* sending the "seamless reload" signal, and
|
||||
* waiting for the reload to complete.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
import argparse
|
||||
import errno
|
||||
import os
|
||||
import os.path
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
from swift.common.manager import get_child_pids
|
||||
|
||||
|
||||
# Exit statuses used by this command, listed in increasing numeric order.
EXIT_RELOAD_FAILED = 1
EXIT_BAD_PID = 2  # similar to argparse exiting 2 on an unknown arg
# 128 plus the platform's ETIMEDOUT errno value
EXIT_RELOAD_TIMEOUT = errno.ETIMEDOUT + 128
|
||||
|
||||
|
||||
def validate_manager_pid(pid):
    """
    Check that ``pid`` looks like a Swift WSGI server manager process.

    The command line is read from ``/proc/<pid>/cmdline`` and the session
    id from ``os.getsid``. Validation passes only if

    * exactly one argument contains ``/bin/`` without also containing
      ``/bin/python``, and its basename starts with ``swift-``,
    * that basename is one of the proxy, account, container or object
      server scripts, and
    * the session id of the process equals ``pid``.

    On any failure a message is written to stderr and the program exits
    with status ``EXIT_BAD_PID``.

    :param pid: integer process id to inspect
    :returns: a tuple of (command line as a list of arguments,
              basename of the server script)
    """
    try:
        with open('/proc/%d/cmdline' % pid, 'r') as fp:
            # Arguments are NUL-delimited; drop the outer NULs before
            # splitting so no empty argument is produced.
            cmd = fp.read().strip('\x00').split('\x00')
        sid = os.getsid(pid)
    except (IOError, OSError, UnicodeDecodeError):
        # UnicodeDecodeError: on Python 3 the text-mode read can fail on a
        # command line that is not valid in the locale encoding; report
        # that like any other unreadable process rather than crashing.
        print("Failed to get process information for %s" % pid,
              file=sys.stderr)
        # sys.exit rather than the bare ``exit`` helper, which is only
        # injected by the ``site`` module for interactive use.
        sys.exit(EXIT_BAD_PID)

    scripts = [os.path.basename(c) for c in cmd
               if '/bin/' in c and '/bin/python' not in c]

    if len(scripts) != 1 or not scripts[0].startswith("swift-"):
        print("Non-swift process: %r" % ' '.join(cmd), file=sys.stderr)
        sys.exit(EXIT_BAD_PID)

    if scripts[0] not in {"swift-proxy-server", "swift-account-server",
                          "swift-container-server", "swift-object-server"}:
        print("Process does not support config checks: %s" % scripts[0],
              file=sys.stderr)
        sys.exit(EXIT_BAD_PID)

    if sid != pid:
        # The session id is offered as the PID the caller probably meant.
        print("Process appears to be a %s worker, not a manager. "
              "Did you mean %s?" % (scripts[0], sid), file=sys.stderr)
        sys.exit(EXIT_BAD_PID)

    return cmd, scripts[0]
|
||||
|
||||
|
||||
def _send_reload_signal(pid, verbose):
    """
    Send SIGUSR1, the "seamless reload" signal, to ``pid``.

    :param pid: process id to signal
    :param verbose: if true, announce the signal on stdout first
    """
    if verbose:
        print("Sending USR1 signal")
    os.kill(pid, signal.SIGUSR1)


def _reload_and_wait(pid, timeout, verbose):
    """
    Signal ``pid`` to reload, then poll its children until the reload
    appears to have completed.

    The reload is considered complete once a child that first appeared
    after the signal was sent is no longer among the children.

    :param pid: process id of the manager process
    :param timeout: maximum number of seconds to keep polling
    :param verbose: if true, report progress on stdout
    :returns: True if completion was observed, False if ``timeout`` elapsed
    :raises subprocess.CalledProcessError: if any ``get_child_pids`` call
                                           fails
    """
    # Snapshot the children *before* signalling so that anything seen
    # later can be attributed to the reload.
    original_children = get_child_pids(pid)
    children_since_reload = set()

    _send_reload_signal(pid, verbose)

    start = time.time()
    while time.time() - start < timeout:
        children = get_child_pids(pid)
        new_children = (children - original_children
                        - children_since_reload)
        if new_children:
            if verbose:
                print("Found new children: %s" % ", ".join(
                    str(child) for child in new_children))
            children_since_reload |= new_children
        if children_since_reload - children:
            # At least one new child exited; presumably, it was
            # the temporary child waiting to shutdown sockets
            return True
        # We want this to be fairly low, since the temporary child
        # may not hang around very long
        time.sleep(0.1)
    return False


def main(args=None):
    """
    Entry point for the ``swift-reload`` command.

    Validates the target PID, re-runs the server's own command line with
    ``--test-config`` appended, sends SIGUSR1 and, unless ``--no-wait``
    was given, waits for the reload to complete.

    Exits with ``EXIT_RELOAD_FAILED`` if the config check fails or listing
    the children fails while waiting, and with ``EXIT_RELOAD_TIMEOUT`` if
    the reload is not seen to complete within the timeout.

    :param args: list of command-line argument strings; when None,
                 argparse falls back to ``sys.argv[1:]``
    """
    # NOTE(review): the module docstring is passed positionally, which
    # argparse treats as ``prog`` rather than ``description``. Left as-is
    # because the unit tests pin the resulting usage text.
    parser = argparse.ArgumentParser(__doc__)
    parser.add_argument("pid", type=int,
                        help="server PID which should be reloaded")
    wait_group = parser.add_mutually_exclusive_group()
    wait_group.add_argument("-t", "--timeout", type=float, default=300.0,
                            help="max time to wait for reload to complete")
    wait_group.add_argument("-w", "--no-wait",
                            action="store_false", dest="wait",
                            help="skip waiting for reload to complete")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="display more information as the process reloads")
    args = parser.parse_args(args)

    cmd, script = validate_manager_pid(args.pid)

    if args.verbose:
        print("Checking config for %s" % script)
    try:
        subprocess.check_call(cmd + ["--test-config"])
    except subprocess.CalledProcessError:
        print("Failed to validate config", file=sys.stderr)
        # sys.exit rather than the bare ``exit`` helper, which is only
        # injected by the ``site`` module for interactive use.
        sys.exit(EXIT_RELOAD_FAILED)

    if args.wait:
        try:
            reloaded = _reload_and_wait(args.pid, args.timeout, args.verbose)
        except subprocess.CalledProcessError:
            # This could pop during any of the calls to get_child_pids
            print("Process seems to have died!", file=sys.stderr)
            sys.exit(EXIT_RELOAD_FAILED)
        if not reloaded:
            print("Timed out reloading %s" % script, file=sys.stderr)
            sys.exit(EXIT_RELOAD_TIMEOUT)
    else:  # --no-wait
        _send_reload_signal(args.pid, args.verbose)

    print("Reloaded %s" % script)


if __name__ == "__main__":
    main()
|
@ -180,6 +180,17 @@ def kill_group(pid, sig):
|
||||
os.kill(-pid, sig)
|
||||
|
||||
|
||||
def get_child_pids(pid):
    """
    Ask ``ps`` for the processes whose parent is ``pid``.

    :param pid: process id of the parent
    :returns: set of child process ids, as ints
    :raises subprocess.CalledProcessError: if ``ps`` exits non-zero
    """
    ps_cmd = ["ps", "--ppid", str(pid), "--no-headers", "-o", "pid"]
    listing = subprocess.check_output(ps_cmd)
    return set(map(int, listing.split()))
|
||||
|
||||
|
||||
def format_server_name(servername):
|
||||
"""
|
||||
Formats server name as swift compatible server names
|
||||
@ -700,9 +711,7 @@ class Server(object):
|
||||
print('Removing pid file %s with invalid pid' % pid_file)
|
||||
remove_file(pid_file)
|
||||
continue
|
||||
ps_cmd = ['ps', '--ppid', str(pid), '--no-headers', '-o', 'pid']
|
||||
for pid in subprocess.check_output(ps_cmd).split():
|
||||
pid = int(pid)
|
||||
for pid in get_child_pids(pid):
|
||||
if self._signal_pid(sig, pid, pid_file, kwargs.get('verbose')):
|
||||
pids[pid] = pid_file
|
||||
return pids
|
||||
|
220
test/unit/cli/test_reload.py
Normal file
220
test/unit/cli/test_reload.py
Normal file
@ -0,0 +1,220 @@
|
||||
# Copyright (c) 2022 NVIDIA
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import mock
|
||||
import signal
|
||||
import six
|
||||
import subprocess
|
||||
import unittest
|
||||
|
||||
from six.moves import StringIO
|
||||
from swift.cli import reload
|
||||
|
||||
|
||||
@mock.patch('sys.stderr', new_callable=StringIO)
class TestValidateManagerPid(unittest.TestCase):
    """Tests for ``reload.validate_manager_pid``."""

    def _fake_proc(self, argv, sid):
        """
        Patch ``open`` (as seen by the reload module) and ``os.getsid``
        until the test is cleaned up.

        :param argv: command line to serve, NUL-joined with a trailing NUL
        :param sid: value ``os.getsid`` should return
        :returns: the ``mock_open`` object standing in for ``open``
        """
        fake_open = mock.mock_open(read_data='\x00'.join(argv) + '\x00')
        patchers = (
            mock.patch.object(reload, 'open', fake_open),
            mock.patch('os.getsid', return_value=sid),
        )
        for patcher in patchers:
            patcher.start()
            self.addCleanup(patcher.stop)
        return fake_open

    def _assert_rejected(self, pid, mock_stderr, message):
        """Validate ``pid``; expect EXIT_BAD_PID and ``message`` on stderr."""
        with self.assertRaises(SystemExit) as caught:
            reload.validate_manager_pid(pid)
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))
        self.assertEqual(mock_stderr.getvalue(), message)

    def test_good(self, mock_stderr):
        argv = [
            '/usr/local/bin/python3.9',
            '/usr/local/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
            'some',
            'extra',
            'args',
        ]
        fake_open = self._fake_proc(argv, sid=123)
        result = reload.validate_manager_pid(123)
        self.assertEqual(result, (argv, 'swift-proxy-server'))
        self.assertEqual(fake_open.mock_calls[0],
                         mock.call('/proc/123/cmdline', 'r'))

    def test_open_error(self, mock_stderr):
        with mock.patch.object(reload, 'open', side_effect=OSError):
            self._assert_rejected(
                123, mock_stderr,
                'Failed to get process information for 123\n')

    def test_non_python(self, mock_stderr):
        self._fake_proc(['/usr/bin/rsync'], sid=56)
        self._assert_rejected(
            56, mock_stderr,
            "Non-swift process: '/usr/bin/rsync'\n")

    def test_non_swift(self, mock_stderr):
        self._fake_proc(['/usr/bin/python', 'some-script'], sid=123)
        self._assert_rejected(
            123, mock_stderr,
            "Non-swift process: '/usr/bin/python some-script'\n")

    def test_worker(self, mock_stderr):
        argv = [
            '/usr/bin/python3.9',
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
        ]
        # The session id differs from the pid being validated.
        fake_open = self._fake_proc(argv, sid=123)
        self._assert_rejected(
            56, mock_stderr,
            'Process appears to be a swift-proxy-server worker, '
            'not a manager. Did you mean 123?\n')
        self.assertEqual(fake_open.mock_calls[0],
                         mock.call('/proc/56/cmdline', 'r'))

    def test_non_server(self, mock_stderr):
        argv = [
            '/usr/bin/swift-ring-builder',
            '/etc/swift/object.builder',
            'rebalance',
        ]
        fake_open = self._fake_proc(argv, sid=123)
        self._assert_rejected(
            123, mock_stderr,
            'Process does not support config checks: '
            'swift-ring-builder\n')
        self.assertEqual(fake_open.mock_calls[0],
                         mock.call('/proc/123/cmdline', 'r'))
|
||||
|
||||
|
||||
class TestMain(unittest.TestCase):
    """Tests for ``reload.main`` with all process interaction mocked."""

    def _patch(self, patcher):
        """Start ``patcher``, register its cleanup, return what it started."""
        started = patcher.start()
        self.addCleanup(patcher.stop)
        return started

    def setUp(self):
        self.mock_stderr = self._patch(
            mock.patch('sys.stderr', new_callable=StringIO))
        self.mock_check_call = self._patch(
            mock.patch('subprocess.check_call'))
        self.mock_validate = self._patch(
            mock.patch.object(reload, 'validate_manager_pid'))
        self.mock_get_child_pids = self._patch(
            mock.patch.object(reload, 'get_child_pids'))
        self.mock_kill = self._patch(mock.patch('os.kill'))
        # main() sleeps between polls of the child list; stub that out so
        # the tests do not actually wait.
        self.mock_sleep = self._patch(mock.patch('time.sleep'))

    def test_good(self):
        self.mock_validate.return_value = (
            [
                '/usr/bin/swift-proxy-server',
                '/etc/swift/proxy-server.conf'
            ],
            'swift-proxy-server',
        )
        # First entry is the pre-signal snapshot; the rest are polls.
        self.mock_get_child_pids.side_effect = [
            {'worker1', 'worker2'},
            {'worker1', 'worker2', 'foster parent'},
            {'worker1', 'worker2', 'foster parent', 'new worker'},
            {'worker1', 'worker2', 'new worker'},
        ]
        self.assertIsNone(reload.main(['123']))
        self.assertEqual(self.mock_check_call.mock_calls, [mock.call([
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
            '--test-config',
        ])])
        self.assertEqual(self.mock_kill.mock_calls, [
            mock.call(123, signal.SIGUSR1),
        ])

    def test_no_wait(self):
        self.mock_validate.return_value = (
            [
                '/usr/bin/swift-proxy-server',
                '/etc/swift/proxy-server.conf'
            ],
            'swift-proxy-server',
        )
        self.assertIsNone(reload.main(['123', '--no-wait']))
        self.assertEqual(self.mock_check_call.mock_calls, [mock.call([
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
            '--test-config',
        ])])
        # The signal is still sent, but the children are never inspected.
        self.assertEqual(self.mock_kill.mock_calls, [
            mock.call(123, signal.SIGUSR1),
        ])
        self.assertEqual(self.mock_get_child_pids.mock_calls, [])

    @mock.patch('time.time', side_effect=[1, 10, 100, 400])
    def test_timeout(self, mock_time):
        self.mock_validate.return_value = (
            [
                '/usr/bin/python3',
                '/usr/bin/swift-proxy-server',
                '/etc/swift/proxy-server.conf'
            ],
            'swift-proxy-server',
        )
        self.mock_get_child_pids.side_effect = [
            {'worker1', 'worker2'},
            {'worker1', 'worker2', 'foster parent'},
            {'worker1', 'worker2', 'foster parent', 'new worker'},
        ]
        with self.assertRaises(SystemExit) as caught:
            reload.main(['123'])
        self.assertEqual(caught.exception.args, (reload.EXIT_RELOAD_TIMEOUT,))
        self.assertEqual(self.mock_check_call.mock_calls, [mock.call([
            '/usr/bin/python3',
            '/usr/bin/swift-proxy-server',
            '/etc/swift/proxy-server.conf',
            '--test-config',
        ])])
        self.assertEqual(self.mock_kill.mock_calls, [
            mock.call(123, signal.SIGUSR1),
        ])
        self.assertEqual(self.mock_stderr.getvalue(),
                         'Timed out reloading swift-proxy-server\n')

    def test_check_failed(self):
        self.mock_validate.return_value = (
            [
                '/usr/bin/python3',
                '/usr/bin/swift-object-server',
                '/etc/swift/object-server/1.conf'
            ],
            'swift-object-server',
        )
        self.mock_check_call.side_effect = subprocess.CalledProcessError(
            2, 'swift-object-server')
        with self.assertRaises(SystemExit) as caught:
            reload.main(['123'])
        self.assertEqual(caught.exception.args, (reload.EXIT_RELOAD_FAILED,))
        self.assertEqual(self.mock_check_call.mock_calls, [mock.call([
            '/usr/bin/python3',
            '/usr/bin/swift-object-server',
            '/etc/swift/object-server/1.conf',
            '--test-config',
        ])])
        self.assertEqual(self.mock_kill.mock_calls, [])

    def test_process_died(self):
        self.mock_validate.return_value = (
            [
                '/usr/bin/python3',
                '/usr/bin/swift-object-server',
                '/etc/swift/object-server/1.conf'
            ],
            'swift-object-server',
        )
        # Listing the children fails on the pre-signal snapshot.
        self.mock_get_child_pids.side_effect = \
            subprocess.CalledProcessError(1, 'ps')
        with self.assertRaises(SystemExit) as caught:
            reload.main(['123'])
        self.assertEqual(caught.exception.args, (reload.EXIT_RELOAD_FAILED,))
        self.assertEqual(self.mock_kill.mock_calls, [])
        self.assertEqual(self.mock_stderr.getvalue(),
                         'Process seems to have died!\n')

    def test_needs_pid(self):
        with self.assertRaises(SystemExit) as caught:
            reload.main([])
        self.assertEqual(caught.exception.args, (reload.EXIT_BAD_PID,))
        # The usage text begins with the module docstring because main()
        # hands it to argparse as the program name.
        msg = 'usage: \nSafely reload WSGI servers'
        self.assertEqual(self.mock_stderr.getvalue()[:len(msg)], msg)
        if six.PY2:
            msg = '\n: error: too few arguments\n'
        else:
            msg = '\n: error: the following arguments are required: pid\n'
        self.assertEqual(self.mock_stderr.getvalue()[-len(msg):], msg)
|
Loading…
x
Reference in New Issue
Block a user