swift-init: New option kill-after-timeout
This option sends SIGKILL to the daemon after the kill_wait period. When a daemon hangs and doesn't respond to SIGTERM/SIGHUP, there is currently no way to stop it using swift-init. Classic init scripts on Linux kill a hung process after a grace period, and this patch adds the same behaviour. This is most useful when using "restart" on a hung daemon. Change-Id: I8c932b673a0f51e52132df87ea2f4396f4bba9d8
This commit is contained in:
parent
dafeea6322
commit
3b1591f235
@ -74,6 +74,11 @@ def main():
|
|||||||
help="Return zero status code even if some config is "
|
help="Return zero status code even if some config is "
|
||||||
"missing. Default mode if any server is a glob or "
|
"missing. Default mode if any server is a glob or "
|
||||||
"one of aliases `all`, `main` or `rest`.")
|
"one of aliases `all`, `main` or `rest`.")
|
||||||
|
# SIGKILL daemon after kill_wait period
|
||||||
|
parser.add_option('--kill-after-timeout', dest='kill_after_timeout',
|
||||||
|
action='store_true',
|
||||||
|
help="Kill daemon and all childs after kill-wait "
|
||||||
|
"period.")
|
||||||
|
|
||||||
options, args = parser.parse_args()
|
options, args = parser.parse_args()
|
||||||
|
|
||||||
|
@ -111,6 +111,7 @@ allows one to use the keywords such as "all", "main" and "rest" for the <server>
|
|||||||
.IP "-r RUN_DIR, --run-dir=RUN_DIR directory where the pids will be stored (default /var/run/swift)
|
.IP "-r RUN_DIR, --run-dir=RUN_DIR directory where the pids will be stored (default /var/run/swift)
|
||||||
.IP "--strict return non-zero status code if some config is missing. Default mode if server is explicitly named."
|
.IP "--strict return non-zero status code if some config is missing. Default mode if server is explicitly named."
|
||||||
.IP "--non-strict return zero status code even if some config is missing. Default mode if server is one of aliases `all`, `main` or `rest`."
|
.IP "--non-strict return zero status code even if some config is missing. Default mode if server is one of aliases `all`, `main` or `rest`."
|
||||||
|
.IP "--kill-after-timeout kill daemon and all children after kill-wait period."
|
||||||
.PD
|
.PD
|
||||||
.RE
|
.RE
|
||||||
|
|
||||||
|
@ -162,6 +162,16 @@ def safe_kill(pid, sig, name):
|
|||||||
os.kill(pid, sig)
|
os.kill(pid, sig)
|
||||||
|
|
||||||
|
|
||||||
|
def kill_group(pid, sig):
    """Send a signal to a whole process group.

    :param pid: id of the process group to signal; it is negated before
                being passed to ``os.kill``
    :param sig: signal to send
    """
    # A negative PID tells os.kill() to address the process group
    # rather than a single process.
    os.kill(-pid, sig)
|
||||||
|
|
||||||
|
|
||||||
class UnknownCommandError(Exception):
|
class UnknownCommandError(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -285,9 +295,25 @@ class Manager(object):
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
# reached interval n watch_pids w/o killing all servers
|
# reached interval n watch_pids w/o killing all servers
|
||||||
|
kill_after_timeout = kwargs.get('kill_after_timeout', False)
|
||||||
for server, pids in server_pids.items():
|
for server, pids in server_pids.items():
|
||||||
if not killed_pids.issuperset(pids):
|
if not killed_pids.issuperset(pids):
|
||||||
# some pids of this server were not killed
|
# some pids of this server were not killed
|
||||||
|
if kill_after_timeout:
|
||||||
|
print(_('Waited %s seconds for %s to die; killing') % (
|
||||||
|
kill_wait, server))
|
||||||
|
# Send SIGKILL to all remaining pids
|
||||||
|
for pid in set(pids.keys()) - killed_pids:
|
||||||
|
print(_('Signal %s pid: %s signal: %s') % (
|
||||||
|
server, pid, signal.SIGKILL))
|
||||||
|
# Send SIGKILL to process group
|
||||||
|
try:
|
||||||
|
kill_group(pid, signal.SIGKILL)
|
||||||
|
except OSError as e:
|
||||||
|
# PID died before kill_group can take action?
|
||||||
|
if e.errno != errno.ESRCH:
|
||||||
|
raise e
|
||||||
|
else:
|
||||||
print(_('Waited %s seconds for %s to die; giving up') % (
|
print(_('Waited %s seconds for %s to die; giving up') % (
|
||||||
kill_wait, server))
|
kill_wait, server))
|
||||||
return 1
|
return 1
|
||||||
|
@ -1916,13 +1916,18 @@ class TestManager(unittest.TestCase):
|
|||||||
continue
|
continue
|
||||||
yield server, pid
|
yield server, pid
|
||||||
|
|
||||||
|
def mock_kill_group(pid, sig):
|
||||||
|
self.fail('kill_group should not be called')
|
||||||
|
|
||||||
_orig_server = manager.Server
|
_orig_server = manager.Server
|
||||||
_orig_watch_server_pids = manager.watch_server_pids
|
_orig_watch_server_pids = manager.watch_server_pids
|
||||||
|
_orig_kill_group = manager.kill_group
|
||||||
try:
|
try:
|
||||||
manager.watch_server_pids = mock_watch_server_pids
|
manager.watch_server_pids = mock_watch_server_pids
|
||||||
|
manager.kill_group = mock_kill_group
|
||||||
# test stop one server
|
# test stop one server
|
||||||
server_pids = {
|
server_pids = {
|
||||||
'test': [1]
|
'test': {1: "dummy.pid"}
|
||||||
}
|
}
|
||||||
manager.Server = MockServerFactory(server_pids)
|
manager.Server = MockServerFactory(server_pids)
|
||||||
m = manager.Manager(['test'])
|
m = manager.Manager(['test'])
|
||||||
@ -1930,7 +1935,7 @@ class TestManager(unittest.TestCase):
|
|||||||
self.assertEqual(status, 0)
|
self.assertEqual(status, 0)
|
||||||
# test not running
|
# test not running
|
||||||
server_pids = {
|
server_pids = {
|
||||||
'test': []
|
'test': {}
|
||||||
}
|
}
|
||||||
manager.Server = MockServerFactory(server_pids)
|
manager.Server = MockServerFactory(server_pids)
|
||||||
m = manager.Manager(['test'])
|
m = manager.Manager(['test'])
|
||||||
@ -1938,7 +1943,7 @@ class TestManager(unittest.TestCase):
|
|||||||
self.assertEqual(status, 1)
|
self.assertEqual(status, 1)
|
||||||
# test kill not running
|
# test kill not running
|
||||||
server_pids = {
|
server_pids = {
|
||||||
'test': []
|
'test': {}
|
||||||
}
|
}
|
||||||
manager.Server = MockServerFactory(server_pids)
|
manager.Server = MockServerFactory(server_pids)
|
||||||
m = manager.Manager(['test'])
|
m = manager.Manager(['test'])
|
||||||
@ -1946,7 +1951,7 @@ class TestManager(unittest.TestCase):
|
|||||||
self.assertEqual(status, 0)
|
self.assertEqual(status, 0)
|
||||||
# test won't die
|
# test won't die
|
||||||
server_pids = {
|
server_pids = {
|
||||||
'test': [None]
|
'test': {None: None}
|
||||||
}
|
}
|
||||||
manager.Server = MockServerFactory(server_pids)
|
manager.Server = MockServerFactory(server_pids)
|
||||||
m = manager.Manager(['test'])
|
m = manager.Manager(['test'])
|
||||||
@ -1956,6 +1961,83 @@ class TestManager(unittest.TestCase):
|
|||||||
finally:
|
finally:
|
||||||
manager.Server = _orig_server
|
manager.Server = _orig_server
|
||||||
manager.watch_server_pids = _orig_watch_server_pids
|
manager.watch_server_pids = _orig_watch_server_pids
|
||||||
|
manager.kill_group = _orig_kill_group
|
||||||
|
|
||||||
|
def test_stop_kill_after_timeout(self):
    """Check ``Manager.stop(kill_after_timeout=True)`` for a stuck server.

    ``manager.Server``, ``manager.watch_server_pids`` and
    ``manager.kill_group`` are swapped for local fakes and restored in
    the ``finally`` clause. Every scenario uses a server whose only pid
    is ``None``; the fake watcher never reports that pid, so the server
    is never seen to die.
    """
    class MockServerFactory(object):
        class MockServer(object):
            def __init__(self, pids, run_dir=manager.RUN_DIR):
                self.pids = pids

            def stop(self, **kwargs):
                return self.pids

            def status(self, **kwargs):
                return not self.pids

        def __init__(self, server_pids, run_dir=manager.RUN_DIR):
            self.server_pids = server_pids

        def __call__(self, server, run_dir=manager.RUN_DIR):
            return MockServerFactory.MockServer(self.server_pids[server])

    def mock_watch_server_pids(server_pids, **kwargs):
        # Yield every pid except None, so a None pid is never reported.
        for server, pids in server_pids.items():
            for pid in pids:
                if pid is None:
                    continue
                yield server, pid

    mock_kill_group_called = []

    def mock_kill_group(*args):
        # Record the call instead of signalling anything.
        mock_kill_group_called.append(args)

    def mock_kill_group_oserr(*args):
        # OSError without an errno.
        raise OSError()

    def mock_kill_group_oserr_ESRCH(*args):
        # OSError carrying errno.ESRCH ("No such process").
        raise OSError(errno.ESRCH, 'No such process')

    _orig_server = manager.Server
    _orig_watch_server_pids = manager.watch_server_pids
    _orig_kill_group = manager.kill_group
    try:
        manager.watch_server_pids = mock_watch_server_pids
        manager.kill_group = mock_kill_group
        # test server won't die - kill_group is called with SIGKILL (9)
        server_pids = {
            'test': {None: None}
        }
        manager.Server = MockServerFactory(server_pids)
        m = manager.Manager(['test'])
        status = m.stop(kill_after_timeout=True)
        self.assertEqual(status, 1)
        self.assertEqual(mock_kill_group_called, [(None, 9)])

        manager.kill_group = mock_kill_group_oserr
        # test server won't die - OSError from kill_group propagates
        server_pids = {
            'test': {None: None}
        }
        manager.Server = MockServerFactory(server_pids)
        m = manager.Manager(['test'])
        with self.assertRaises(OSError):
            m.stop(kill_after_timeout=True)

        manager.kill_group = mock_kill_group_oserr_ESRCH
        # test server won't die - OSError: No such process is tolerated
        server_pids = {
            'test': {None: None}
        }
        manager.Server = MockServerFactory(server_pids)
        m = manager.Manager(['test'])
        status = m.stop(kill_after_timeout=True)
        self.assertEqual(status, 1)
    finally:
        manager.Server = _orig_server
        manager.watch_server_pids = _orig_watch_server_pids
        manager.kill_group = _orig_kill_group
|
||||||
|
|
||||||
# TODO(clayg): more tests
|
# TODO(clayg): more tests
|
||||||
def test_shutdown(self):
|
def test_shutdown(self):
|
||||||
|
Loading…
Reference in New Issue
Block a user