Prevent undesired retry when starting container
Previously, if we fail to start a storlet container using tenant image, we retry to start it using default tenant image. However, that sometimes causes unexpected retry, for example when there is something wrong in tenant image which prevents daemon factory to launch in time. This patch makes docker runtime to check return code of container restarting script, so that we only restart container when the runtime fails in container operation using tenant image. In addition, it lets docker runtime to log stdout/stderr output generated by restart_docker_container, to help us to debug what is happening about container management. Change-Id: Icd81d28b9b3555bce365e7b279ada074d239fddc
This commit is contained in:
parent
153c8fe708
commit
25e7201203
scripts
storlets/gateway/gateways/docker
tests/unit/gateway/gateways/docker
@ -56,12 +56,12 @@ int main(int argc, char **argv) {
|
||||
snprintf(mount_dir5,(size_t)512, "%s", argv[7]);
|
||||
|
||||
int ret;
|
||||
setresuid(0,0,0);
|
||||
setresgid(0,0,0);
|
||||
sprintf(command,"/usr/bin/docker stop -t 1 %s",container_name);
|
||||
setresuid(0, 0, 0);
|
||||
setresgid(0, 0, 0);
|
||||
sprintf(command, "/usr/bin/docker stop -t 1 %s", container_name);
|
||||
ret = system(command);
|
||||
|
||||
sprintf(command,"/usr/bin/docker rm %s",container_name);
|
||||
sprintf(command, "/usr/bin/docker rm %s", container_name);
|
||||
ret = system(command);
|
||||
|
||||
sprintf(command,
|
||||
@ -74,7 +74,8 @@ int main(int argc, char **argv) {
|
||||
mount_dir5,
|
||||
container_image);
|
||||
ret = system(command);
|
||||
if (ret)
|
||||
return(EXIT_FAILURE);
|
||||
return(EXIT_SUCCESS);
|
||||
if(ret){
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -314,7 +314,7 @@ class RunTimeSandbox(object):
|
||||
Restarts the scope's sandbox using the specified docker image
|
||||
|
||||
:param docker_image_name: name of the docker image to start
|
||||
:returns: returned value of restart_docker_container
|
||||
:raises StorletRuntimeException: when failed to restart the container
|
||||
"""
|
||||
if self.docker_repo:
|
||||
docker_image_name = '%s/%s' % (self.docker_repo,
|
||||
@ -343,9 +343,17 @@ class RunTimeSandbox(object):
|
||||
storlet_mount, storlet_python_lib_mount,
|
||||
storlet_native_lib_mount, storlet_native_bin_mount]
|
||||
|
||||
self.logger.debug('About to start container %s' % cmd)
|
||||
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE)
|
||||
stdout, stderr = proc.communicate()
|
||||
|
||||
return subprocess.call(cmd)
|
||||
if stdout:
|
||||
self.logger.debug('STDOUT: %s' % stdout.replace('\n', '#012'))
|
||||
if stderr:
|
||||
self.logger.error('STDERR: %s' % stderr.replace('\n', '#012'))
|
||||
|
||||
if proc.returncode:
|
||||
raise StorletRuntimeException('Failed to restart docker container')
|
||||
|
||||
def restart(self):
|
||||
"""
|
||||
@ -355,20 +363,24 @@ class RunTimeSandbox(object):
|
||||
self.paths.create_host_pipe_prefix()
|
||||
|
||||
docker_image_name = self.scope
|
||||
if self._restart(docker_image_name) == 0:
|
||||
try:
|
||||
self._restart(docker_image_name)
|
||||
self.wait()
|
||||
return
|
||||
except StorletTimeout:
|
||||
raise
|
||||
except StorletRuntimeException:
|
||||
# We were unable to start docker container from the tenant image.
|
||||
# Let us try to start docker container from default image.
|
||||
self.logger.exception("Failed to start docker container from "
|
||||
"tenant image %s" % docker_image_name)
|
||||
|
||||
# We were unable to start docker container from the tenant image.
|
||||
# Let us try to start docker container from default image.
|
||||
self.logger.info("Failed to start docker container from tenant image "
|
||||
"%s" % docker_image_name)
|
||||
self.logger.info("Trying to start docker container from default image")
|
||||
# TODO(eranr): move the default tenant image name to a config var
|
||||
docker_image_name = 'ubuntu_16.04_jre8_storlets'
|
||||
|
||||
# TODO(eranr): move the default tenant image name to a config var
|
||||
docker_image_name = 'ubuntu_16.04_jre8_storlets'
|
||||
self._restart(docker_image_name)
|
||||
self.wait()
|
||||
self.logger.info("Trying to start docker container from default "
|
||||
"image: %s" % docker_image_name)
|
||||
self._restart(docker_image_name)
|
||||
self.wait()
|
||||
|
||||
def start_storlet_daemon(
|
||||
self, spath, storlet_id, language, language_version=None):
|
||||
@ -485,11 +497,7 @@ class RunTimeSandbox(object):
|
||||
# Best we can do is execute the container.
|
||||
self.logger.debug('Failed to check Storlet daemon status, '
|
||||
'restart Docker container')
|
||||
try:
|
||||
self.restart()
|
||||
except StorletTimeout:
|
||||
raise StorletRuntimeException('Docker container is '
|
||||
'not responsive')
|
||||
self.restart()
|
||||
storlet_daemon_status = 0
|
||||
|
||||
if (cache_updated is True and storlet_daemon_status == 1):
|
||||
@ -499,11 +507,7 @@ class RunTimeSandbox(object):
|
||||
'is running. Stopping daemon')
|
||||
res = self.stop_storlet_daemon(sreq.storlet_main)
|
||||
if res != 1:
|
||||
try:
|
||||
self.restart()
|
||||
except StorletTimeout:
|
||||
raise StorletRuntimeException('Docker container is '
|
||||
'not responsive')
|
||||
self.restart()
|
||||
else:
|
||||
self.logger.debug('Deamon stopped')
|
||||
storlet_daemon_status = 0
|
||||
|
@ -323,29 +323,83 @@ class TestRunTimeSandbox(unittest.TestCase):
|
||||
# TODO(takashi): should test timeout case
|
||||
|
||||
def test_restart(self):
|
||||
|
||||
class FakeProc(object):
|
||||
def __init__(self, stdout, stderr, code):
|
||||
self.stdout = stdout
|
||||
self.stderr = stderr
|
||||
self.returncode = code
|
||||
|
||||
def communicate(self):
|
||||
return (self.stdout, self.stderr)
|
||||
|
||||
with mock.patch('storlets.gateway.gateways.docker.runtime.'
|
||||
'RunTimePaths.create_host_pipe_prefix'), \
|
||||
mock.patch('storlets.gateway.gateways.docker.runtime.'
|
||||
'subprocess.call'):
|
||||
'subprocess.Popen') as popen:
|
||||
_wait = self.sbox.wait
|
||||
|
||||
def dummy_wait_success(*args, **kwargs):
|
||||
return 1
|
||||
|
||||
self.sbox.wait = dummy_wait_success
|
||||
|
||||
# Test that popen is called successfully
|
||||
popen.return_value = FakeProc('Try to restart\nOK', '', 0)
|
||||
self.sbox.restart()
|
||||
self.assertEqual(1, popen.call_count)
|
||||
self.sbox.wait = _wait
|
||||
|
||||
with mock.patch('storlets.gateway.gateways.docker.runtime.'
|
||||
'RunTimePaths.create_host_pipe_prefix'), \
|
||||
mock.patch('storlets.gateway.gateways.docker.runtime.'
|
||||
'subprocess.call'):
|
||||
'subprocess.Popen') as popen:
|
||||
_wait = self.sbox.wait
|
||||
|
||||
def dummy_wait_success(*args, **kwargs):
|
||||
return 1
|
||||
|
||||
self.sbox.wait = dummy_wait_success
|
||||
|
||||
# Test double failure to restart the container
|
||||
# for both the tenant image and generic image
|
||||
popen.return_value = FakeProc('Try to restart', 'Some error', 1)
|
||||
with self.assertRaises(StorletRuntimeException):
|
||||
self.sbox.restart()
|
||||
self.assertEqual(2, popen.call_count)
|
||||
self.sbox.wait = _wait
|
||||
|
||||
with mock.patch('storlets.gateway.gateways.docker.runtime.'
|
||||
'RunTimePaths.create_host_pipe_prefix'), \
|
||||
mock.patch('storlets.gateway.gateways.docker.runtime.'
|
||||
'subprocess.Popen') as popen:
|
||||
_wait = self.sbox.wait
|
||||
|
||||
def dummy_wait_success(*args, **kwargs):
|
||||
return 1
|
||||
|
||||
self.sbox.wait = dummy_wait_success
|
||||
|
||||
# Test failure to restart the container for the tenant image
|
||||
# success for the generic image
|
||||
popen.side_effect = [FakeProc('Try to restart', 'Some error', 1),
|
||||
FakeProc('Try to restart\nOK', '', 0)]
|
||||
self.sbox.restart()
|
||||
self.assertEqual(2, popen.call_count)
|
||||
self.sbox.wait = _wait
|
||||
|
||||
with mock.patch('storlets.gateway.gateways.docker.runtime.'
|
||||
'RunTimePaths.create_host_pipe_prefix'), \
|
||||
mock.patch('storlets.gateway.gateways.docker.runtime.'
|
||||
'subprocess.Popen') as popen:
|
||||
_wait = self.sbox.wait
|
||||
|
||||
def dummy_wait_failure(*args, **kwargs):
|
||||
raise StorletRuntimeException()
|
||||
raise StorletTimeout()
|
||||
|
||||
self.sbox.wait = dummy_wait_failure
|
||||
|
||||
popen.return_value = FakeProc('OK', '', 0)
|
||||
with self.assertRaises(StorletRuntimeException):
|
||||
self.sbox.restart()
|
||||
self.sbox.wait = _wait
|
||||
|
Loading…
x
Reference in New Issue
Block a user