Prevent undesired retry when starting container

Previously, if we fail to start a storlet container using tenant
image, we retry to start it using default tenant image. However,
that sometimes causes unexpected retry, for example when there is
something wrong in tenant image which prevents daemon factory to
launch in time.

This patch makes docker runtime to check return code of container
restarting script, so that we only restart container when the
runtime fails in container operation using tenant image.
In addition, it lets docker runtime to log stdout/stderr output
generated by restart_docker_container, to help us to debug what
is happening about container management.

Change-Id: Icd81d28b9b3555bce365e7b279ada074d239fddc
This commit is contained in:
Takashi Kajinami 2017-01-12 03:19:10 +09:00 committed by Takashi Kajinami
parent 153c8fe708
commit 25e7201203
3 changed files with 93 additions and 34 deletions
scripts
storlets/gateway/gateways/docker
tests/unit/gateway/gateways/docker

@ -56,12 +56,12 @@ int main(int argc, char **argv) {
snprintf(mount_dir5,(size_t)512, "%s", argv[7]);
int ret;
setresuid(0,0,0);
setresgid(0,0,0);
sprintf(command,"/usr/bin/docker stop -t 1 %s",container_name);
setresuid(0, 0, 0);
setresgid(0, 0, 0);
sprintf(command, "/usr/bin/docker stop -t 1 %s", container_name);
ret = system(command);
sprintf(command,"/usr/bin/docker rm %s",container_name);
sprintf(command, "/usr/bin/docker rm %s", container_name);
ret = system(command);
sprintf(command,
@ -74,7 +74,8 @@ int main(int argc, char **argv) {
mount_dir5,
container_image);
ret = system(command);
if (ret)
return(EXIT_FAILURE);
return(EXIT_SUCCESS);
if(ret){
return EXIT_FAILURE;
}
return EXIT_SUCCESS;
}

@ -314,7 +314,7 @@ class RunTimeSandbox(object):
Restarts the scope's sandbox using the specified docker image
:param docker_image_name: name of the docker image to start
:returns: returned value of restart_docker_container
:raises StorletRuntimeException: when failed to restart the container
"""
if self.docker_repo:
docker_image_name = '%s/%s' % (self.docker_repo,
@ -343,9 +343,17 @@ class RunTimeSandbox(object):
storlet_mount, storlet_python_lib_mount,
storlet_native_lib_mount, storlet_native_bin_mount]
self.logger.debug('About to start container %s' % cmd)
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = proc.communicate()
return subprocess.call(cmd)
if stdout:
self.logger.debug('STDOUT: %s' % stdout.replace('\n', '#012'))
if stderr:
self.logger.error('STDERR: %s' % stderr.replace('\n', '#012'))
if proc.returncode:
raise StorletRuntimeException('Failed to restart docker container')
def restart(self):
"""
@ -355,20 +363,24 @@ class RunTimeSandbox(object):
self.paths.create_host_pipe_prefix()
docker_image_name = self.scope
if self._restart(docker_image_name) == 0:
try:
self._restart(docker_image_name)
self.wait()
return
except StorletTimeout:
raise
except StorletRuntimeException:
# We were unable to start docker container from the tenant image.
# Let us try to start docker container from default image.
self.logger.exception("Failed to start docker container from "
"tenant image %s" % docker_image_name)
# We were unable to start docker container from the tenant image.
# Let us try to start docker container from default image.
self.logger.info("Failed to start docker container from tenant image "
"%s" % docker_image_name)
self.logger.info("Trying to start docker container from default image")
# TODO(eranr): move the default tenant image name to a config var
docker_image_name = 'ubuntu_16.04_jre8_storlets'
# TODO(eranr): move the default tenant image name to a config var
docker_image_name = 'ubuntu_16.04_jre8_storlets'
self._restart(docker_image_name)
self.wait()
self.logger.info("Trying to start docker container from default "
"image: %s" % docker_image_name)
self._restart(docker_image_name)
self.wait()
def start_storlet_daemon(
self, spath, storlet_id, language, language_version=None):
@ -485,11 +497,7 @@ class RunTimeSandbox(object):
# Best we can do is execute the container.
self.logger.debug('Failed to check Storlet daemon status, '
'restart Docker container')
try:
self.restart()
except StorletTimeout:
raise StorletRuntimeException('Docker container is '
'not responsive')
self.restart()
storlet_daemon_status = 0
if (cache_updated is True and storlet_daemon_status == 1):
@ -499,11 +507,7 @@ class RunTimeSandbox(object):
'is running. Stopping daemon')
res = self.stop_storlet_daemon(sreq.storlet_main)
if res != 1:
try:
self.restart()
except StorletTimeout:
raise StorletRuntimeException('Docker container is '
'not responsive')
self.restart()
else:
self.logger.debug('Deamon stopped')
storlet_daemon_status = 0

@ -323,29 +323,83 @@ class TestRunTimeSandbox(unittest.TestCase):
# TODO(takashi): should test timeout case
def test_restart(self):
class FakeProc(object):
def __init__(self, stdout, stderr, code):
self.stdout = stdout
self.stderr = stderr
self.returncode = code
def communicate(self):
return (self.stdout, self.stderr)
with mock.patch('storlets.gateway.gateways.docker.runtime.'
'RunTimePaths.create_host_pipe_prefix'), \
mock.patch('storlets.gateway.gateways.docker.runtime.'
'subprocess.call'):
'subprocess.Popen') as popen:
_wait = self.sbox.wait
def dummy_wait_success(*args, **kwargs):
return 1
self.sbox.wait = dummy_wait_success
# Test that popen is called successfully
popen.return_value = FakeProc('Try to restart\nOK', '', 0)
self.sbox.restart()
self.assertEqual(1, popen.call_count)
self.sbox.wait = _wait
with mock.patch('storlets.gateway.gateways.docker.runtime.'
'RunTimePaths.create_host_pipe_prefix'), \
mock.patch('storlets.gateway.gateways.docker.runtime.'
'subprocess.call'):
'subprocess.Popen') as popen:
_wait = self.sbox.wait
def dummy_wait_success(*args, **kwargs):
return 1
self.sbox.wait = dummy_wait_success
# Test double failure to restart the container
# for both the tenant image and generic image
popen.return_value = FakeProc('Try to restart', 'Some error', 1)
with self.assertRaises(StorletRuntimeException):
self.sbox.restart()
self.assertEqual(2, popen.call_count)
self.sbox.wait = _wait
with mock.patch('storlets.gateway.gateways.docker.runtime.'
'RunTimePaths.create_host_pipe_prefix'), \
mock.patch('storlets.gateway.gateways.docker.runtime.'
'subprocess.Popen') as popen:
_wait = self.sbox.wait
def dummy_wait_success(*args, **kwargs):
return 1
self.sbox.wait = dummy_wait_success
# Test failure to restart the container for the tenant image
# success for the generic image
popen.side_effect = [FakeProc('Try to restart', 'Some error', 1),
FakeProc('Try to restart\nOK', '', 0)]
self.sbox.restart()
self.assertEqual(2, popen.call_count)
self.sbox.wait = _wait
with mock.patch('storlets.gateway.gateways.docker.runtime.'
'RunTimePaths.create_host_pipe_prefix'), \
mock.patch('storlets.gateway.gateways.docker.runtime.'
'subprocess.Popen') as popen:
_wait = self.sbox.wait
def dummy_wait_failure(*args, **kwargs):
raise StorletRuntimeException()
raise StorletTimeout()
self.sbox.wait = dummy_wait_failure
popen.return_value = FakeProc('OK', '', 0)
with self.assertRaises(StorletRuntimeException):
self.sbox.restart()
self.sbox.wait = _wait