Add docker containers support
A support for the containers is required for the current releases of OpenStack. Add the ability to 'start', 'terminate' and 'restart' the containers. Change-Id: Ib2beebcfa7017a9b2bb8f16dea49beef6db79d43
This commit is contained in:
parent
11e2f8a7e2
commit
d2514e6993
24
README.rst
24
README.rst
@ -7,7 +7,7 @@ OS-Faults
|
||||
The library does destructive actions inside an OpenStack cloud. It provides
|
||||
an abstraction layer over different types of cloud deployments. The actions
|
||||
are implemented as drivers (e.g. DevStack driver, Fuel driver, Libvirt driver,
|
||||
IPMI driver).
|
||||
IPMI driver, Universal driver).
|
||||
|
||||
* Free software: Apache license
|
||||
* Documentation: http://os-faults.readthedocs.io
|
||||
@ -197,7 +197,23 @@ Available actions:
|
||||
* `unplug` - unplug Service out of network
|
||||
* `plug` - plug Service into network
|
||||
|
||||
2. Node actions
|
||||
2. Container actions
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Get a container and restart it:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
cloud_management = os_faults.connect(cloud_config)
|
||||
container = cloud_management.get_container(name='neutron-api')
|
||||
container.restart()
|
||||
|
||||
Available actions:
|
||||
* `start` - start Container
|
||||
* `terminate` - terminate Container gracefully
|
||||
* `restart` - restart Container
|
||||
|
||||
3. Node actions
|
||||
~~~~~~~~~~~~~~~
|
||||
|
||||
Get all nodes in the cloud and reboot them:
|
||||
@ -214,7 +230,7 @@ Available actions:
|
||||
* `disconnect` - disable network with the specified name on all nodes
|
||||
* `connect` - enable network with the specified name on all nodes
|
||||
|
||||
3. Operate with nodes
|
||||
4. Operate with nodes
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Get all nodes where a service runs, pick one of them and reset:
|
||||
@ -233,7 +249,7 @@ Get nodes where l3-agent runs and disable the management network on them:
|
||||
nodes = cloud_management.get_nodes(fqdns=fqdns)
|
||||
nodes.disconnect(network_name='management')
|
||||
|
||||
4. Operate with services
|
||||
5. Operate with services
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Restart a service on a single node:
|
||||
|
106
examples/example_api_04.py
Normal file
106
examples/example_api_04.py
Normal file
@ -0,0 +1,106 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import logging
|
||||
import os_faults
|
||||
|
||||
|
||||
def main():
|
||||
# The cloud config details could be defined within the script or a
|
||||
# separate os-faults.yml file and then loaded to the script.
|
||||
# Ex. cloud_management = os_faults.connect(config_filename='os-faults.yml')
|
||||
cloud_config = {
|
||||
'cloud_management': {
|
||||
'driver': 'universal'
|
||||
},
|
||||
'node_discover': {
|
||||
'driver': 'node_list',
|
||||
'args': [
|
||||
{
|
||||
'ip': '192.0.10.6',
|
||||
'auth': {
|
||||
'username': 'heat-admin',
|
||||
'private_key_file': '/home/stack/.ssh/id_rsa',
|
||||
'become': True
|
||||
}
|
||||
},
|
||||
{
|
||||
'ip': '192.0.10.8',
|
||||
'auth': {
|
||||
'username': 'heat-admin',
|
||||
'private_key_file': '/home/stack/.ssh/id_rsa',
|
||||
'become': True
|
||||
}
|
||||
},
|
||||
{
|
||||
'ip': '192.0.10.7',
|
||||
'auth': {
|
||||
'username': 'heat-admin',
|
||||
'private_key_file': '/home/stack/.ssh/id_rsa',
|
||||
'become': True
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
'services': {
|
||||
'openvswitch': {
|
||||
'driver': 'system_service',
|
||||
'args': {
|
||||
'service_name': 'openvswitch',
|
||||
'grep': 'openvswitch'
|
||||
}
|
||||
}
|
||||
},
|
||||
'containers': {
|
||||
'neutron_ovs_agent': {
|
||||
'driver': 'docker_container',
|
||||
'args': {
|
||||
'container_name': 'neutron_ovs_agent'
|
||||
}
|
||||
},
|
||||
'neutron_api': {
|
||||
'driver': 'docker_container',
|
||||
'args': {
|
||||
'container_name': 'neutron_api'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logging.info('# Create connection to the cloud')
|
||||
cloud_management = os_faults.connect(cloud_config)
|
||||
logging.info('Verify connection to the cloud')
|
||||
cloud_management.verify()
|
||||
|
||||
logging.info('Get nodes where openvswitch service is running')
|
||||
service = cloud_management.get_service(name='openvswitch')
|
||||
service_nodes = service.get_nodes()
|
||||
logging.info('Nodes: {}'.format(service_nodes))
|
||||
|
||||
logging.info('Stop openvswitch service on random node')
|
||||
random_node = service.get_nodes().pick()
|
||||
service.terminate(random_node)
|
||||
|
||||
logging.info('Get nodes where neutron_ovs_agent container is running')
|
||||
container = cloud_management.get_container(name='neutron_ovs_agent')
|
||||
container_nodes = container.get_nodes()
|
||||
logging.info('Nodes: {}'.format(container_nodes))
|
||||
|
||||
logging.info('Restart neutron_ovs_agent container on the '
|
||||
'following nodes: {}'.format(container_nodes))
|
||||
container.restart(container_nodes)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
|
||||
level=logging.INFO)
|
||||
main()
|
@ -163,6 +163,11 @@ def connect(cloud_config=None, config_filename=None):
|
||||
cloud_management.update_services(services)
|
||||
cloud_management.validate_services()
|
||||
|
||||
containers = cloud_config.get('containers')
|
||||
if containers:
|
||||
cloud_management.update_containers(containers)
|
||||
cloud_management.validate_containers()
|
||||
|
||||
node_discover_conf = cloud_config.get('node_discover')
|
||||
if node_discover_conf:
|
||||
node_discover = _init_driver(node_discover_conf)
|
||||
|
@ -30,6 +30,7 @@ LOG = logging.getLogger(__name__)
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
class CloudManagement(base_driver.BaseDriver):
|
||||
SERVICES = {}
|
||||
CONTAINERS = {}
|
||||
SUPPORTED_NETWORKS = []
|
||||
NODE_CLS = node_collection.NodeCollection
|
||||
|
||||
@ -37,6 +38,7 @@ class CloudManagement(base_driver.BaseDriver):
|
||||
self.power_manager = power_management.PowerManager()
|
||||
self.node_discover = None
|
||||
self.services = copy.deepcopy(self.SERVICES)
|
||||
self.containers = copy.deepcopy(self.CONTAINERS)
|
||||
|
||||
def add_power_management(self, driver):
|
||||
self.power_manager.add_driver(driver)
|
||||
@ -47,11 +49,20 @@ class CloudManagement(base_driver.BaseDriver):
|
||||
def update_services(self, services):
|
||||
self.services.update(services)
|
||||
|
||||
def update_containers(self, containers):
|
||||
self.containers.update(containers)
|
||||
|
||||
def validate_services(self):
|
||||
for service_name, serive_conf in self.services.items():
|
||||
serive_cls = registry.get_driver(serive_conf["driver"])
|
||||
jsonschema.validate(serive_conf['args'], serive_cls.CONFIG_SCHEMA)
|
||||
|
||||
def validate_containers(self):
|
||||
for container_name, container_conf in self.containers.items():
|
||||
container_cls = registry.get_driver(container_conf["driver"])
|
||||
jsonschema.validate(container_conf['args'],
|
||||
container_cls.CONFIG_SCHEMA)
|
||||
|
||||
@abc.abstractmethod
|
||||
def verify(self):
|
||||
"""Verify connection to the cloud.
|
||||
@ -97,6 +108,23 @@ class CloudManagement(base_driver.BaseDriver):
|
||||
service_name=name, config=config["args"],
|
||||
hosts=config.get('hosts'))
|
||||
|
||||
def get_container(self, name):
|
||||
"""Get container with specified name
|
||||
|
||||
:param name: name of the container
|
||||
:return: Container
|
||||
"""
|
||||
if name not in self.containers:
|
||||
raise error.ContainerError(
|
||||
'{} driver does not support {!r} container'.format(
|
||||
self.NAME.title(), name))
|
||||
|
||||
config = self.containers[name]
|
||||
klazz = registry.get_driver(config["driver"])
|
||||
return klazz(node_cls=self.NODE_CLS, cloud_management=self,
|
||||
container_name=name, config=config["args"],
|
||||
hosts=config.get('hosts'))
|
||||
|
||||
@abc.abstractmethod
|
||||
def execute_on_cloud(self, hosts, task, raise_on_error=True):
|
||||
"""Execute task on specified hosts within the cloud.
|
||||
|
74
os_faults/api/container.py
Normal file
74
os_faults/api/container.py
Normal file
@ -0,0 +1,74 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import abc
|
||||
|
||||
import six
|
||||
|
||||
from os_faults.api import base_driver
|
||||
from os_faults.api.utils import public
|
||||
|
||||
|
||||
@six.add_metaclass(abc.ABCMeta)
|
||||
class Container(base_driver.BaseDriver):
|
||||
|
||||
def __init__(self, container_name, config, node_cls, cloud_management,
|
||||
hosts=None):
|
||||
self.container_name = container_name
|
||||
self.config = config
|
||||
self.node_cls = node_cls
|
||||
self.cloud_management = cloud_management
|
||||
self.hosts = hosts
|
||||
|
||||
@abc.abstractmethod
|
||||
def discover_nodes(self):
|
||||
"""Discover nodes where this Container is running
|
||||
|
||||
:returns: NodesCollection
|
||||
"""
|
||||
|
||||
def get_nodes(self):
|
||||
"""Get nodes where this Container is running
|
||||
|
||||
:returns: NodesCollection
|
||||
"""
|
||||
if self.hosts is not None:
|
||||
nodes = self.cloud_management.get_nodes()
|
||||
hosts = [h for h in nodes.hosts if h.ip in self.hosts]
|
||||
return self.node_cls(cloud_management=self.cloud_management,
|
||||
hosts=hosts)
|
||||
return self.discover_nodes()
|
||||
|
||||
@public
|
||||
def start(self, nodes=None):
|
||||
"""Start Container on all nodes or on particular subset
|
||||
|
||||
:param nodes: NodesCollection
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@public
|
||||
def terminate(self, nodes=None):
|
||||
"""Terminate Container gracefully on all nodes or on particular subset
|
||||
|
||||
:param nodes: NodesCollection
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
@public
|
||||
def restart(self, nodes=None):
|
||||
"""Restart Container on all nodes or on particular subset
|
||||
|
||||
:param nodes: NodesCollection
|
||||
"""
|
||||
raise NotImplementedError
|
@ -32,6 +32,10 @@ class ServiceError(OSFError):
|
||||
"""Base Error class for Service API"""
|
||||
|
||||
|
||||
class ContainerError(OSFError):
|
||||
"""Base Error class for Container API"""
|
||||
|
||||
|
||||
class NodeCollectionError(OSFError):
|
||||
"""Base Error class for NodeCollection API"""
|
||||
|
||||
|
@ -14,6 +14,7 @@ import inspect
|
||||
import logging
|
||||
import re
|
||||
|
||||
from os_faults.api import container as container_pkg
|
||||
from os_faults.api import error
|
||||
from os_faults.api import node_collection as node_collection_pkg
|
||||
from os_faults.api import service as service_pkg
|
||||
@ -53,6 +54,8 @@ ANYTHING = {'all'}
|
||||
NODE_ALIASES_PATTERN = '|'.join(RANDOMNESS | ANYTHING)
|
||||
SERVICE_ACTIONS = list_actions(service_pkg.Service)
|
||||
SERVICE_ACTIONS_PATTERN = '|'.join(SERVICE_ACTIONS)
|
||||
CONTAINER_ACTIONS = list_actions(container_pkg.Container)
|
||||
CONTAINER_ACTIONS_PATTERN = '|'.join(CONTAINER_ACTIONS)
|
||||
NODE_ACTIONS = list_actions(node_collection_pkg.NodeCollection)
|
||||
NODE_ACTIONS_PATTERN = '|'.join(NODE_ACTIONS)
|
||||
|
||||
@ -62,6 +65,11 @@ PATTERNS = [
|
||||
'(\s+on(\s+(?P<node>\S+))?\s+nodes?)?'
|
||||
'(\s+for\s+(?P<duration>\d+)\s+seconds)?' %
|
||||
SERVICE_ACTIONS_PATTERN),
|
||||
re.compile('(?P<action>%s)'
|
||||
'\s+(?P<container>\S+)\s+container'
|
||||
'(\s+on(\s+(?P<node>\S+))?\s+nodes?)?'
|
||||
'(\s+for\s+(?P<duration>\d+)\s+seconds)?' %
|
||||
CONTAINER_ACTIONS_PATTERN),
|
||||
re.compile('(?P<action>%s)'
|
||||
'(\s+(?P<network>\w+)\s+network\s+on)?'
|
||||
'(\s+(?P<target>\w+)'
|
||||
@ -88,6 +96,7 @@ def execute(destructor, command):
|
||||
|
||||
action = groups.get('action').replace(' ', '_')
|
||||
service_name = groups.get('service')
|
||||
container_name = groups.get('container')
|
||||
node_name = groups.get('node')
|
||||
network_name = groups.get('network')
|
||||
target = groups.get('target')
|
||||
@ -113,6 +122,38 @@ def execute(destructor, command):
|
||||
else: # node actions
|
||||
nodes = service.get_nodes()
|
||||
|
||||
if node_name in RANDOMNESS:
|
||||
nodes = nodes.pick()
|
||||
|
||||
kwargs = {}
|
||||
if network_name:
|
||||
kwargs['network_name'] = network_name
|
||||
if target:
|
||||
kwargs['target'] = target
|
||||
kwargs['duration'] = int(duration)
|
||||
|
||||
fn = getattr(nodes, action)
|
||||
fn(**kwargs)
|
||||
elif container_name:
|
||||
container = destructor.get_container(name=container_name)
|
||||
|
||||
if action in CONTAINER_ACTIONS:
|
||||
|
||||
kwargs = {}
|
||||
if node_name in RANDOMNESS:
|
||||
kwargs['nodes'] = container.get_nodes().pick()
|
||||
elif node_name and node_name not in ANYTHING:
|
||||
kwargs['nodes'] = destructor.get_nodes(fqdns=[node_name])
|
||||
|
||||
if duration:
|
||||
kwargs['sec'] = int(duration)
|
||||
|
||||
fn = getattr(container, action)
|
||||
fn(**kwargs)
|
||||
|
||||
else: # node actions
|
||||
nodes = container.get_nodes()
|
||||
|
||||
if node_name in RANDOMNESS:
|
||||
nodes = nodes.pick()
|
||||
|
||||
|
0
os_faults/drivers/containers/__init__.py
Normal file
0
os_faults/drivers/containers/__init__.py
Normal file
107
os_faults/drivers/containers/docker.py
Normal file
107
os_faults/drivers/containers/docker.py
Normal file
@ -0,0 +1,107 @@
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
# implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import logging
|
||||
|
||||
from os_faults.ansible import executor
|
||||
from os_faults.api import container
|
||||
from os_faults.api import error
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DockerContainers(container.Container):
|
||||
"""Docker container
|
||||
|
||||
This is docker container driver for any docker containers supported by
|
||||
Ansible. Please refer to Ansible documentation
|
||||
https://docs.ansible.com/ansible/latest/modules/docker_container_module.html
|
||||
for the whole list.
|
||||
|
||||
**Example configuration:**
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
containers:
|
||||
app:
|
||||
driver: docker_container
|
||||
args:
|
||||
container_name: app
|
||||
|
||||
parameters:
|
||||
|
||||
- **container_name** - name of the container
|
||||
"""
|
||||
NAME = 'docker_container'
|
||||
DESCRIPTION = 'Docker container'
|
||||
CONFIG_SCHEMA = {
|
||||
'type': 'object',
|
||||
'properties': {
|
||||
'container_name': {'type': 'string'},
|
||||
},
|
||||
'required': ['container_name'],
|
||||
'additionalProperties': False,
|
||||
}
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(DockerContainers, self).__init__(*args, **kwargs)
|
||||
self.container_name = self.config['container_name']
|
||||
|
||||
def _run_task(self, nodes, task, message):
|
||||
nodes = nodes if nodes is not None else self.get_nodes()
|
||||
if len(nodes) == 0:
|
||||
raise error.ContainerError(
|
||||
'Container %s is not found on any nodes' % self.container_name)
|
||||
|
||||
LOG.info('%s container %s on nodes: %s',
|
||||
message, self.container_name, nodes.get_ips())
|
||||
|
||||
return self.cloud_management.execute_on_cloud(nodes.hosts, task)
|
||||
|
||||
def discover_nodes(self):
|
||||
nodes = self.cloud_management.get_nodes()
|
||||
cmd = 'bash -c "docker ps | grep \'{}\'"'.format(self.container_name)
|
||||
results = self.cloud_management.execute_on_cloud(
|
||||
nodes.hosts, {'command': cmd}, False)
|
||||
success_ips = [r.host for r in results
|
||||
if r.status == executor.STATUS_OK]
|
||||
hosts = [h for h in nodes.hosts if h.ip in success_ips]
|
||||
LOG.debug('Container %s is discovered on nodes %s',
|
||||
self.container_name, hosts)
|
||||
return self.node_cls(cloud_management=self.cloud_management,
|
||||
hosts=hosts)
|
||||
|
||||
def start(self, nodes=None):
|
||||
task = {
|
||||
'docker_container': {
|
||||
'name': self.container_name, 'state': 'started'
|
||||
},
|
||||
}
|
||||
self._run_task(nodes, task, 'Start')
|
||||
|
||||
def terminate(self, nodes=None):
|
||||
task = {
|
||||
'docker_container': {
|
||||
'name': self.container_name, 'state': 'stopped',
|
||||
},
|
||||
}
|
||||
self._run_task(nodes, task, 'Terminate')
|
||||
|
||||
def restart(self, nodes=None):
|
||||
task = {
|
||||
'docker_container': {
|
||||
'name': self.container_name, 'state': 'started',
|
||||
'restart': 'yes'
|
||||
},
|
||||
}
|
||||
self._run_task(nodes, task, 'Restart')
|
Loading…
x
Reference in New Issue
Block a user