Add docker containers support

A support for the containers is required for the current releases of
OpenStack.
Add the ability to 'start', 'terminate' and 'restart' the containers.

Change-Id: Ib2beebcfa7017a9b2bb8f16dea49beef6db79d43
This commit is contained in:
Maxim Babushkin 2018-08-06 12:11:09 +03:00
parent 11e2f8a7e2
commit d2514e6993
9 changed files with 385 additions and 4 deletions

View File

@ -7,7 +7,7 @@ OS-Faults
The library does destructive actions inside an OpenStack cloud. It provides
an abstraction layer over different types of cloud deployments. The actions
are implemented as drivers (e.g. DevStack driver, Fuel driver, Libvirt driver,
IPMI driver).
IPMI driver, Universal driver).
* Free software: Apache license
* Documentation: http://os-faults.readthedocs.io
@ -197,7 +197,23 @@ Available actions:
* `unplug` - unplug Service out of network
* `plug` - plug Service into network
2. Node actions
2. Container actions
~~~~~~~~~~~~~~~~~~~~
Get a container and restart it:
.. code-block:: python
cloud_management = os_faults.connect(cloud_config)
container = cloud_management.get_container(name='neutron-api')
container.restart()
Available actions:
* `start` - start Container
* `terminate` - terminate Container gracefully
* `restart` - restart Container
3. Node actions
~~~~~~~~~~~~~~~
Get all nodes in the cloud and reboot them:
@ -214,7 +230,7 @@ Available actions:
* `disconnect` - disable network with the specified name on all nodes
* `connect` - enable network with the specified name on all nodes
3. Operate with nodes
4. Operate with nodes
~~~~~~~~~~~~~~~~~~~~~
Get all nodes where a service runs, pick one of them and reset:
@ -233,7 +249,7 @@ Get nodes where l3-agent runs and disable the management network on them:
nodes = cloud_management.get_nodes(fqdns=fqdns)
nodes.disconnect(network_name='management')
4. Operate with services
5. Operate with services
~~~~~~~~~~~~~~~~~~~~~~~~
Restart a service on a single node:

106
examples/example_api_04.py Normal file
View File

@ -0,0 +1,106 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import logging
import os_faults
def main():
# The cloud config details could be defined within the script or a
# separate os-faults.yml file and then loaded to the script.
# Ex. cloud_management = os_faults.connect(config_filename='os-faults.yml')
cloud_config = {
'cloud_management': {
'driver': 'universal'
},
'node_discover': {
'driver': 'node_list',
'args': [
{
'ip': '192.0.10.6',
'auth': {
'username': 'heat-admin',
'private_key_file': '/home/stack/.ssh/id_rsa',
'become': True
}
},
{
'ip': '192.0.10.8',
'auth': {
'username': 'heat-admin',
'private_key_file': '/home/stack/.ssh/id_rsa',
'become': True
}
},
{
'ip': '192.0.10.7',
'auth': {
'username': 'heat-admin',
'private_key_file': '/home/stack/.ssh/id_rsa',
'become': True
}
}
]
},
'services': {
'openvswitch': {
'driver': 'system_service',
'args': {
'service_name': 'openvswitch',
'grep': 'openvswitch'
}
}
},
'containers': {
'neutron_ovs_agent': {
'driver': 'docker_container',
'args': {
'container_name': 'neutron_ovs_agent'
}
},
'neutron_api': {
'driver': 'docker_container',
'args': {
'container_name': 'neutron_api'
}
}
}
}
logging.info('# Create connection to the cloud')
cloud_management = os_faults.connect(cloud_config)
logging.info('Verify connection to the cloud')
cloud_management.verify()
logging.info('Get nodes where openvswitch service is running')
service = cloud_management.get_service(name='openvswitch')
service_nodes = service.get_nodes()
logging.info('Nodes: {}'.format(service_nodes))
logging.info('Stop openvswitch service on random node')
random_node = service.get_nodes().pick()
service.terminate(random_node)
logging.info('Get nodes where neutron_ovs_agent container is running')
container = cloud_management.get_container(name='neutron_ovs_agent')
container_nodes = container.get_nodes()
logging.info('Nodes: {}'.format(container_nodes))
logging.info('Restart neutron_ovs_agent container on the '
'following nodes: {}'.format(container_nodes))
container.restart(container_nodes)
if __name__ == '__main__':
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
level=logging.INFO)
main()

View File

@ -163,6 +163,11 @@ def connect(cloud_config=None, config_filename=None):
cloud_management.update_services(services)
cloud_management.validate_services()
containers = cloud_config.get('containers')
if containers:
cloud_management.update_containers(containers)
cloud_management.validate_containers()
node_discover_conf = cloud_config.get('node_discover')
if node_discover_conf:
node_discover = _init_driver(node_discover_conf)

View File

@ -30,6 +30,7 @@ LOG = logging.getLogger(__name__)
@six.add_metaclass(abc.ABCMeta)
class CloudManagement(base_driver.BaseDriver):
SERVICES = {}
CONTAINERS = {}
SUPPORTED_NETWORKS = []
NODE_CLS = node_collection.NodeCollection
@ -37,6 +38,7 @@ class CloudManagement(base_driver.BaseDriver):
self.power_manager = power_management.PowerManager()
self.node_discover = None
self.services = copy.deepcopy(self.SERVICES)
self.containers = copy.deepcopy(self.CONTAINERS)
def add_power_management(self, driver):
self.power_manager.add_driver(driver)
@ -47,11 +49,20 @@ class CloudManagement(base_driver.BaseDriver):
def update_services(self, services):
self.services.update(services)
def update_containers(self, containers):
self.containers.update(containers)
def validate_services(self):
for service_name, serive_conf in self.services.items():
serive_cls = registry.get_driver(serive_conf["driver"])
jsonschema.validate(serive_conf['args'], serive_cls.CONFIG_SCHEMA)
def validate_containers(self):
for container_name, container_conf in self.containers.items():
container_cls = registry.get_driver(container_conf["driver"])
jsonschema.validate(container_conf['args'],
container_cls.CONFIG_SCHEMA)
@abc.abstractmethod
def verify(self):
"""Verify connection to the cloud.
@ -97,6 +108,23 @@ class CloudManagement(base_driver.BaseDriver):
service_name=name, config=config["args"],
hosts=config.get('hosts'))
def get_container(self, name):
"""Get container with specified name
:param name: name of the container
:return: Container
"""
if name not in self.containers:
raise error.ContainerError(
'{} driver does not support {!r} container'.format(
self.NAME.title(), name))
config = self.containers[name]
klazz = registry.get_driver(config["driver"])
return klazz(node_cls=self.NODE_CLS, cloud_management=self,
container_name=name, config=config["args"],
hosts=config.get('hosts'))
@abc.abstractmethod
def execute_on_cloud(self, hosts, task, raise_on_error=True):
"""Execute task on specified hosts within the cloud.

View File

@ -0,0 +1,74 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import six
from os_faults.api import base_driver
from os_faults.api.utils import public
@six.add_metaclass(abc.ABCMeta)
class Container(base_driver.BaseDriver):
def __init__(self, container_name, config, node_cls, cloud_management,
hosts=None):
self.container_name = container_name
self.config = config
self.node_cls = node_cls
self.cloud_management = cloud_management
self.hosts = hosts
@abc.abstractmethod
def discover_nodes(self):
"""Discover nodes where this Container is running
:returns: NodesCollection
"""
def get_nodes(self):
"""Get nodes where this Container is running
:returns: NodesCollection
"""
if self.hosts is not None:
nodes = self.cloud_management.get_nodes()
hosts = [h for h in nodes.hosts if h.ip in self.hosts]
return self.node_cls(cloud_management=self.cloud_management,
hosts=hosts)
return self.discover_nodes()
@public
def start(self, nodes=None):
"""Start Container on all nodes or on particular subset
:param nodes: NodesCollection
"""
raise NotImplementedError
@public
def terminate(self, nodes=None):
"""Terminate Container gracefully on all nodes or on particular subset
:param nodes: NodesCollection
"""
raise NotImplementedError
@public
def restart(self, nodes=None):
"""Restart Container on all nodes or on particular subset
:param nodes: NodesCollection
"""
raise NotImplementedError

View File

@ -32,6 +32,10 @@ class ServiceError(OSFError):
"""Base Error class for Service API"""
class ContainerError(OSFError):
"""Base Error class for Container API"""
class NodeCollectionError(OSFError):
"""Base Error class for NodeCollection API"""

View File

@ -14,6 +14,7 @@ import inspect
import logging
import re
from os_faults.api import container as container_pkg
from os_faults.api import error
from os_faults.api import node_collection as node_collection_pkg
from os_faults.api import service as service_pkg
@ -53,6 +54,8 @@ ANYTHING = {'all'}
NODE_ALIASES_PATTERN = '|'.join(RANDOMNESS | ANYTHING)
SERVICE_ACTIONS = list_actions(service_pkg.Service)
SERVICE_ACTIONS_PATTERN = '|'.join(SERVICE_ACTIONS)
CONTAINER_ACTIONS = list_actions(container_pkg.Container)
CONTAINER_ACTIONS_PATTERN = '|'.join(CONTAINER_ACTIONS)
NODE_ACTIONS = list_actions(node_collection_pkg.NodeCollection)
NODE_ACTIONS_PATTERN = '|'.join(NODE_ACTIONS)
@ -62,6 +65,11 @@ PATTERNS = [
'(\s+on(\s+(?P<node>\S+))?\s+nodes?)?'
'(\s+for\s+(?P<duration>\d+)\s+seconds)?' %
SERVICE_ACTIONS_PATTERN),
re.compile('(?P<action>%s)'
'\s+(?P<container>\S+)\s+container'
'(\s+on(\s+(?P<node>\S+))?\s+nodes?)?'
'(\s+for\s+(?P<duration>\d+)\s+seconds)?' %
CONTAINER_ACTIONS_PATTERN),
re.compile('(?P<action>%s)'
'(\s+(?P<network>\w+)\s+network\s+on)?'
'(\s+(?P<target>\w+)'
@ -88,6 +96,7 @@ def execute(destructor, command):
action = groups.get('action').replace(' ', '_')
service_name = groups.get('service')
container_name = groups.get('container')
node_name = groups.get('node')
network_name = groups.get('network')
target = groups.get('target')
@ -113,6 +122,38 @@ def execute(destructor, command):
else: # node actions
nodes = service.get_nodes()
if node_name in RANDOMNESS:
nodes = nodes.pick()
kwargs = {}
if network_name:
kwargs['network_name'] = network_name
if target:
kwargs['target'] = target
kwargs['duration'] = int(duration)
fn = getattr(nodes, action)
fn(**kwargs)
elif container_name:
container = destructor.get_container(name=container_name)
if action in CONTAINER_ACTIONS:
kwargs = {}
if node_name in RANDOMNESS:
kwargs['nodes'] = container.get_nodes().pick()
elif node_name and node_name not in ANYTHING:
kwargs['nodes'] = destructor.get_nodes(fqdns=[node_name])
if duration:
kwargs['sec'] = int(duration)
fn = getattr(container, action)
fn(**kwargs)
else: # node actions
nodes = container.get_nodes()
if node_name in RANDOMNESS:
nodes = nodes.pick()

View File

View File

@ -0,0 +1,107 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from os_faults.ansible import executor
from os_faults.api import container
from os_faults.api import error
LOG = logging.getLogger(__name__)
class DockerContainers(container.Container):
"""Docker container
This is docker container driver for any docker containers supported by
Ansible. Please refer to Ansible documentation
https://docs.ansible.com/ansible/latest/modules/docker_container_module.html
for the whole list.
**Example configuration:**
.. code-block:: yaml
containers:
app:
driver: docker_container
args:
container_name: app
parameters:
- **container_name** - name of the container
"""
NAME = 'docker_container'
DESCRIPTION = 'Docker container'
CONFIG_SCHEMA = {
'type': 'object',
'properties': {
'container_name': {'type': 'string'},
},
'required': ['container_name'],
'additionalProperties': False,
}
def __init__(self, *args, **kwargs):
super(DockerContainers, self).__init__(*args, **kwargs)
self.container_name = self.config['container_name']
def _run_task(self, nodes, task, message):
nodes = nodes if nodes is not None else self.get_nodes()
if len(nodes) == 0:
raise error.ContainerError(
'Container %s is not found on any nodes' % self.container_name)
LOG.info('%s container %s on nodes: %s',
message, self.container_name, nodes.get_ips())
return self.cloud_management.execute_on_cloud(nodes.hosts, task)
def discover_nodes(self):
nodes = self.cloud_management.get_nodes()
cmd = 'bash -c "docker ps | grep \'{}\'"'.format(self.container_name)
results = self.cloud_management.execute_on_cloud(
nodes.hosts, {'command': cmd}, False)
success_ips = [r.host for r in results
if r.status == executor.STATUS_OK]
hosts = [h for h in nodes.hosts if h.ip in success_ips]
LOG.debug('Container %s is discovered on nodes %s',
self.container_name, hosts)
return self.node_cls(cloud_management=self.cloud_management,
hosts=hosts)
def start(self, nodes=None):
task = {
'docker_container': {
'name': self.container_name, 'state': 'started'
},
}
self._run_task(nodes, task, 'Start')
def terminate(self, nodes=None):
task = {
'docker_container': {
'name': self.container_name, 'state': 'stopped',
},
}
self._run_task(nodes, task, 'Terminate')
def restart(self, nodes=None):
task = {
'docker_container': {
'name': self.container_name, 'state': 'started',
'restart': 'yes'
},
}
self._run_task(nodes, task, 'Restart')