Retire Tripleo: remove repo content
TripleO project is retiring - https://review.opendev.org/c/openstack/governance/+/905145 this commit remove the content of this project repo Change-Id: I5080dd23acbf6beca58e70e6ae1f1bc3d1843161
This commit is contained in:
parent
74eec6791c
commit
a1119eb2d8
62
.gitignore
vendored
62
.gitignore
vendored
@ -1,62 +0,0 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
sdist/
|
||||
var/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*,cover
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
|
||||
# Sphinx documentation
|
||||
doc/build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# virtualenv
|
||||
.venv/
|
||||
|
||||
# Files created by releasenotes build
|
||||
releasenotes/build
|
||||
|
202
LICENSE
202
LICENSE
@ -1,202 +0,0 @@
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "{}"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright {yyyy} {name of copyright owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
144
README.md
144
README.md
@ -1,144 +0,0 @@
|
||||
Utility roles and docs for TripleO
|
||||
==================================
|
||||
|
||||
These Ansible roles are a set of useful tools to be used on top of TripleO
|
||||
deployments. They can also be used together with
|
||||
[tripleo-quickstart](https://github.com/openstack/tripleo-quickstart) (and
|
||||
[tripleo-quickstart-extras](https://github.com/openstack/tripleo-quickstart-extras)).
|
||||
|
||||
The documentation of each role is located in the individual role folders.
|
||||
General usage information about *tripleo-quickstart* can be found in the
|
||||
[project documentation](https://docs.openstack.org/tripleo-quickstart/latest/).
|
||||
|
||||
Using the playbook on an existing TripleO environment
|
||||
-----------------------------------------------------
|
||||
|
||||
The playbooks can be launched directly from the **undercloud** machine of the
|
||||
**TripleO** deployment. The described steps are expected to be run inside the
|
||||
*/home/stack* directory.
|
||||
|
||||
First of all a clone of the *tripleo-ha-utils* repository must be
|
||||
created:
|
||||
|
||||
git clone https://github.com/openstack/tripleo-ha-utils
|
||||
|
||||
Then three environment variables need to be exported, pointing to three files:
|
||||
|
||||
export ANSIBLE_CONFIG="/home/stack/ansible.cfg"
|
||||
export ANSIBLE_INVENTORY="/home/stack/hosts"
|
||||
export ANSIBLE_SSH_ARGS="-F /home/stack/ssh.config.ansible"
|
||||
|
||||
These files are:
|
||||
|
||||
**ansible.cfg** which must contain at least these lines:
|
||||
|
||||
[defaults]
|
||||
roles_path = /home/stack/tripleo-ha-utils/roles
|
||||
|
||||
**hosts** which must be configured depending on the deployed environment,
|
||||
reflecting these sections:
|
||||
|
||||
undercloud ansible_host=undercloud ansible_user=stack ansible_private_key_file=/home/stack/.ssh/id_rsa
|
||||
overcloud-compute-1 ansible_host=overcloud-compute-1 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
|
||||
overcloud-compute-0 ansible_host=overcloud-compute-0 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
|
||||
overcloud-controller-2 ansible_host=overcloud-controller-2 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
|
||||
overcloud-controller-1 ansible_host=overcloud-controller-1 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
|
||||
overcloud-controller-0 ansible_host=overcloud-controller-0 ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
|
||||
|
||||
[compute]
|
||||
overcloud-compute-1
|
||||
overcloud-compute-0
|
||||
|
||||
[undercloud]
|
||||
undercloud
|
||||
|
||||
[overcloud]
|
||||
overcloud-compute-1
|
||||
overcloud-compute-0
|
||||
overcloud-controller-2
|
||||
overcloud-controller-1
|
||||
overcloud-controller-0
|
||||
|
||||
[controller]
|
||||
overcloud-controller-2
|
||||
overcloud-controller-1
|
||||
overcloud-controller-0
|
||||
|
||||
**ssh.config.ansible** which can be generated by these code lines:
|
||||
|
||||
cat /home/stack/.ssh/id_rsa.pub >> /home/stack/.ssh/authorized_keys
|
||||
echo -e "Host undercloud\n Hostname 127.0.0.1\n IdentityFile /home/stack/.ssh/id_rsa\n User stack\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n" > ssh.config.ansible
|
||||
. /home/stack/stackrc
|
||||
openstack server list -c Name -c Networks | awk '/ctlplane/ {print $2, $4}' | sed s/ctlplane=//g | while read node; do node_name=$(echo $node | cut -f 1 -d " "); node_ip=$(echo $node | cut -f 2 -d " "); echo -e "Host $node_name\n Hostname $node_ip\n IdentityFile /home/stack/.ssh/id_rsa\n User heat-admin\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n"; done >> ssh.config.ansible
|
||||
|
||||
|
||||
It can *optionally* contain specific per-host connection options, like these:
|
||||
|
||||
...
|
||||
...
|
||||
Host overcloud-controller-0
|
||||
ProxyCommand ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o ConnectTimeout=60 -F /home/stack/ssh.config.ansible undercloud -W 192.168.24.16:22
|
||||
IdentityFile /home/stack/.ssh/id_rsa
|
||||
User heat-admin
|
||||
StrictHostKeyChecking no
|
||||
UserKnownHostsFile=/dev/null
|
||||
...
|
||||
...
|
||||
|
||||
In this example to connect to overcloud-controller-0 ansible will use
|
||||
*undercloud* as a *ProxyHost*.
|
||||
|
||||
With this setup in place it is then possible to launch the playbook:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release=newton
|
||||
|
||||
Using the playbooks on tripleo-quickstart provided environment
|
||||
--------------------------------------------------------------
|
||||
|
||||
*tripleo-ha-utils* project can be set as a *tripleo-quickstart*
|
||||
extra requirement, so all the code will be automatically downloaded and
|
||||
available.
|
||||
Inside the requirements.txt file you will need a line pointing to this repo:
|
||||
|
||||
echo "https://github.com/openstack/tripleo-ha-utils/#egg=tripleo-ha-utils" >> tripleo-quickstart/quickstart-extras-requirements.txt
|
||||
|
||||
Supposing the environment was successfully provided with a previous quickstart
|
||||
execution, to use one of the utils playbooks a command line like this one can be
|
||||
used:
|
||||
|
||||
./quickstart.sh \
|
||||
--retain-inventory \
|
||||
--teardown none \
|
||||
--playbook overcloud-instance-ha.yml \
|
||||
--working-dir /path/to/workdir \
|
||||
--config /path/to/config.yml \
|
||||
--release <RELEASE> \
|
||||
--tags all \
|
||||
<VIRTHOST HOSTNAME or IP>
|
||||
|
||||
Basically this command:
|
||||
|
||||
- **Keep** existing data on the repo (by keeping the inventory and all the
|
||||
virtual machines)
|
||||
- Uses the *overcloud-instance-ha.yml* playbook
|
||||
- Uses the same workdir where quickstart was first deployed
|
||||
- Select the specific config file (optionally)
|
||||
- Specifies the release (mitaka, newton, or “master” for ocata)
|
||||
- Performs all the tasks in the playbook overcloud-instance-ha.yml
|
||||
|
||||
**Important note**
|
||||
|
||||
You might need to export *ANSIBLE_SSH_ARGS* with the path of the
|
||||
*ssh.config.ansible* file to make the command work, like this:
|
||||
|
||||
export ANSIBLE_SSH_ARGS="-F /path/to/quickstart/workdir/ssh.config.ansible"
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
Licensed under the Apache License, Version 2.0. You may obtain a copy of the License at [http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0)
|
||||
|
||||
Author Information
|
||||
------------------
|
||||
|
||||
Raoul Scarazzini <rasca@redhat.com>
|
10
README.rst
Normal file
10
README.rst
Normal file
@ -0,0 +1,10 @@
|
||||
This project is no longer maintained.
|
||||
|
||||
The contents of this repository are still available in the Git
|
||||
source code management system. To see the contents of this
|
||||
repository before it reached its end of life, please check out the
|
||||
previous commit with "git checkout HEAD^1".
|
||||
|
||||
For any further questions, please email
|
||||
openstack-discuss@lists.openstack.org or join #openstack-dev on
|
||||
OFTC.
|
@ -1,174 +0,0 @@
|
||||
Multi Virtual Undercloud
|
||||
========================
|
||||
|
||||
This document describes a way to deploy multiple virtual underclouds on the same
|
||||
host. This is mainly for environments in which you want to manage multiple
|
||||
baremetal overclouds without having one baremetal machine dedicated for each one
|
||||
you deploy.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
**Physical switches**
|
||||
|
||||
The switch(es) must support VLAN tagging and all the ports must be configured in
|
||||
trunk, so that the dedicated network interface on the physical host (in the
|
||||
examples the secondary interface, eth1) is able to offer PXE and dhcp to all the
|
||||
overcloud machines via undercloud virtual machine's bridged interface.
|
||||
|
||||
**Host hardware**
|
||||
|
||||
The main requirement to make this kind of setup work is to have a host
|
||||
powerful enough to run virtual machines with at least 16GB of RAM and 8 cpus.
|
||||
The more power you have, the more undercloud machines you can spawn without
|
||||
impacting performance.
|
||||
|
||||
**Host Network topology**
|
||||
|
||||
Host is reachable via ssh from the machine launching quickstart and configured
|
||||
with two main network interfaces:
|
||||
|
||||
- **eth0**: bridged on **br0**, pointing to LAN (underclouds will own an IP to
|
||||
be reachable via ssh);
|
||||
- **eth1**: connected to the dedicated switch that supports all the VLANs that
|
||||
will be used in the deployment;
|
||||
|
||||
Over eth1, for each undercloud virtual machine two VLAN interfaces are created,
|
||||
with associated bridges:
|
||||
|
||||
- **Control plane network bridge** (i.e. br2100) built over VLAN interface (i.e.
|
||||
eth1.2100) that will be eth1 on the undercloud virtual machine, used by
|
||||
TripleO as br-ctlplane;
|
||||
- **External network bridge** (i.e. br2105) built over VLAN interface (i.e.
|
||||
eth1.2105) that will be eth2 on the undercloud virtual machine, used by
|
||||
TripleO as external network device;
|
||||
|
||||
![network-topology](./multi-virtual-undercloud_network-topology.png "Multi Virtual Undercloud - Network Topology")
|
||||
|
||||
Quickstart configuration
|
||||
------------------------
|
||||
|
||||
Virtual undercloud machine is treated as a baremetal one and the Quickstart
|
||||
command relies on the baremetal undercloud role, and its playbook.
|
||||
This means that any playbook similar to [baremetal-undercloud.yml](https://github.com/openstack/tripleo-quickstart-extras/blob/master/playbooks/baremetal-undercloud.yml "Baremetal undercloud playbook") should be okay.
|
||||
|
||||
The configuration file has two specific sections that need attention:
|
||||
|
||||
- Additional interface for external network to route overcloud traffic:
|
||||
|
||||
```yaml
|
||||
undercloud_networks:
|
||||
external:
|
||||
address: 172.20.0.254
|
||||
netmask: 255.255.255.0
|
||||
device_type: ethernet
|
||||
device_name: eth2
|
||||
```
|
||||
|
||||
**NOTE:** in this configuration eth2 is acting also as a default router for
|
||||
the external network.
|
||||
|
||||
- Baremetal provision script, which will be a helper for the
|
||||
[multi-virtual-undercloud.sh](./multi-virtual-undercloud.sh) script on the <VIRTHOST>:
|
||||
|
||||
```yaml
|
||||
baremetal_provisioning_script: "/path/to/multi-virtual-undercloud-provisioner.sh <VIRTHOST> <DISTRO> <UNDERCLOUD-NAME> <UNDERCLOUD IP> <UNDERCLOUD NETMASK> <UNDERCLOUD GATEWAY> <UNDERCLOUD SSH PUBLIC KEY> <CTLPLANE VLAN> <EXTERNAL NETWORK VLAN>"
|
||||
```
|
||||
|
||||
The supported parameters, with the exception of VIRTHOST, are the same ones
|
||||
that are passed to the script that lives (and runs) on the VIRTHOST,
|
||||
*multi-virtual-undercloud.sh*.
|
||||
This helper script launches the remote command on VIRTHOST host and ensures
|
||||
that the machine gets reachable via ssh before proceeding.
|
||||
|
||||
The multi virtual undercloud script
|
||||
-----------------------------------
|
||||
|
||||
The [multi-virtual-undercloud.sh](./multi-virtual-undercloud.sh) script is
|
||||
placed on the VIRTHOST and needs these parameters:
|
||||
|
||||
1. **DISTRO**: this must be the name (without extension) of one of the images
|
||||
present inside the */images* dir on the VIRTHOST;
|
||||
2. **VMNAME**: the name of the undercloud virtual machine (the name that will be
|
||||
used by libvirt);
|
||||
3. **VMETH0IP**: IP of the virtual undercloud primary interface to which
|
||||
quickstart (and users) will connect via ssh;
|
||||
4. **VMETH0NM**: Netmask of the virtual undercloud primary interface;
|
||||
5. **VMETH0GW**: Gateway of the virtual undercloud primary interface;
|
||||
6. **VMSSHKEY**: Public key to be enabled on the virtual undercloud;
|
||||
7. **UCVLAN**: VLAN of the overcloud's ctlplane network;
|
||||
8. **UCEXTVLAN**: VLAN of the overcloud's external network;
|
||||
|
||||
The script's actions are basically:
|
||||
|
||||
1. Destroy and undefine any existing machine named as the one we want to create;
|
||||
2. Prepare the image on which the virtual undercloud will be created by copying
|
||||
the available distro image and preparing it to be ready for the TripleO
|
||||
installation: it fixes size, network interfaces, packages and ssh keys;
|
||||
3. Create and launch the virtual undercloud machine;
|
||||
|
||||
**Note**: on the VIRTHOST there must exist an */images* directory containing
|
||||
images suitable for the deploy.
|
||||
Having this directory structure:
|
||||
|
||||
```console
|
||||
[root@VIRTHOST ~]# ls -l /images/
|
||||
total 1898320
|
||||
lrwxrwxrwx. 1 root root 34 14 feb 09.20 centos-7.qcow2 -> CentOS-7-x86_64-GenericCloud.qcow2
|
||||
-rw-r--r--. 1 root root 1361182720 15 feb 10.57 CentOS-7-x86_64-GenericCloud.qcow2
|
||||
lrwxrwxrwx. 1 root root 36 14 feb 09.20 rhel-7.qcow2 -> rhel-guest-image-7.3-33.x86_64.qcow2
|
||||
-rw-r--r--. 1 root root 582695936 19 ott 18.44 rhel-guest-image-7.3-33.x86_64.qcow2
|
||||
```
|
||||
|
||||
Helps on updating the images, since one can leave config files pointing to
|
||||
*centos-7* and, in case of updates, make the symlink point to a newer image.
|
||||
|
||||
Quickstart command
|
||||
------------------
|
||||
|
||||
A typical invocation of the TripleO Quickstart command is something similar to
|
||||
this:
|
||||
|
||||
```console
|
||||
/path/to/tripleo-quickstart/quickstart.sh \
|
||||
--bootstrap \
|
||||
--ansible-debug \
|
||||
--no-clone \
|
||||
--playbook baremetal-undercloud.yml \
|
||||
--working-dir /path/to/workdir \
|
||||
--config /path/to/config.yml \
|
||||
--release $RELEASE \
|
||||
--tags "all" \
|
||||
$VIRTHOST
|
||||
```
|
||||
|
||||
So nothing different from a normal quickstart deploy command line, the
|
||||
difference here is made by the config.yml as described above, with its provision
|
||||
script.
|
||||
|
||||
Conclusions
|
||||
-----------
|
||||
|
||||
This approach can be considered useful in testing multi environments with
|
||||
TripleO for three reasons:
|
||||
|
||||
* It is *fast*: it takes the same time to install the undercloud but less to
|
||||
provision it, since you don’t have to wait for the physical undercloud provisioning;
|
||||
* It is *isolated*: using VLANs to separate the traffic keeps each environment
|
||||
completely isolated from the others;
|
||||
* It is *reliable*: you can have the undercloud on a shared storage and think
|
||||
about putting the undercloud vm in HA, live migrating it with libvirt,
|
||||
pacemaker, whatever...
|
||||
|
||||
There are no macroscopic cons, except for the initial configuration on the
|
||||
VIRTHOST, that is made only one time, at the beginning.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
GPL
|
||||
|
||||
Author Information
|
||||
------------------
|
||||
|
||||
Raoul Scarazzini <rasca@redhat.com>
|
@ -1,46 +0,0 @@
|
||||
#!/bin/bash
#
# Provisioner helper: takes nine positional arguments describing the virtual
# undercloud to create and the VIRTHOST that will run it.

# Abort on errors and on unset variables; trace every command.
set -eux

# Host on which the virtual undercloud is provisioned (reached as root).
VIRTHOST="$1"
# Values handed over unchanged to the remote multi-virtual-undercloud.sh.
DISTRO="$2"
VMNAME="$3"
VMETH0IP="$4"
VMETH0NM="$5"
VMETH0GW="$6"
# Local public key file that gets copied to the VIRTHOST.
VMSSHKEY="$7"
UCVLAN="$8"
UCEXTVLAN="$9"
|
||||
|
||||
# Block until TCP port 22 of a host reaches the requested state.
#
# Arguments:
#   $1 - host name or IP address probed with nc
#   $2 - "up" waits until the connection succeeds; any other value waits
#        until the connection attempt fails
# Outputs:
#   prints "." (without newline) after every 5 second pause between probes
# Returns:
#   0 once the requested state is observed (loops forever otherwise)
function wait_machine_status {
    local undercloud=$1
    local status=$2
    local wanted=down
    local observed

    if [ "$status" == "up" ]
    then
        wanted=up
    fi

    while true
    do
        # Probe inside an `if` so that a refused connection is treated as a
        # result and never aborts the caller when `set -e` is active.
        if nc "$undercloud" 22 < /dev/null &> /dev/null
        then
            observed=up
        else
            observed=down
        fi

        if [ "$observed" == "$wanted" ]
        then
            break
        fi

        sleep 5
        echo -n "."
    done
}
|
||||
|
||||
# Copy the public key into root's home directory on the VIRTHOST, named after
# the virtual machine.
echo -n "$(date) - Copying $VMSSHKEY on $VIRTHOST: "
scp "$VMSSHKEY" "root@${VIRTHOST}:${VMNAME}_key.pub"
echo "Done."

# Run the provisioning script on the VIRTHOST. The key is referenced by the
# file name it was just copied to.
echo -n "$(date) - Starting provision of $VMNAME ($VMETH0IP) on $VIRTHOST: "
ssh "root@${VIRTHOST}" /root/multi-virtual-undercloud.sh \
    "$DISTRO" "$VMNAME" "$VMETH0IP" "$VMETH0NM" "$VMETH0GW" \
    "${VMNAME}_key.pub" "$UCVLAN" "$UCEXTVLAN"
echo "Done."

# From here on failed commands are expected (connection probes), so stop
# aborting on errors.
set +e

# Wait until the new machine answers on its ssh port.
echo -n "$(date) - Waiting for $VMNAME to come up again after update: "
wait_machine_status "$VMETH0IP" "up"
echo "Done."
|
@ -1,115 +0,0 @@
|
||||
#!/bin/bash
#
# Create (or re-create) a virtual undercloud machine with libvirt.
#
# Usage:
#   multi-virtual-undercloud.sh DISTRO VMNAME VMETH0IP VMETH0NM VMETH0GW \
#       VMSSHKEY UCVLAN UCEXTVLAN
#
#   DISTRO     base image name; /images/DISTRO.qcow2 is cloned
#   VMNAME     libvirt domain name; its disk is created as /vms/VMNAME.qcow2
#   VMETH0IP   static IP address written to the guest's ifcfg-eth0
#   VMETH0NM   netmask written to ifcfg-eth0
#   VMETH0GW   gateway written to ifcfg-eth0
#   VMSSHKEY   public key file added to /root/.ssh/authorized_keys in the guest
#   UCVLAN     the guest's second NIC is attached to bridge br<UCVLAN>
#   UCEXTVLAN  the guest's third NIC is attached to bridge br<UCEXTVLAN>

set -eux

DISTRO=$1
CLONEFROM="/images/${DISTRO}.qcow2"
VMNAME=$2
VMIMG="/vms/${VMNAME}.qcow2"
VMIMGCOPY="/vms/ORIG-${VMNAME}.qcow2"
VMETH0IP=$3
VMETH0NM=$4
VMETH0GW=$5
VMSSHKEY=$6
VMDISKADD=50G
UCVLAN=$7
UCEXTVLAN=$8

# The key may be given relative to the directory the script was started from.
# Anchor it now: the script changes into a scratch directory below, and a
# relative path would no longer resolve when the key is read.
case "$VMSSHKEY" in
    /*) ;;
    *) VMSSHKEY="$PWD/$VMSSHKEY" ;;
esac

# Private, unpredictable scratch directory, removed on every exit path.
WORKDIR=$(mktemp -d /tmp/virt-undercloud-XXXXXX)
trap 'rm -rf -- "$WORKDIR"' EXIT
cd "$WORKDIR"

# Destroy the machine if it is running. Match the whole domain name so that
# a machine whose name merely contains $VMNAME is not mistaken for it.
if virsh list --name | grep -Fxq -- "$VMNAME"
then
    virsh destroy "$VMNAME"
fi

# Undefine the vm if it is defined
if virsh list --all --name | grep -Fxq -- "$VMNAME"
then
    virsh undefine "$VMNAME"
fi

# Copy qcow2 base image
cp -v "$CLONEFROM" "$VMIMG"

echo "$(date) - Adding $VMDISKADD to $VMIMG: "
qemu-img resize "$VMIMG" "+$VMDISKADD"

echo "$(date) - Resizing filesystem of $VMIMG: "
# virt-resize needs distinct input and output images, hence the temporary copy.
cp -v "$VMIMG" "$VMIMGCOPY"
virt-resize --expand /dev/sda1 "$VMIMGCOPY" "$VMIMG"
rm -fv "$VMIMGCOPY"

echo "$(date) - Checking status of $VMIMG: "
qemu-img info "$VMIMG"
virt-filesystems --long -h --all -a "$VMIMG"

# Network configuration files that are copied into the guest image.
cat > ifcfg-eth0 <<EOF
NAME=eth0
DEVICE=eth0
ONBOOT=yes
BOOTPROTO=static
IPADDR=$VMETH0IP
NETMASK=$VMETH0NM
GATEWAY=$VMETH0GW
PEERDNS=yes
DNS1=8.8.8.8
TYPE=Ethernet
EOF

cat > ifcfg-eth1 <<EOF
NAME=eth1
DEVICE=eth1
ONBOOT=yes
BOOTPROTO=none
TYPE=Ethernet
EOF

cat "$VMSSHKEY" >> ./authorized_keys

# Customization steps shared by every supported distro; they are applied
# after the distro specific ones.
customize_tail=(
    --mkdir /root/.ssh
    --copy-in ifcfg-eth0:/etc/sysconfig/network-scripts/
    --copy-in ifcfg-eth1:/etc/sysconfig/network-scripts/
    --copy-in ./authorized_keys:/root/.ssh/
    --selinux-relabel
)

# NOTE(review): the root password is hard-coded (and shown by `set -x`);
# consider passing it in or relying on the ssh key only.
case "$DISTRO" in
    "centos-7")
        virt-customize -a "$VMIMG" \
            --root-password password:redhat \
            --install openssh-server \
            --run-command "xfs_growfs /" \
            --run-command "echo 'GRUB_CMDLINE_LINUX=\"console=tty0 crashkernel=auto no_timer_check net.ifnames=0 console=ttyS0,115200n8\"' >> /etc/default/grub" \
            --run-command "grubby --update-kernel=ALL --args=net.ifnames=0" \
            --run-command "systemctl enable sshd" \
            "${customize_tail[@]}"
        ;;
    "rhel-7")
        virt-customize -a "$VMIMG" \
            --root-password password:redhat \
            --run-command "curl -o rhos-release-latest.noarch.rpm http://rhos-release.virt.bos.redhat.com/repos/rhos-release/rhos-release-latest.noarch.rpm" \
            --run-command "rpm -Uvh rhos-release-latest.noarch.rpm" \
            --run-command "rhos-release rhel-7.3" \
            --install openssh-server \
            --run-command "systemctl enable sshd" \
            --run-command "rpm -e rhos-release" \
            --run-command "sed -i -e '/\[rhelosp-rhel-7.3-server-opt\]/,/^\[/s/enabled=0/enabled=1/' /etc/yum.repos.d/rhos-release-rhel-7.3.repo" \
            "${customize_tail[@]}"
        ;;
    *)
        # Nothing is customized for other images; say so instead of silently
        # booting a machine without network configuration or ssh key.
        echo "WARNING: no customization defined for distro '$DISTRO'," \
            "$VMIMG is used as is" >&2
        ;;
esac

# Deploy the vm
virt-install \
    --import \
    --name "$VMNAME" \
    --ram 16192 \
    --disk "path=$VMIMG" \
    --vcpus 8 \
    --os-type linux \
    --os-variant generic \
    --network bridge=br0 \
    --network "bridge=br$UCVLAN" \
    --network "bridge=br$UCEXTVLAN" \
    --graphics none \
    --noautoconsole

# The scratch directory is removed by the EXIT trap installed above.
|
Binary file not shown.
Before Width: | Height: | Size: 224 KiB |
@ -1,52 +0,0 @@
|
||||
Infrared Instance-ha Plugin Playbook
|
||||
====================================
|
||||
|
||||
This Plugin deploys Instance-Ha on OpenStack using InfraRed
|
||||
|
||||
The Tasks in infrared_instance-ha_plugin_main.yml, along with the
|
||||
plugin.spec at tripleo-ha-utils/plugin.spec provide support
|
||||
for running this repo's roles and playbooks as an Infrared plugin.
|
||||
|
||||
[InfraRed](http://infrared.readthedocs.io/en/stable/) is a plugin based system
|
||||
that aims to provide an easy-to-use CLI for Ansible based projects and
|
||||
OpenStack deployment.
|
||||
|
||||
The plugin provides infrared plugin integration for
|
||||
two OpenStack High-Availability features:
|
||||
|
||||
[instance-ha](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/instance-ha)
|
||||
|
||||
[stonith-config](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/stonith-config)
|
||||
|
||||
Usage:
|
||||
=====
|
||||
|
||||
**Installation and deployment:**
|
||||
|
||||
[Setup InfraRed](http://infrared.readthedocs.io/en/stable/bootstrap.html)
|
||||
|
||||
ir plugin add https://github.com/openstack/tripleo-ha-utils
|
||||
|
||||
export ANSIBLE_ROLES_PATH='plugins/tripleo-ha-utils/roles'
|
||||
|
||||
ir instance-ha-deploy -v --release 12 --stonith_devices all
|
||||
|
||||
*Notice: a failure and a warning will be issued if the plugin's specific ANSIBLE_ROLES_PATH is not defined.*
|
||||
|
||||
|
||||
**Plugin help:**
|
||||
|
||||
ir instance-ha-deploy -h
|
||||
|
||||
|
||||
**Plugin Uninstall:**
|
||||
|
||||
ir plugin remove instance-ha-deploy
|
||||
|
||||
|
||||
|
||||
|
||||
Author Information
|
||||
------------------
|
||||
|
||||
Pini Komarov pkomarov@redhat.com
|
@ -1,128 +0,0 @@
|
||||
---
|
||||
- name: check ANSIBLE_ROLES_PATH variable
|
||||
hosts: localhost
|
||||
tasks:
|
||||
|
||||
- shell: |
|
||||
echo $ANSIBLE_ROLES_PATH
|
||||
name: check $ANSIBLE_ROLES_PATH variable
|
||||
register: ansible_roles_path_out
|
||||
|
||||
- name: check $ANSIBLE_ROLES_PATH is set
|
||||
fail:
|
||||
msg: Please export ANSIBLE_ROLES_PATH='plugins/tripleo-ha-utils/roles' ; Before running this playbook with infrared.
|
||||
when: '"tripleo-ha-utils" not in ansible_roles_path_out.stdout'
|
||||
|
||||
|
||||
#manual override because of https://github.com/ansible/ansible/issues/26336
|
||||
#- name: Configure Instance HA
|
||||
# hosts: undercloud
|
||||
# gather_facts: yes
|
||||
#
|
||||
# tasks:
|
||||
#
|
||||
# - include_role:
|
||||
# name: instance-ha
|
||||
|
||||
#This creates the clouds.yaml file from undercloud/overcloud credentials
|
||||
#for use in pythonsdk api for osp connection and querying:
|
||||
|
||||
- name: create clouds.yaml for pythonsdk api
|
||||
hosts: undercloud
|
||||
tasks:
|
||||
|
||||
- name: get undercloud variables
|
||||
shell: |
|
||||
for key in $( set | awk '{FS="="} /^OS_/ {print $1}' ); do unset $key ; done
|
||||
source /home/stack/stackrc
|
||||
echo -n "undercloud: {'auth': { 'auth_url': '$OS_AUTH_URL', 'username': '$OS_USERNAME', 'password': '$OS_PASSWORD', 'project_name': '${OS_PROJECT_NAME:-$OS_TENANT_NAME}', 'project_domain_name': '$OS_PROJECT_DOMAIN_NAME', 'user_domain_name': '$OS_USER_DOMAIN_NAME'}}"
|
||||
|
||||
register: cloud_details
|
||||
|
||||
- name: create clouds.yaml if doesn't exist
|
||||
blockinfile:
|
||||
content: 'clouds:'
|
||||
dest: /home/stack/clouds.yaml
|
||||
marker: "#{mark} HEADER"
|
||||
create: yes
|
||||
|
||||
- name: insert undercloud parameters
|
||||
blockinfile:
|
||||
dest: /home/stack/clouds.yaml
|
||||
block: |5
|
||||
{{ cloud_details.stdout|from_yaml|to_nice_yaml(indent=4) }}
|
||||
insertbefore: "#END undercloud SECTION"
|
||||
marker: "#{mark} undercloud PARAMETERS"
|
||||
|
||||
- name: get overcloud variables
|
||||
shell: |
|
||||
for key in $( set | awk '{FS="="} /^OS_/ {print $1}' ); do unset $key ; done
|
||||
source /home/stack/overcloudrc
|
||||
echo -n "overcloud: {'auth': { 'auth_url': '$OS_AUTH_URL', 'username': '$OS_USERNAME', 'password': '$OS_PASSWORD', 'project_name': '${OS_PROJECT_NAME:-$OS_TENANT_NAME}', 'project_domain_name': '$OS_PROJECT_DOMAIN_NAME', 'user_domain_name': '$OS_USER_DOMAIN_NAME' }}"
|
||||
|
||||
register: cloud_details
|
||||
|
||||
- name: create clouds.yaml if doesn't exist
|
||||
blockinfile:
|
||||
content: 'clouds:'
|
||||
dest: /home/stack/clouds.yaml
|
||||
marker: "#{mark} HEADER"
|
||||
create: yes
|
||||
|
||||
- name: insert overcloud parameters
|
||||
blockinfile:
|
||||
dest: /home/stack/clouds.yaml
|
||||
block: |5
|
||||
{{ cloud_details.stdout|from_yaml|to_nice_yaml(indent=4) }}
|
||||
insertbefore: "#END overcloud SECTION"
|
||||
marker: "#{mark} overcloud PARAMETERS"
|
||||
|
||||
|
||||
#This executes all from the undercloud itself:
|
||||
|
||||
- name: Configure Instance HA
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
tasks:
|
||||
|
||||
- name: create ansible hosts file
|
||||
template:
|
||||
src: templates/ansible_hosts.yml.j2
|
||||
dest: /home/stack/hosts
|
||||
owner: stack
|
||||
group: stack
|
||||
mode: 0644
|
||||
|
||||
- name: create ssh.config.ansible file
|
||||
shell: |
|
||||
source /home/stack/stackrc
|
||||
echo -e "Host undercloud\n Hostname 127.0.0.1\n IdentityFile /home/stack/.ssh/id_rsa\n User stack\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n" > ssh.config.ansible
|
||||
openstack server list -c Name -c Networks | awk '/ctlplane/ {print $2, $4}' | sed s/ctlplane=//g | while read node; do node_name=$(echo $node | cut -f 1 -d " "); node_ip=$(echo $node | cut -f 2 -d " "); echo -e "Host $node_name\n Hostname $node_ip\n IdentityFile /home/stack/.ssh/id_rsa\n User heat-admin\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n"; done >> ssh.config.ansible
|
||||
|
||||
- name: get tripleo-ha-utils repo
|
||||
git:
|
||||
repo: 'https://github.com/openstack/tripleo-ha-utils.git'
|
||||
dest: /home/stack/tripleo-ha-utils
|
||||
|
||||
- name: create ansible env file
|
||||
shell: |
|
||||
cat >/home/stack/ansible_ha.env<<EOF
|
||||
export ANSIBLE_INVENTORY="/home/stack/hosts"
|
||||
export ANSIBLE_SSH_ARGS="-F /home/stack/ssh.config.ansible"
|
||||
export ANSIBLE_CONFIG="/home/stack/ansible.cfg"
|
||||
|
||||
- name: create ansible.cfg file
|
||||
shell: |
|
||||
printf "[defaults]\nroles_path = /home/stack/tripleo-ha-utils/roles" > /home/stack/ansible.cfg
|
||||
|
||||
- name: run instance-ha deploy script
|
||||
shell: |
|
||||
source /home/stack/ansible_ha.env
|
||||
ansible-playbook -v /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release={{release}} -e stonith_devices={{stonith_devices}} -e instance_ha_shared_storage={{instance_ha_shared_storage}} -e instance_ha_action={{instance_ha_action}}
|
||||
register: instance_ha_deploy_outcome
|
||||
|
||||
vars:
|
||||
instance_ha_action: '{{ install.instance_ha_action }}'
|
||||
release: '{{ install.release }}'
|
||||
stonith_devices: '{{ install.stonith_devices }}'
|
||||
instance_ha_shared_storage: '{{ install.instance_ha_shared_storage }}'
|
@ -1,27 +0,0 @@
|
||||
undercloud ansible_host=undercloud ansible_user=stack ansible_private_key_file=/home/stack/.ssh/id_rsa
|
||||
|
||||
{% for overcloud_host in groups['overcloud_nodes'] %}
|
||||
{{overcloud_host}} ansible_host={{overcloud_host}} ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
|
||||
{% endfor %}
|
||||
|
||||
{% for overcloud_host in groups['overcloud_nodes'] %}
|
||||
{{overcloud_host}} ansible_host={{overcloud_host}} ansible_user=heat-admin ansible_private_key_file=/home/stack/.ssh/id_rsa
|
||||
{% endfor %}
|
||||
|
||||
[compute]
|
||||
{% for overcloud_host in groups['compute'] %}
|
||||
{{overcloud_host}}
|
||||
{% endfor %}
|
||||
|
||||
[undercloud]
|
||||
undercloud
|
||||
|
||||
[overcloud]
|
||||
{% for overcloud_host in groups['overcloud_nodes'] %}
|
||||
{{overcloud_host}}
|
||||
{% endfor %}
|
||||
|
||||
[controller]
|
||||
{% for overcloud_host in groups['controller'] %}
|
||||
{{overcloud_host}}
|
||||
{% endfor %}
|
@ -1,148 +0,0 @@
|
||||
---
|
||||
- name: Baremetal undercloud install
|
||||
hosts: localhost
|
||||
roles:
|
||||
- baremetal-undercloud
|
||||
tags:
|
||||
- baremetal-undercloud
|
||||
|
||||
- name: Add the undercloud node to the generated inventory
|
||||
hosts: localhost
|
||||
gather_facts: yes
|
||||
roles:
|
||||
- tripleo-inventory
|
||||
tags:
|
||||
- undercloud-inventory
|
||||
|
||||
- name: Setup repositories
|
||||
hosts: undercloud
|
||||
gather_facts: yes
|
||||
roles:
|
||||
- repo-setup
|
||||
tags:
|
||||
- undercloud-repo-setup
|
||||
|
||||
- name: Install packages
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
roles:
|
||||
- baremetal-undercloud/packages
|
||||
tags:
|
||||
- undercloud-pkgs-install
|
||||
|
||||
- name: Deploy the undercloud
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
roles:
|
||||
- undercloud-deploy
|
||||
tags:
|
||||
- undercloud-deploy
|
||||
|
||||
- name: Prepare baremetal for the overcloud deployment
|
||||
hosts: undercloud
|
||||
roles:
|
||||
- baremetal-prep-overcloud
|
||||
tags:
|
||||
- baremetal-prep-overcloud
|
||||
|
||||
- name: Prepare configuration files for the overcloud deployment
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
roles:
|
||||
- overcloud-prep-config
|
||||
tags:
|
||||
- overcloud-prep-config
|
||||
|
||||
- name: Prepare overcloud containers
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
roles:
|
||||
- overcloud-prep-containers
|
||||
tags:
|
||||
- overcloud-prep-containers
|
||||
|
||||
- name: Fetch the overcloud images
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
become: true
|
||||
roles:
|
||||
- fetch-images
|
||||
tags:
|
||||
- overcloud-fetch-images
|
||||
|
||||
- name: Prepare the overcloud images for deployment
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
roles:
|
||||
- overcloud-prep-images
|
||||
tags:
|
||||
- overcloud-prep-images
|
||||
|
||||
- name: Prepare overcloud flavors
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
roles:
|
||||
- overcloud-prep-flavors
|
||||
tags:
|
||||
- overcloud-prep-flavors
|
||||
|
||||
- name: Prepare the undercloud networks for the overcloud deployment
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
roles:
|
||||
- overcloud-prep-network
|
||||
tags:
|
||||
- overcloud-prep-network
|
||||
|
||||
- name: Prepare SSL for the overcloud
|
||||
hosts: undercloud
|
||||
gather_facts: yes
|
||||
roles:
|
||||
- overcloud-ssl
|
||||
tags:
|
||||
- overcloud-ssl
|
||||
|
||||
- name: Deploy the overcloud
|
||||
hosts: undercloud
|
||||
gather_facts: yes
|
||||
roles:
|
||||
- overcloud-deploy
|
||||
tags:
|
||||
- overcloud-deploy
|
||||
|
||||
- name: Add the overcloud nodes to the generated inventory
|
||||
hosts: undercloud
|
||||
gather_facts: yes
|
||||
vars:
|
||||
inventory: all
|
||||
roles:
|
||||
- tripleo-inventory
|
||||
tags:
|
||||
- overcloud-inventory
|
||||
|
||||
- name: Check the result of the deployment
|
||||
hosts: localhost
|
||||
tasks:
|
||||
- name: ensure the deployment result has been read into memory
|
||||
include_vars: "{{ local_working_dir }}/overcloud_deployment_result.json"
|
||||
|
||||
# overcloud_deploy_result = ["failed", "passed"]
|
||||
- name: did the deployment pass or fail?
|
||||
debug: var=overcloud_deploy_result
|
||||
failed_when: overcloud_deploy_result == "failed"
|
||||
tags:
|
||||
- overcloud-deploy-check
|
||||
|
||||
- name: Gather undercloud and overcloud facts
|
||||
hosts: undercloud overcloud
|
||||
gather_facts: yes
|
||||
tags:
|
||||
- overcloud-validate-ha
|
||||
|
||||
- name: Validate the overcloud using HA tests
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
roles:
|
||||
- validate-ha
|
||||
tags:
|
||||
- overcloud-validate-ha
|
@ -1,10 +0,0 @@
|
||||
---
|
||||
- name: Gather undercloud and overcloud facts
|
||||
hosts: undercloud overcloud
|
||||
gather_facts: yes
|
||||
|
||||
- name: Configure Instance HA
|
||||
hosts: undercloud
|
||||
gather_facts: no
|
||||
roles:
|
||||
- instance-ha
|
@ -1,7 +0,0 @@
|
||||
---
|
||||
|
||||
- name: Configure STONITH for all the hosts on the overcloud
|
||||
hosts: undercloud
|
||||
gather_facts: yes
|
||||
roles:
|
||||
- stonith-config
|
@ -1,14 +0,0 @@
|
||||
---
|
||||
- name: Gather undercloud and overcloud facts
|
||||
hosts: undercloud overcloud
|
||||
gather_facts: yes
|
||||
tags:
|
||||
- overcloud-validate-ha
|
||||
|
||||
- name: Validate overcloud HA status
|
||||
hosts: undercloud
|
||||
gather_facts: yes
|
||||
tags:
|
||||
- overcloud-validate-ha
|
||||
roles:
|
||||
- validate-ha
|
37
plugin.spec
37
plugin.spec
@ -1,37 +0,0 @@
|
||||
---
|
||||
config:
|
||||
entry_point: ./infrared/infrared_instance-ha_plugin_main.yml
|
||||
plugin_type: install
|
||||
subparsers:
|
||||
instance-ha-deploy:
|
||||
description: Collection of instance-ha configuration tasks
|
||||
include_groups: ["Ansible options", "Inventory", "Common options", "Answers file"]
|
||||
groups:
|
||||
|
||||
- title: Instance HA
|
||||
options:
|
||||
instance_ha_action:
|
||||
type: Value
|
||||
default: install
|
||||
help: |
|
||||
Can be 'install' or 'uninstall'
|
||||
|
||||
release:
|
||||
type: Value
|
||||
help: |
|
||||
A rhos release - version_number.
|
||||
Example: "rhos-10".
|
||||
required: yes
|
||||
stonith_devices:
|
||||
type: Value
|
||||
default: controllers
|
||||
help: |
|
||||
Can be all, controllers or computes
|
||||
|
||||
instance_ha_shared_storage:
|
||||
type: Bool
|
||||
help: |
|
||||
Do we have a shared storage or not?
|
||||
default: False
|
||||
|
||||
|
@ -1,53 +0,0 @@
|
||||
Rally tests
|
||||
===========
|
||||
|
||||
This directory contains all the files available to use Rally for testing the
|
||||
behavior of the TripleO environment.
|
||||
For example you can test if instance HA is behaving correctly inside the
|
||||
overcloud environment in which it was configured.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
A working and accessible TripleO environment, as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).
|
||||
You will need a *hosts* file containing the whole environment inventory and, if needed, a
|
||||
*ssh.config.ansible* with all the information to access nodes.
|
||||
|
||||
How to use Rally to test Instance HA
|
||||
------------------------------------
|
||||
|
||||
If you want to launch a Rally test session to check how Instance HA is behaving
|
||||
in the overcloud, you can rely on a command like this one:
|
||||
|
||||
ansible-playbook -i hosts \
|
||||
-e public_physical_network="public" \
|
||||
-e floating_ip_cidr="192.168.99.0/24" \
|
||||
-e public_net_pool_start="192.168.99.211" \
|
||||
-e public_net_pool_end="192.168.99.216" \
|
||||
-e public_net_gateway="192.168.99.254" \
|
||||
tripleo-ha-utils/rally/instance-ha.yml
|
||||
|
||||
this command can be launched from the *undercloud* machine or from a jump host
|
||||
(which must have all the required files locally).
|
||||
The requested parameters refer to the network settings in which the instances
|
||||
will be spawned into.
|
||||
|
||||
This will execute the tests contained in the template yaml:
|
||||
|
||||
* *InstanceHA.recover_instance_fip_and_volume*: spawn an instance, stop the
|
||||
compute it's running on, check it migrates, check node recovers;
|
||||
* *InstanceHA.recover_stopped_instance_fip*: spawn an instance, put it in
|
||||
stopped status, stop the compute it's running on, check it migrates, check
|
||||
node recovers;
|
||||
* *InstanceHA.recover_instance_two_cycles*: do as in the first step, but two
|
||||
times;
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
GPL
|
||||
|
||||
Author Information
|
||||
------------------
|
||||
|
||||
Raoul Scarazzini <rasca@redhat.com>
|
@ -1,99 +0,0 @@
|
||||
---
|
||||
- hosts: undercloud
|
||||
gather_facts: no
|
||||
become: yes
|
||||
become_method: sudo
|
||||
tasks:
|
||||
- name: Install Rally dependencies
|
||||
shell: |
|
||||
# Python pip
|
||||
wget https://bootstrap.pypa.io/get-pip.py -O get-pip.py
|
||||
python get-pip.py
|
||||
# Dependencies
|
||||
yum install -y gmp-devel libffi-devel libxml2-devel libxslt-devel openssl-devel postgresql-devel gcc python-devel
|
||||
|
||||
- hosts: undercloud
|
||||
gather_facts: no
|
||||
tasks:
|
||||
- name: Install Rally
|
||||
shell: |
|
||||
# Install Rally from upstream
|
||||
wget -q -O- https://raw.githubusercontent.com/openstack/rally/master/install_rally.sh | bash |& tee rally-install.log
|
||||
mkdir -p .rally/plugins
|
||||
- name: Check Rally installation
|
||||
shell: |
|
||||
source /home/stack/rally/bin/activate
|
||||
rally --version
|
||||
|
||||
- hosts: undercloud
|
||||
gather_facts: no
|
||||
tasks:
|
||||
- name: Copy instance-ha Rally plugin to remote rally directory
|
||||
copy:
|
||||
src: plugins/instanceha.py
|
||||
dest: .rally/plugins
|
||||
|
||||
- hosts: undercloud
|
||||
gather_facts: no
|
||||
tasks:
|
||||
- name: Install Rally environment and create deployment
|
||||
shell: |
|
||||
source /home/stack/overcloudrc
|
||||
source /home/stack/rally/bin/activate
|
||||
export OS_INSECURE=True
|
||||
rally deployment create --fromenv --name overcloud |& tee rally-instance-ha-deployment-create.log
|
||||
rally deployment use overcloud
|
||||
|
||||
- hosts: undercloud
|
||||
gather_facts: no
|
||||
tasks:
|
||||
- name: Prepare overcloud env
|
||||
shell: |
|
||||
source /home/stack/overcloudrc
|
||||
|
||||
projectid=$(openstack project list | awk '/admin/ {print $2}')
|
||||
wget -O /tmp/cirros-0.3.4-x86_64-disk.img http://download.cirros-cloud.net/0.3.4/cirros-0.3.4-x86_64-disk.img
|
||||
glance --os-project-id=$projectid image-create --name cirros --container-format bare --disk-format raw --file /tmp/cirros-0.3.4-x86_64-disk.img --visibility public
|
||||
|
||||
nova flavor-create --ephemeral 0 --is-public True m1.tiny overcloud-instance-test-small-flavor 2048 20 1
|
||||
|
||||
neutron net-create {{ public_physical_network }}-network --router:external=True --provider:physical_network {{ public_physical_network }} --provider:network_type flat
|
||||
neutron subnet-create --name {{ public_physical_network }}-subnet --disable-dhcp --allocation-pool start={{ public_net_pool_start }},end={{ public_net_pool_end }} --gateway {{ public_net_gateway }} {{ public_physical_network }}-network {{ floating_ip_cidr }}
|
||||
|
||||
- hosts: undercloud
|
||||
gather_facts: no
|
||||
tasks:
|
||||
- name: Copy Rally task file
|
||||
template:
|
||||
src: templates/instance-ha.yaml.j2
|
||||
dest: "/home/stack/instance-ha.yaml"
|
||||
mode: 0666
|
||||
|
||||
- name: Start Rally task
|
||||
shell: |
|
||||
source /home/stack/rally/bin/activate
|
||||
rally task start --task /home/stack/instance-ha.yaml --deployment overcloud |& tee rally-instance-ha-run.log
|
||||
|
||||
- name: Create Report JUnit
|
||||
shell: |
|
||||
source /home/stack/rally/bin/activate
|
||||
rally task report --junit --out /home/stack/nosetests.xml |& tee rally-instance-ha-report.log
|
||||
|
||||
- fetch:
|
||||
src: "/home/stack/nosetests.xml"
|
||||
dest: "{{ lookup('env', 'PWD') }}/nosetests.xml"
|
||||
flat: yes
|
||||
|
||||
- hosts: undercloud
|
||||
gather_facts: no
|
||||
tasks:
|
||||
- name: Remove overcloud env
|
||||
shell: |
|
||||
source /home/stack/overcloudrc
|
||||
|
||||
projectid=$(openstack project list | awk '/admin/ {print $2}')
|
||||
glance --os-project-id=$projectid image-delete $(glance --os-project-id=$projectid image-list | awk '/cirros/ {print $2}')
|
||||
|
||||
nova flavor-delete overcloud-instance-test-small-flavor
|
||||
|
||||
neutron net-delete {{ public_physical_network }}-network
|
@ -1,458 +0,0 @@
|
||||
from os import path
|
||||
import socket
|
||||
import time
|
||||
|
||||
|
||||
from rally.common import logging
|
||||
from rally.common import sshutils
|
||||
from rally import exceptions
|
||||
from rally_openstack import consts
|
||||
from rally_openstack import scenario
|
||||
from rally_openstack.scenarios.vm import utils as vm_utils
|
||||
from rally_openstack.scenarios.cinder import utils as cinder_utils
|
||||
from rally.task import atomic
|
||||
from rally.task import types
|
||||
from rally.task import validation
|
||||
from rally.task import utils as task_utils
|
||||
import six
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def failover(self, host, command, port=22, username="", password="",
             key_filename=None, pkey=None):
    """Run a disruptor command on ``host`` over SSH.

    :param host: address of the host the command is sent to
    :param command: command specification, passed through unchanged to
        ``_run_command``
    :param port: SSH port
    :param username: SSH user name
    :param password: SSH password
    :param key_filename: private key file; ``~`` is expanded when a value
        is given
    :param pkey: private key for SSH authentication
    :raises exceptions.ScriptError: when the command reports a positive
        exit status
    :return: None
    """
    # Only expand the path when one was actually supplied; a falsy value
    # is forwarded untouched.
    key_file = path.expanduser(key_filename) if key_filename else key_filename

    LOG.info("Host: %s. Injecting Failover %s" % (host,
                                                  command))

    ssh_kwargs = {
        "server_ip": host,
        "port": port,
        "username": username,
        "password": password,
        "key_filename": key_file,
        "pkey": pkey,
        "command": command,
    }
    try:
        code, out, err = _run_command(self, **ssh_kwargs)
    except exceptions.SSHTimeout:
        # A timeout is tolerated here: it is logged and the function
        # returns normally.
        LOG.debug("SSH session of disruptor command timeouted, continue...")
        return

    if code and code > 0:
        details = {"command": command, "code": code, "error": err}
        raise exceptions.ScriptError(
            "Error running command %(command)s. "
            "Error %(code)s: %(error)s" % details)
|
||||
|
||||
|
||||
def _run_command(self, server_ip, port, username, password, command,
                 pkey=None, key_filename=None):
    """Open an SSH connection to a server and execute ``command`` on it.

    The connection object is created first, then ``self._wait_for_ssh`` is
    called on it, and finally the command is handed to
    ``_run_command_over_ssh``.

    Note: shadows vm.utils.VMScenario._run_command to support key_filename.

    :param server_ip: server ip address
    :param port: ssh port for SSH connection
    :param username: str. ssh username for server
    :param password: Password for SSH authentication
    :param command: Dictionary specifying command to execute.
        See `rally info find VMTasks.boot_runcommand_delete' parameter
        `command' docstring for explanation.
    :param pkey: key for SSH authentication
    :param key_filename: private key filename for SSH authentication
    :returns: tuple (exit_status, stdout, stderr)
    """
    # Without a key file and without an explicit key, fall back to the
    # private key of the keypair stored in the scenario context.
    if not key_filename and not pkey:
        pkey = self.context["user"]["keypair"]["private"]

    connection = sshutils.SSH(
        username,
        server_ip,
        port=port,
        pkey=pkey,
        password=password,
        key_filename=key_filename,
    )
    self._wait_for_ssh(connection)
    return _run_command_over_ssh(self, connection, command)
|
||||
|
||||
|
||||
@atomic.action_timer("vm.run_command_over_ssh")
def _run_command_over_ssh(self, ssh, command):
    """Run command inside an instance.

    This is a separate function so that only script execution is timed.

    The command line is assembled from the ``interpreter``,
    ``remote_path`` and ``command_args`` entries of ``command``. When
    ``local_path`` is set, that file is uploaded to the last element of
    ``remote_path`` first. ``script_file`` or ``script_inline`` (checked in
    that order) supplies the stdin of the remote command.

    :param ssh: A SSHClient instance.
    :param command: Dictionary specifying command to execute.
        See `rally info find VMTasks.boot_runcommand_delete' parameter
        `command' docstring for explanation.
    :raises ValueError: if ``interpreter`` or ``remote_path`` is neither a
        string nor a list
    :returns: tuple (exit_status, stdout, stderr)
    """
    cmd, stdin = [], None

    interpreter = command.get("interpreter") or []
    if interpreter:
        if isinstance(interpreter, six.string_types):
            interpreter = [interpreter]
        elif not isinstance(interpreter, list):
            raise ValueError("command 'interpreter' value must be str "
                             "or list type")
        cmd.extend(interpreter)

    remote_path = command.get("remote_path") or []
    if remote_path:
        if isinstance(remote_path, six.string_types):
            remote_path = [remote_path]
        elif not isinstance(remote_path, list):
            raise ValueError("command 'remote_path' value must be str "
                             "or list type")
        cmd.extend(remote_path)
        if command.get("local_path"):
            # The module imports ``path`` via ``from os import path``; the
            # bare name ``os`` is not bound here, so ``path`` must be used.
            ssh.put_file(path.expanduser(command["local_path"]),
                         remote_path[-1],
                         mode=self.USER_RWX_OTHERS_RX_ACCESS_MODE)

    if command.get("script_file"):
        stdin = open(path.expanduser(command["script_file"]), "rb")
    elif command.get("script_inline"):
        stdin = six.moves.StringIO(command["script_inline"])

    cmd.extend(command.get("command_args") or [])

    try:
        return ssh.execute(cmd, stdin=stdin, timeout=10)
    finally:
        # Release the script file handle (or in-memory buffer) even when
        # the remote execution raises.
        if stdin is not None:
            stdin.close()
|
||||
|
||||
|
||||
def one_killing_iteration(self, server, fip, computes, disruptor_cmd,
|
||||
stop_instance):
|
||||
"""Find the host where instance is hosted, disrupt the host and
|
||||
verify status of the instance after the failover"""
|
||||
|
||||
server_admin = self.admin_clients("nova").servers.get(server.id)
|
||||
host_name_pre = getattr(server_admin, "OS-EXT-SRV-ATTR:host")
|
||||
host_name_ext = host_name_pre.split('.')[0] + ".external"
|
||||
hypervisors = self.admin_clients("nova").hypervisors.list()
|
||||
hostnames = []
|
||||
for hypervisor in hypervisors:
|
||||
hostnames.append(getattr(hypervisor, "hypervisor_hostname"))
|
||||
if getattr(hypervisor, "hypervisor_hostname") == host_name_pre:
|
||||
hypervisor_id = getattr(hypervisor, "id")
|
||||
hypervisor = self.admin_clients("nova").hypervisors.get(hypervisor_id)
|
||||
hypervisor_ip = socket.gethostbyname(host_name_ext.strip())
|
||||
|
||||
if not disruptor_cmd:
|
||||
disruptor_cmd = {
|
||||
"script_inline": "sudo sh -c \"echo b > /proc/sysrq-trigger\"",
|
||||
"interpreter": "/bin/sh"
|
||||
}
|
||||
|
||||
# Trigger failover of compute node hosting the instance
|
||||
failover(self, host=hypervisor_ip,
|
||||
command=disruptor_cmd,
|
||||
port=computes.get("port", 22),
|
||||
username=computes.get("username"),
|
||||
password=computes.get("password"),
|
||||
key_filename=computes.get("key_filename"),
|
||||
pkey=computes.get("pkey")
|
||||
)
|
||||
# Wait for instance to be moved to different host
|
||||
hostnames.remove(host_name_pre)
|
||||
task_utils.wait_for(
|
||||
server_admin,
|
||||
status_attr="OS-EXT-SRV-ATTR:host",
|
||||
ready_statuses=hostnames,
|
||||
update_resource=task_utils.get_from_manager(),
|
||||
timeout=120,
|
||||
check_interval=5
|
||||
)
|
||||
|
||||
# Check the instance is SHUTOFF in the case of stopped instance or
|
||||
# that the instance is pingable
|
||||
if stop_instance:
|
||||
task_utils.wait_for(
|
||||
server,
|
||||
ready_statuses=["SHUTOFF"],
|
||||
update_resource=task_utils.get_from_manager(),
|
||||
timeout=60,
|
||||
check_interval=2
|
||||
)
|
||||
#server_admin = self.admin_clients("nova").servers.get(server.id)
|
||||
#host_name_post = getattr(server_admin, "OS-EXT-SRV-ATTR:host")
|
||||
#if host_name_post in host_name_pre:
|
||||
#raise exceptions.InvalidHostException()
|
||||
else:
|
||||
try:
|
||||
if self.wait_for_ping:
|
||||
self._wait_for_ping(fip["ip"])
|
||||
except exceptions.TimeoutException:
|
||||
console_logs = self._get_server_console_output(server,
|
||||
None)
|
||||
LOG.debug("VM console logs:\n%s", console_logs)
|
||||
raise
|
||||
|
||||
|
||||
def recover_instance_ha(self, image, flavor, computes,
|
||||
volume_args=None,
|
||||
floating_network=None,
|
||||
use_floating_ip=True,
|
||||
force_delete=False,
|
||||
stop_instance=False,
|
||||
disruptor_cmd=None,
|
||||
iterations=1,
|
||||
wait_for_ping=True,
|
||||
max_log_length=None,
|
||||
**kwargs):
|
||||
"""Boot a server, trigger failover of host and verify instance.
|
||||
|
||||
:param image: glance image name to use for the vm
|
||||
:param flavor: VM flavor name
|
||||
:param computes: dictionary with credentials to the compute nodes
|
||||
consisting of username, password, port, key_filename, disruptor
|
||||
command and pkey.
|
||||
Examples::
|
||||
computes: {
|
||||
username: heat-admin,
|
||||
key_filename: /path/to/ssh/id_rsa.pub
|
||||
port: 22
|
||||
}
|
||||
:param volume_args: volume args for booting server from volume
|
||||
:param floating_network: external network name, for floating ip
|
||||
:param use_floating_ip: bool, floating or fixed IP for SSH connection
|
||||
:param force_delete: whether to use force_delete for servers
|
||||
:param stop_instance: whether to stop instance before disruptor command
|
||||
:param disruptor_cmd: command to be send to hosting compute node
|
||||
:param iterations: number of compute node killing iteration
|
||||
:param wait_for_ping: whether to check connectivity on server creation
|
||||
:param **kwargs: extra arguments for booting the server
|
||||
:param max_log_length: The number of tail nova console-log lines user
|
||||
would like to retrieve
|
||||
:returns:
|
||||
"""
|
||||
|
||||
self.wait_for_ping = wait_for_ping
|
||||
|
||||
if volume_args:
|
||||
volume = self.cinder.create_volume(volume_args["size"], imageRef=None)
|
||||
kwargs["block_device_mapping"] = {"vdrally": "%s:::1" % volume.id}
|
||||
|
||||
server, fip = self._boot_server_with_fip(
|
||||
image, flavor, use_floating_ip=use_floating_ip,
|
||||
floating_network=floating_network,
|
||||
key_name=self.context["user"]["keypair"]["name"],
|
||||
**kwargs)
|
||||
|
||||
task_utils.wait_for(
|
||||
server,
|
||||
ready_statuses=["ACTIVE"],
|
||||
update_resource=task_utils.get_from_manager(),
|
||||
timeout=120,
|
||||
check_interval=2
|
||||
)
|
||||
|
||||
try:
|
||||
if self.wait_for_ping:
|
||||
self._wait_for_ping(fip["ip"])
|
||||
except exceptions.TimeoutException:
|
||||
console_logs = self._get_server_console_output(server,
|
||||
max_log_length)
|
||||
LOG.debug("VM console logs:\n%s", console_logs)
|
||||
raise
|
||||
|
||||
if stop_instance:
|
||||
self._stop_server(server)
|
||||
task_utils.wait_for(
|
||||
server,
|
||||
ready_statuses=["SHUTOFF"],
|
||||
update_resource=task_utils.get_from_manager(),
|
||||
timeout=120,
|
||||
check_interval=2
|
||||
)
|
||||
|
||||
# Wait a little before killing the compute
|
||||
# If we do not wait, backing image will get corrupted which was reported as bug
|
||||
time.sleep(30)
|
||||
|
||||
for iteration in range(1, iterations+1):
|
||||
one_killing_iteration(self, server, fip, computes,
|
||||
disruptor_cmd, stop_instance)
|
||||
# Give cluster some time to recover original compute node
|
||||
LOG.info("Wait for compute nodes to come online after previous disruption")
|
||||
time.sleep(360)
|
||||
|
||||
if stop_instance:
|
||||
# Start instance If It was stopped.
|
||||
self._start_server(server)
|
||||
|
||||
task_utils.wait_for(
|
||||
server,
|
||||
ready_statuses=["ACTIVE"],
|
||||
update_resource=task_utils.get_from_manager(),
|
||||
timeout=120,
|
||||
check_interval=2
|
||||
)
|
||||
self._delete_server_with_fip(server, fip, force_delete=force_delete)
|
||||
|
||||
|
||||
@types.convert(
    image={"type": "glance_image"},
    flavor={"type": "nova_flavor"},
)
@validation.add(
    "image_valid_on_flavor",
    flavor_param="flavor",
    image_param="image",
)
@validation.add("valid_command", param_name="command", required=False)
@validation.add(
    "number",
    param_name="port",
    minval=1,
    maxval=65535,
    nullable=True,
    integer_only=True,
)
@validation.add("external_network_exists", param_name="floating_network")
@validation.add(
    "required_services",
    services=[consts.Service.NOVA, consts.Service.CINDER],
)
@validation.add(
    "required_platform",
    platform="openstack",
    users=True,
    admin=True,
)
@scenario.configure(
    context={
        "cleanup@openstack": ["nova", "cinder"],
        "keypair@openstack": {},
        "allow_ssh@openstack": None,
    },
    name="InstanceHA.recover_instance_fip_and_volume",
    platform="openstack",
)
class InstanceHARecoverFIPAndVolume(vm_utils.VMScenario,
                                    cinder_utils.CinderBasic):
    """Scenario registered as ``InstanceHA.recover_instance_fip_and_volume``.

    The whole scenario body lives in the ``recover_instance_ha`` helper;
    this class only forwards the arguments of ``run`` to it and passes no
    additional options of its own.
    """

    def __init__(self, *args, **kwargs):
        """Pass construction straight through to the parent scenarios."""
        super(InstanceHARecoverFIPAndVolume, self).__init__(*args, **kwargs)

    def run(self, image, flavor, computes,
            volume_args=None,
            floating_network=None,
            use_floating_ip=True,
            force_delete=False,
            wait_for_ping=True,
            max_log_length=None,
            **kwargs):
        """Run the scenario by delegating to ``recover_instance_ha``.

        :param image: forwarded positionally (converted by the
            ``glance_image`` type declared on the class)
        :param flavor: forwarded positionally (converted by the
            ``nova_flavor`` type declared on the class)
        :param computes: forwarded positionally, unchanged
        :param volume_args: forwarded as keyword, default ``None``
        :param floating_network: forwarded as keyword, default ``None``
        :param use_floating_ip: forwarded as keyword, default ``True``
        :param force_delete: forwarded as keyword, default ``False``
        :param wait_for_ping: forwarded as keyword, default ``True``
        :param max_log_length: forwarded as keyword, default ``None``
        :param kwargs: any further keyword arguments, forwarded as-is
        """
        recover_instance_ha(
            self, image, flavor, computes,
            volume_args=volume_args,
            floating_network=floating_network,
            use_floating_ip=use_floating_ip,
            force_delete=force_delete,
            wait_for_ping=wait_for_ping,
            max_log_length=max_log_length,
            **kwargs
        )
|
||||
|
||||
|
||||
@types.convert(
    image={"type": "glance_image"},
    flavor={"type": "nova_flavor"},
)
@validation.add(
    "image_valid_on_flavor",
    flavor_param="flavor",
    image_param="image",
)
@validation.add("valid_command", param_name="command", required=False)
@validation.add(
    "number",
    param_name="port",
    minval=1,
    maxval=65535,
    nullable=True,
    integer_only=True,
)
@validation.add("external_network_exists", param_name="floating_network")
@validation.add(
    "required_services",
    services=[consts.Service.NOVA, consts.Service.CINDER],
)
@validation.add(
    "required_platform",
    platform="openstack",
    users=True,
    admin=True,
)
@scenario.configure(
    context={
        "cleanup@openstack": ["nova", "cinder"],
        "keypair@openstack": {},
        "allow_ssh@openstack": None,
    },
    name="InstanceHA.recover_instance_two_cycles",
    platform="openstack",
)
class InstanceHARecoverTwoCycle(vm_utils.VMScenario,
                                cinder_utils.CinderBasic):
    """Scenario registered as ``InstanceHA.recover_instance_two_cycles``.

    Forwards the arguments of ``run`` to the ``recover_instance_ha``
    helper and additionally fixes ``iterations=2``.
    """

    def __init__(self, *args, **kwargs):
        """Pass construction straight through to the parent scenarios."""
        super(InstanceHARecoverTwoCycle, self).__init__(*args, **kwargs)

    def run(self, image, flavor, computes,
            volume_args=None,
            floating_network=None,
            use_floating_ip=True,
            force_delete=False,
            wait_for_ping=True,
            max_log_length=None,
            **kwargs):
        """Run the scenario by delegating to ``recover_instance_ha``.

        :param image: forwarded positionally (converted by the
            ``glance_image`` type declared on the class)
        :param flavor: forwarded positionally (converted by the
            ``nova_flavor`` type declared on the class)
        :param computes: forwarded positionally, unchanged
        :param volume_args: forwarded as keyword, default ``None``
        :param floating_network: forwarded as keyword, default ``None``
        :param use_floating_ip: forwarded as keyword, default ``True``
        :param force_delete: forwarded as keyword, default ``False``
        :param wait_for_ping: forwarded as keyword, default ``True``
        :param max_log_length: forwarded as keyword, default ``None``
        :param kwargs: any further keyword arguments, forwarded as-is
        """
        recover_instance_ha(
            self, image, flavor, computes,
            volume_args=volume_args,
            floating_network=floating_network,
            use_floating_ip=use_floating_ip,
            force_delete=force_delete,
            # The only thing this scenario adds: two iterations.
            iterations=2,
            wait_for_ping=wait_for_ping,
            max_log_length=max_log_length,
            **kwargs
        )
|
||||
|
||||
|
||||
@types.convert(
    image={"type": "glance_image"},
    flavor={"type": "nova_flavor"},
)
@validation.add(
    "image_valid_on_flavor",
    flavor_param="flavor",
    image_param="image",
)
@validation.add("valid_command", param_name="command", required=False)
@validation.add(
    "number",
    param_name="port",
    minval=1,
    maxval=65535,
    nullable=True,
    integer_only=True,
)
@validation.add("external_network_exists", param_name="floating_network")
@validation.add(
    "required_services",
    services=[consts.Service.NOVA, consts.Service.CINDER],
)
@validation.add(
    "required_platform",
    platform="openstack",
    users=True,
    admin=True,
)
@scenario.configure(
    context={
        "cleanup@openstack": ["nova", "cinder"],
        "keypair@openstack": {},
        "allow_ssh@openstack": None,
    },
    name="InstanceHA.recover_stopped_instance_fip",
    platform="openstack",
)
class InstanceHARecoverStopped(vm_utils.VMScenario,
                               cinder_utils.CinderBasic):
    """Scenario registered as ``InstanceHA.recover_stopped_instance_fip``.

    Forwards the arguments of ``run`` to the ``recover_instance_ha``
    helper and additionally fixes ``stop_instance=True``.
    """

    def __init__(self, *args, **kwargs):
        """Pass construction straight through to the parent scenarios."""
        super(InstanceHARecoverStopped, self).__init__(*args, **kwargs)

    def run(self, image, flavor, computes,
            volume_args=None,
            floating_network=None,
            use_floating_ip=True,
            force_delete=False,
            wait_for_ping=True,
            max_log_length=None,
            **kwargs):
        """Run the scenario by delegating to ``recover_instance_ha``.

        :param image: forwarded positionally (converted by the
            ``glance_image`` type declared on the class)
        :param flavor: forwarded positionally (converted by the
            ``nova_flavor`` type declared on the class)
        :param computes: forwarded positionally, unchanged
        :param volume_args: forwarded as keyword, default ``None``
        :param floating_network: forwarded as keyword, default ``None``
        :param use_floating_ip: forwarded as keyword, default ``True``
        :param force_delete: forwarded as keyword, default ``False``
        :param wait_for_ping: forwarded as keyword, default ``True``
        :param max_log_length: forwarded as keyword, default ``None``
        :param kwargs: any further keyword arguments, forwarded as-is
        """
        recover_instance_ha(
            self, image, flavor, computes,
            volume_args=volume_args,
            floating_network=floating_network,
            use_floating_ip=use_floating_ip,
            force_delete=force_delete,
            # The only thing this scenario adds: the stop_instance flag.
            stop_instance=True,
            wait_for_ping=wait_for_ping,
            max_log_length=max_log_length,
            **kwargs
        )
|
||||
|
||||
|
||||
@types.convert(image={"type": "glance_image"},
               flavor={"type": "nova_flavor"})
@validation.add("image_valid_on_flavor",
                flavor_param="flavor", image_param="image")
@validation.add("valid_command", param_name="command", required=False)
@validation.add("number", param_name="port", minval=1, maxval=65535,
                nullable=True, integer_only=True)
@validation.add("external_network_exists", param_name="floating_network")
@validation.add("required_services",
                services=[consts.Service.NOVA, consts.Service.CINDER])
@validation.add("required_platform", platform="openstack",
                users=True, admin=True)
@scenario.configure(context={"cleanup@openstack": ["nova", "cinder"],
                             "keypair@openstack": {},
                             "allow_ssh@openstack": None},
                    name="InstanceHA.recover_instance_nova_compute",
                    platform="openstack")
class InstanceHARecoverNovaCompute(vm_utils.VMScenario,
                                   cinder_utils.CinderBasic):
    """Scenario registered as ``InstanceHA.recover_instance_nova_compute``.

    Forwards the arguments of ``run`` to the ``recover_instance_ha``
    helper together with a custom ``disruptor_cmd`` that SIGKILLs the
    processes whose ``ps -ef`` line starts with ``nova``.
    """

    def __init__(self, *args, **kwargs):
        """Pass construction straight through to the parent scenarios."""
        super(InstanceHARecoverNovaCompute, self).__init__(*args, **kwargs)

    def run(self, image, flavor, computes,
            volume_args=None,
            floating_network=None,
            use_floating_ip=True,
            force_delete=False,
            wait_for_ping=True,
            max_log_length=None,
            **kwargs):
        """Run the scenario by delegating to ``recover_instance_ha``.

        :param image: forwarded positionally (converted by the
            ``glance_image`` type declared on the class)
        :param flavor: forwarded positionally (converted by the
            ``nova_flavor`` type declared on the class)
        :param computes: forwarded positionally, unchanged
        :param volume_args: forwarded as keyword, default ``None``
        :param floating_network: forwarded as keyword, default ``None``
        :param use_floating_ip: forwarded as keyword, default ``True``
        :param force_delete: forwarded as keyword, default ``False``
        :param wait_for_ping: forwarded as keyword, default ``True``
        :param max_log_length: forwarded as keyword, default ``None``
        :param kwargs: any further keyword arguments, forwarded as-is
        """
        disruptor_cmd = {
            # The grep pattern is quoted so the shell cannot glob-expand it
            # against files in the working directory, and it carries no
            # trailing "*": as a regex "nova*" means "nov" followed by any
            # number of "a", not "starts with nova".
            # NOTE(review): the trailing "echo {}" presumably gives the
            # command runner a JSON document on stdout -- confirm against
            # the code that consumes disruptor_cmd.
            "script_inline": "sudo kill -9 $(ps -ef | grep '^nova' "
                             "| awk '{print$2}'); echo {}",
            "interpreter": "/bin/sh"
        }
        recover_instance_ha(self, image, flavor, computes,
                            volume_args=volume_args,
                            floating_network=floating_network,
                            use_floating_ip=use_floating_ip,
                            force_delete=force_delete,
                            disruptor_cmd=disruptor_cmd,
                            wait_for_ping=wait_for_ping,
                            max_log_length=max_log_length,
                            **kwargs)
|
@ -1,81 +0,0 @@
|
||||
---
|
||||
InstanceHA.recover_instance_fip_and_volume:
|
||||
-
|
||||
args:
|
||||
flavor:
|
||||
name: "m1.tiny"
|
||||
image:
|
||||
name: cirros
|
||||
volume_args:
|
||||
size: 1
|
||||
floating_network: "{{ public_physical_network }}-network"
|
||||
force_delete: false
|
||||
wait_for_ping: false
|
||||
computes:
|
||||
username: "heat-admin"
|
||||
key_filename: "/home/stack/.ssh/id_rsa"
|
||||
port: 22
|
||||
runner:
|
||||
type: "constant"
|
||||
times: 1
|
||||
concurrency: 1
|
||||
context:
|
||||
users:
|
||||
tenants: 2
|
||||
users_per_tenant: 1
|
||||
network: {}
|
||||
sla:
|
||||
failure_rate:
|
||||
max: 0.0
|
||||
InstanceHA.recover_stopped_instance_fip:
|
||||
-
|
||||
args:
|
||||
flavor:
|
||||
name: "m1.tiny"
|
||||
image:
|
||||
name: cirros
|
||||
floating_network: "{{ public_physical_network }}-network"
|
||||
force_delete: false
|
||||
wait_for_ping: false
|
||||
computes:
|
||||
username: "heat-admin"
|
||||
key_filename: "/home/stack/.ssh/id_rsa"
|
||||
port: 22
|
||||
runner:
|
||||
type: "constant"
|
||||
times: 1
|
||||
concurrency: 1
|
||||
context:
|
||||
users:
|
||||
tenants: 2
|
||||
users_per_tenant: 1
|
||||
network: {}
|
||||
sla:
|
||||
failure_rate:
|
||||
max: 0.0
|
||||
InstanceHA.recover_instance_two_cycles:
|
||||
-
|
||||
args:
|
||||
flavor:
|
||||
name: "m1.tiny"
|
||||
image:
|
||||
name: cirros
|
||||
floating_network: "{{ public_physical_network }}-network"
|
||||
force_delete: false
|
||||
wait_for_ping: false
|
||||
computes:
|
||||
username: "heat-admin"
|
||||
key_filename: "/home/stack/.ssh/id_rsa"
|
||||
port: 22
|
||||
runner:
|
||||
type: "constant"
|
||||
times: 1
|
||||
concurrency: 1
|
||||
context:
|
||||
users:
|
||||
tenants: 2
|
||||
users_per_tenant: 1
|
||||
network: {}
|
||||
sla:
|
||||
failure_rate:
|
||||
max: 0.0
|
@ -1,226 +0,0 @@
|
||||
instance-ha
|
||||
===========
|
||||
|
||||
This role aims to automate all the steps needed to configure instance HA on a
|
||||
deployed TripleO overcloud environment.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
The TripleO environment must be prepared as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).
|
||||
|
||||
**NOTE**: Instance-HA depends on STONITH. This means that all the steps
|
||||
performed by this role make sense only if on the overcloud STONITH has been
|
||||
configured. There is a dedicated role that automates the STONITH
|
||||
configuration, named [stonith-config](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/stonith-config).
|
||||
|
||||
Instance HA
|
||||
-----------
|
||||
|
||||
Instance HA is a feature that gives a certain degree of high-availability to the
|
||||
instances spawned by an OpenStack deployment. Namely, if a compute node on which
|
||||
an instance is running breaks for whatever reason, this configuration will spawn
|
||||
the instances that were running on the broken node onto a functioning one.
|
||||
This role automates all the necessary steps needed to configure Pacemaker
|
||||
cluster to support this functionality. A typical cluster configuration on a
|
||||
clean stock **newton** (or **osp10**) deployment is something like this:
|
||||
|
||||
Online: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
||||
|
||||
Full list of resources:
|
||||
|
||||
ip-192.168.24.10 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
||||
ip-172.18.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
||||
ip-172.20.0.19 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
||||
ip-172.17.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
||||
ip-172.19.0.12 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
||||
Clone Set: haproxy-clone [haproxy]
|
||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
||||
Master/Slave Set: galera-master [galera]
|
||||
Masters: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
||||
ip-172.17.0.18 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
||||
Clone Set: rabbitmq-clone [rabbitmq]
|
||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
||||
Master/Slave Set: redis-master [redis]
|
||||
Masters: [ overcloud-controller-0 ]
|
||||
Slaves: [ overcloud-controller-1 overcloud-controller-2 ]
|
||||
openstack-cinder-volume (systemd:openstack-cinder-volume): Started overcloud-controller-0
|
||||
|
||||
As you can see we have 3 controllers, six IP resources, four *core* resources
|
||||
(*haproxy*, *galera*, *rabbitmq* and *redis*) and one last resource which is
|
||||
*openstack-cinder-volume* that needs to run as a single active/passive resource
|
||||
inside the cluster. This role configures all the additional resources needed
|
||||
to have a working instance HA setup. Once the playbook is executed, the
|
||||
configuration will be something like this:
|
||||
|
||||
Online: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
||||
RemoteOnline: [ overcloud-compute-0 overcloud-compute-1 ]
|
||||
|
||||
Full list of resources:
|
||||
|
||||
ip-192.168.24.10 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
||||
ip-172.18.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
||||
ip-172.20.0.19 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
||||
ip-172.17.0.11 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
||||
ip-172.19.0.12 (ocf::heartbeat:IPaddr2): Started overcloud-controller-0
|
||||
Clone Set: haproxy-clone [haproxy]
|
||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
||||
Master/Slave Set: galera-master [galera]
|
||||
Masters: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
||||
ip-172.17.0.18 (ocf::heartbeat:IPaddr2): Started overcloud-controller-1
|
||||
Clone Set: rabbitmq-clone [rabbitmq]
|
||||
Started: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
||||
Master/Slave Set: redis-master [redis]
|
||||
Masters: [ overcloud-controller-0 ]
|
||||
Slaves: [ overcloud-controller-1 overcloud-controller-2 ]
|
||||
Stopped: [ overcloud-compute-0 overcloud-compute-1 ]
|
||||
openstack-cinder-volume (systemd:openstack-cinder-volume): Started overcloud-controller-0
|
||||
ipmilan-overcloud-compute-0 (stonith:fence_ipmilan): Started overcloud-controller-1
|
||||
ipmilan-overcloud-controller-2 (stonith:fence_ipmilan): Started overcloud-controller-0
|
||||
ipmilan-overcloud-controller-0 (stonith:fence_ipmilan): Started overcloud-controller-0
|
||||
ipmilan-overcloud-controller-1 (stonith:fence_ipmilan): Started overcloud-controller-1
|
||||
ipmilan-overcloud-compute-1 (stonith:fence_ipmilan): Started overcloud-controller-1
|
||||
nova-evacuate (ocf::openstack:NovaEvacuate): Started overcloud-controller-0
|
||||
Clone Set: nova-compute-checkevacuate-clone [nova-compute-checkevacuate]
|
||||
Started: [ overcloud-compute-0 overcloud-compute-1 ]
|
||||
Stopped: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
||||
Clone Set: nova-compute-clone [nova-compute]
|
||||
Started: [ overcloud-compute-0 overcloud-compute-1 ]
|
||||
Stopped: [ overcloud-controller-0 overcloud-controller-1 overcloud-controller-2 ]
|
||||
fence-nova (stonith:fence_compute): Started overcloud-controller-0
|
||||
overcloud-compute-1 (ocf::pacemaker:remote): Started overcloud-controller-0
|
||||
overcloud-compute-0 (ocf::pacemaker:remote): Started overcloud-controller-1
|
||||
|
||||
How Instance HA works
|
||||
---------------------
|
||||
|
||||
There are three key resource agents you need to consider. Here's the list:
|
||||
|
||||
- *fence_compute* (named **fence-nova** inside the cluster): which takes care
|
||||
of marking a compute node with the attribute "evacuate" set to yes;
|
||||
- *NovaEvacuate* (named **nova-evacuate** inside the cluster): which takes care
|
||||
of the effective evacuation of the instances and runs on one of the
|
||||
controllers;
|
||||
- *nova-compute-wait* (named **nova-compute-checkevacuate** inside the
|
||||
cluster): which waits for eventual evacuation before starting nova compute
|
||||
services and runs on each compute node;
|
||||
|
||||
Looking at the role you will notice that other systemd resources will be added
|
||||
into the cluster on the compute nodes, especially in older releases like mitaka
|
||||
(*neutron-openvswitch-agent*, *libvirtd*, *openstack-ceilometer-compute* and
|
||||
*nova-compute*), but the keys for the correct instance HA comprehension are the
|
||||
aforementioned three resources.
|
||||
|
||||
Evacuation
|
||||
----------
|
||||
|
||||
The principle under which Instance HA works is *evacuation*. This means that
|
||||
when a host becomes unavailable for whatever reason, instances on it are
|
||||
evacuated to another available host.
|
||||
Instance HA works both on shared storage and local storage environments, which
|
||||
means that evacuated instances will maintain the same network setup (static ip,
|
||||
floating ip and so on) and characteristics inside the new host, even if they
|
||||
will be spawned from scratch.
|
||||
|
||||
What happens when a compute node is lost
|
||||
----------------------------------------
|
||||
|
||||
Once configured, how does the system behave when evacuation is needed? The
|
||||
following sequence describes the actions taken by the cluster and the OpenStack
|
||||
components:
|
||||
|
||||
1. A compute node (say overcloud-compute-1) which is running instances goes
|
||||
down for some reason (power outage, kernel panic, manual intervention);
|
||||
2. The cluster starts the action sequence to fence this host, since it needs
|
||||
to be sure that the host is *really* down before driving any other operation
|
||||
(otherwise there is potential for data corruption or multiple identical VMs
|
||||
running at the same time in the infrastructure). Setup is configured to have
|
||||
two levels of fencing for the compute hosts:
|
||||
|
||||
* **IPMI**: which will occur first and will take care of physically
|
||||
resetting the host and hence assuring that the machine is really powered
|
||||
off;
|
||||
* **fence-nova**: which will occur afterwards and will take care of marking
|
||||
with a cluster per-node attribute "evacuate=yes";
|
||||
|
||||
So the host gets reset and on the cluster a new node-property like the
|
||||
following will appear:
|
||||
|
||||
[root@overcloud-controller-0 ~]# attrd_updater -n evacuate -A
|
||||
name="evacuate" host="overcloud-compute-1.localdomain" value="yes"
|
||||
|
||||
3. At this point the resource **nova-evacuate** which constantly monitors the
|
||||
attributes of the cluster in search of the evacuate tag will find out that
|
||||
the *overcloud-compute-1* host needs evacuation, and by internally using
|
||||
*nova-compute commands*, will start the evacuation of the instances towards
|
||||
another host;
|
||||
4. In the meantime, while compute-1 is booting up again,
|
||||
**nova-compute-checkevacuate** will wait (with a default timeout of 120
|
||||
seconds) for the evacuation to complete before starting the chain via the
|
||||
*NovaCompute* resource that will enable the fenced host to become available
|
||||
again for running instances;
|
||||
|
||||
What to look for when something is not working
|
||||
----------------------------------------------
|
||||
|
||||
Here are some tips to follow when you need to debug why instance HA is
|
||||
not working:
|
||||
|
||||
1. Check credentials: many resources require access data for the overcloud
|
||||
coming from the overcloudrc file, so it is easy to make copy
|
||||
errors;
|
||||
2. Check connectivity: stonith is essential for cluster and if for some reason
|
||||
the cluster is not able to fence the compute nodes, the whole instance HA
|
||||
environment will not work;
|
||||
3. Check errors: inside the controller's cluster log
|
||||
(*/var/log/cluster/corosync.log*) some errors may catch the eye.
|
||||
|
||||
Examples on how to invoke the playbook via ansible
|
||||
--------------------------------------------------
|
||||
|
||||
This command line will install the whole instance-ha solution, with controller
|
||||
stonith, compute stonith and all the instance ha steps included:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10"
|
||||
|
||||
By default the playbook will install the instance-ha solution with the shared
|
||||
storage configuration, but it is possible to make the installation in a no
|
||||
shared storage environment, passing the **instance_ha_shared_storage** variable
|
||||
as **false**:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e instance_ha_shared_storage=false
|
||||
|
||||
If a user configured the overcloud with a specific domain it is possible to
|
||||
override the default "localdomain" value by passing the **overcloud_domain**
|
||||
variable to the playbook:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e overcloud_domain="mydomain"
|
||||
|
||||
If a user already installed STONITH for controllers and wants just to apply all
|
||||
the instance HA steps with STONITH for the compute nodes, they can launch this:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e stonith_devices="computes"
|
||||
|
||||
To uninstall the whole instance HA solution:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e instance_ha_action="uninstall"
|
||||
|
||||
Or, if a user needs to omit STONITH for the controllers:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-instance-ha.yml -e release="rhos-10" -e stonith_devices="computes" -e instance_ha_action="uninstall"
|
||||
|
||||
It is also possible to totally omit STONITH configuration by passing "none" as
|
||||
the value of *stonith_devices*.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
GPL
|
||||
|
||||
Author Information
|
||||
------------------
|
||||
|
||||
Raoul Scarazzini <rasca@redhat.com>
|
@ -1,13 +0,0 @@
|
||||
---
|
||||
|
||||
overcloud_working_dir: "/home/heat-admin"
|
||||
working_dir: "/home/stack"
|
||||
|
||||
# Can be install or uninstall
|
||||
instance_ha_action: "install"
|
||||
|
||||
# Do we have a shared storage or not?
|
||||
instance_ha_shared_storage: true
|
||||
|
||||
# Set overcloud domain
|
||||
overcloud_domain: "localdomain"
|
@ -1,386 +0,0 @@
|
||||
---
|
||||
- name: Apply STONITH for compute nodes
|
||||
include_role:
|
||||
name: stonith-config
|
||||
vars:
|
||||
stonith_devices: "computes"
|
||||
when:
|
||||
- stonith_devices in ["all","computes"]
|
||||
|
||||
- name: Disable openstack-nova-compute on compute
|
||||
service:
|
||||
name: openstack-nova-compute
|
||||
state: stopped
|
||||
enabled: no
|
||||
become: yes
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['compute'] }}"
|
||||
when: release not in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Disable neutron-openvswitch-agent on compute
|
||||
service:
|
||||
name: neutron-openvswitch-agent
|
||||
state: stopped
|
||||
enabled: no
|
||||
become: yes
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['compute'] }}"
|
||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Disable openstack-ceilometer-compute on compute
|
||||
service:
|
||||
name: openstack-ceilometer-compute
|
||||
state: stopped
|
||||
enabled: no
|
||||
become: yes
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['compute'] }}"
|
||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Disable libvirtd on compute
|
||||
become: yes
|
||||
service:
|
||||
name: libvirtd
|
||||
state: stopped
|
||||
enabled: no
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['compute'] }}"
|
||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Generate authkey for remote pacemaker
|
||||
shell: |
|
||||
dd if=/dev/urandom of="/tmp/authkey" bs=4096 count=1
|
||||
delegate_to: localhost
|
||||
|
||||
- name: Make sure pacemaker config dir exists
|
||||
become: yes
|
||||
file:
|
||||
path: /etc/pacemaker
|
||||
state: directory
|
||||
mode: 0750
|
||||
group: "haclient"
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['controller'] }}"
|
||||
- "{{ groups['compute'] }}"
|
||||
|
||||
- name: Copy authkey on all the overcloud nodes
|
||||
become: yes
|
||||
copy:
|
||||
src: /tmp/authkey
|
||||
dest: /etc/pacemaker/authkey
|
||||
mode: 0640
|
||||
group: "haclient"
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['controller'] }}"
|
||||
- "{{ groups['compute'] }}"
|
||||
|
||||
- name: Remove authkey from local dir
|
||||
file:
|
||||
path: /tmp/authkey
|
||||
state: absent
|
||||
delegate_to: localhost
|
||||
|
||||
- name: Enable iptables traffic for pacemaker_remote
|
||||
become: yes
|
||||
shell: |
|
||||
iptables -I INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['controller'] }}"
|
||||
- "{{ groups['compute'] }}"
|
||||
|
||||
- name: Make iptables pacemaker_remote rule permanent
|
||||
become: yes
|
||||
lineinfile:
|
||||
path: /etc/sysconfig/iptables
|
||||
line: "-A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT"
|
||||
insertafter: ":OUTPUT ACCEPT"
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['controller'] }}"
|
||||
- "{{ groups['compute'] }}"
|
||||
|
||||
- name: Start pacemaker remote service on compute nodes
|
||||
become: yes
|
||||
service:
|
||||
name: pacemaker_remote
|
||||
enabled: yes
|
||||
state: started
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['compute'] }}"
|
||||
|
||||
- name: Get the name of the stack
|
||||
shell: |
|
||||
source {{ working_dir }}/stackrc
|
||||
openstack stack list -f value -c 'Stack Name'
|
||||
register: stack_name
|
||||
|
||||
- name: Check if a v3 overcloud's rc file exists
|
||||
stat:
|
||||
path: "{{ working_dir }}/{{ stack_name.stdout }}rc.v3"
|
||||
register: v3_rc_file_stat
|
||||
|
||||
- name: Get the contents of the overcloud's rc file v3
|
||||
set_fact:
|
||||
overcloudrc: "{{ stack_name.stdout }}rc.v3"
|
||||
when: v3_rc_file_stat.stat.exists
|
||||
|
||||
- name: Get the contents of the overcloud's rc file
|
||||
set_fact:
|
||||
overcloudrc: "{{ stack_name.stdout }}rc"
|
||||
when: not v3_rc_file_stat.stat.exists
|
||||
|
||||
- block:
|
||||
- name: Get OS_USERNAME from overcloudrc
|
||||
shell: |
|
||||
grep OS_USERNAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USERNAME=//g'
|
||||
register: "OS_USERNAME"
|
||||
|
||||
- name: Get OS_PASSWORD from overcloudrc
|
||||
shell: |
|
||||
grep OS_PASSWORD {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PASSWORD=//g'
|
||||
register: "OS_PASSWORD"
|
||||
|
||||
- name: Get OS_AUTH_URL from overcloudrc
|
||||
shell: |
|
||||
grep OS_AUTH_URL {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_AUTH_URL=//g'
|
||||
register: "OS_AUTH_URL"
|
||||
|
||||
- name: Get OS_PROJECT_NAME or OS_TENANT_NAME from overcloudrc
|
||||
shell: |
|
||||
grep -E 'OS_PROJECT_NAME|OS_TENANT_NAME' {{ working_dir }}/{{ overcloudrc }} | tail -1 | sed 's/export OS_.*_NAME=//g'
|
||||
register: "OS_TENANT_NAME"
|
||||
|
||||
- name: Get OS_USER_DOMAIN_NAME from overcloudrc
|
||||
shell: |
|
||||
grep OS_USER_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_USER_DOMAIN_NAME=//g'
|
||||
register: "OS_USER_DOMAIN_NAME"
|
||||
when: v3_rc_file_stat.stat.exists
|
||||
|
||||
- name: Get OS_PROJECT_DOMAIN_NAME from overcloudrc
|
||||
shell: |
|
||||
grep OS_PROJECT_DOMAIN_NAME {{ working_dir }}/{{ overcloudrc }} | sed 's/export OS_PROJECT_DOMAIN_NAME=//g'
|
||||
register: "OS_PROJECT_DOMAIN_NAME"
|
||||
when: v3_rc_file_stat.stat.exists
|
||||
|
||||
- name: Define variable for pcs additional options for overcloud's rc file v3
|
||||
set_fact:
|
||||
pcs_v3_rc_file_opts: ""
|
||||
|
||||
- name: Define variable for pcs additional options for no_shared_storage
|
||||
set_fact:
|
||||
pcs_NovaEvacuate_no_shared_storage_opts: ""
|
||||
pcs_fence_compute_no_shared_storage_opts: ""
|
||||
|
||||
- name: Set pcs additional options for overcloud's rc file v3
|
||||
set_fact:
|
||||
pcs_v3_rc_file_opts: "project_domain=$OS_PROJECT_DOMAIN_NAME user_domain=$OS_USER_DOMAIN_NAME"
|
||||
when: v3_rc_file_stat.stat.exists
|
||||
|
||||
- name: Set pcs additional options for no_shared_storage
|
||||
set_fact:
|
||||
pcs_NovaEvacuate_no_shared_storage_opts: "no_shared_storage=1"
|
||||
pcs_fence_compute_no_shared_storage_opts: "no-shared-storage=True"
|
||||
when: not instance_ha_shared_storage|bool
|
||||
|
||||
- block:
|
||||
- name: Create resource nova-evacuate
|
||||
shell: |
|
||||
pcs resource create nova-evacuate ocf:openstack:NovaEvacuate auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME {{ pcs_v3_rc_file_opts }} {{ pcs_NovaEvacuate_no_shared_storage_opts }} --force
|
||||
|
||||
- name: Create pacemaker constraint to start nova-evacuate only on non compute nodes
|
||||
shell: |
|
||||
pcs constraint location nova-evacuate rule resource-discovery=never score=-INFINITY osprole eq compute
|
||||
|
||||
- name: Create pacemaker constraints to start VIP resources before nova-evacuate
|
||||
shell: |
|
||||
for i in $(pcs status | grep IP | awk '{ print $1 }')
|
||||
do pcs constraint order start $i then nova-evacuate
|
||||
done
|
||||
|
||||
- name: Create pacemaker constraints to start openstack services before nova-evacuate
|
||||
shell: "pcs constraint order start {{ item }} then nova-evacuate require-all=false"
|
||||
with_items:
|
||||
- openstack-glance-api-clone
|
||||
- neutron-metadata-agent-clone
|
||||
- openstack-nova-conductor-clone
|
||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Disable keystone resource
|
||||
shell: "pcs resource disable openstack-keystone --wait=900"
|
||||
when: release in [ 'liberty', 'rhos-8' ]
|
||||
|
||||
# Keystone resource was replaced by openstack-core resource in RHOS9
|
||||
- name: Disable openstack-core resource
|
||||
shell: "pcs resource disable openstack-core --wait=900"
|
||||
when: release in [ 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Set controller pacemaker property on controllers
|
||||
shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=controller"
|
||||
with_items: "{{ groups['controller'] }}"
|
||||
|
||||
- name: Get stonith devices
|
||||
shell: "pcs stonith | awk '{print $1}' | tr '\n' ' '"
|
||||
register: stonithdevs
|
||||
|
||||
- name: Setup stonith devices
|
||||
shell: |
|
||||
for i in $(cibadmin -Q --xpath //primitive --node-path | awk -F "id='" '{print $2}' | awk -F "'" '{print $1}' | uniq); do
|
||||
found=0
|
||||
if [ -n "{{ stonithdevs.stdout }}" ]; then
|
||||
for x in {{ stonithdevs.stdout }}; do
|
||||
if [ "$x" == "$i" ]; then
|
||||
found=1
|
||||
fi
|
||||
done
|
||||
fi
|
||||
if [ $found = 0 ]; then
|
||||
pcs constraint location $i rule resource-discovery=exclusive score=0 osprole eq controller
|
||||
fi
|
||||
done
|
||||
when: release not in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Create compute pacemaker resources and constraints
|
||||
shell: |
|
||||
pcs resource create nova-compute-checkevacuate ocf:openstack:nova-compute-wait auth_url=$OS_AUTH_URL username=$OS_USERNAME password=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain={{ overcloud_domain }} op start timeout=300 --clone interleave=true --disabled --force
|
||||
pcs constraint location nova-compute-checkevacuate-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
||||
pcs resource create nova-compute systemd:openstack-nova-compute op start timeout=60s --clone interleave=true --disabled --force
|
||||
pcs constraint location nova-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
||||
pcs constraint order start nova-compute-checkevacuate-clone then nova-compute-clone require-all=true
|
||||
pcs constraint order start nova-compute-clone then nova-evacuate require-all=false
|
||||
when: release not in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Create compute pacemaker resources and constraints
|
||||
shell: |
|
||||
pcs resource create neutron-openvswitch-agent-compute systemd:neutron-openvswitch-agent --clone interleave=true --disabled --force
|
||||
pcs constraint location neutron-openvswitch-agent-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
||||
pcs resource create libvirtd-compute systemd:libvirtd --clone interleave=true --disabled --force
|
||||
pcs constraint location libvirtd-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
||||
pcs constraint order start neutron-openvswitch-agent-compute-clone then libvirtd-compute-clone
|
||||
pcs constraint colocation add libvirtd-compute-clone with neutron-openvswitch-agent-compute-clone
|
||||
pcs resource create ceilometer-compute systemd:openstack-ceilometer-compute --clone interleave=true --disabled --force
|
||||
pcs constraint location ceilometer-compute-clone rule resource-discovery=exclusive score=0 osprole eq compute
|
||||
pcs constraint order start libvirtd-compute-clone then ceilometer-compute-clone
|
||||
pcs constraint colocation add ceilometer-compute-clone with libvirtd-compute-clone
|
||||
pcs constraint order start libvirtd-compute-clone then nova-compute-clone
|
||||
pcs constraint colocation add nova-compute-clone with libvirtd-compute-clone
|
||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Create pacemaker constraint for neutron-server, nova-conductor and ceilometer-notification
|
||||
shell: |
|
||||
pcs constraint order start neutron-server-clone then neutron-openvswitch-agent-compute-clone require-all=false
|
||||
pcs constraint order start openstack-ceilometer-notification-clone then ceilometer-compute-clone require-all=false
|
||||
pcs constraint order start openstack-nova-conductor-clone then nova-compute-checkevacuate-clone require-all=false
|
||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Set requires to fencing as default for all resources (Pike/RHOS-12)
|
||||
shell: "pcs resource defaults requires=fencing"
|
||||
when: release in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Create fence-nova pacemaker resource (no shared storage)
|
||||
shell: "pcs stonith create fence-nova fence_compute auth_url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain={{ overcloud_domain }} record_only=1 {{ pcs_fence_compute_no_shared_storage_opts }} --force"
|
||||
when: release not in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Create fence-nova pacemaker resource (Pike/RHOS-12)
|
||||
shell: "pcs stonith create fence-nova fence_compute auth_url=$OS_AUTH_URL login=$OS_USERNAME passwd=$OS_PASSWORD tenant_name=$OS_TENANT_NAME domain={{ overcloud_domain }} record_only=1 {{ pcs_fence_compute_no_shared_storage_opts }} meta provides=unfencing --force"
|
||||
when: release in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Create pacemaker constraint for fence-nova to fix it on controller node and set resource-discovery never
|
||||
shell: "pcs constraint location fence-nova rule resource-discovery=never score=0 osprole eq controller"
|
||||
|
||||
- name: Create pacemaker constraint for fence-nova to start after galera
|
||||
shell: "pcs constraint order promote galera-master then fence-nova require-all=false"
|
||||
when: release not in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Create nova-compute order constraint on fence-nova
|
||||
shell: "pcs constraint order start fence-nova then nova-compute-clone"
|
||||
when: release not in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Set cluster recheck interval to 1 minute
|
||||
shell: "pcs property set cluster-recheck-interval=1min"
|
||||
|
||||
- name: Create pacemaker remote resource on compute nodes
|
||||
shell: "pcs resource create {{ hostvars[item]['ansible_hostname'] }} ocf:pacemaker:remote reconnect_interval=240 op monitor interval=20"
|
||||
with_items: "{{ groups['compute'] }}"
|
||||
|
||||
- name: Set osprole for compute nodes
|
||||
shell: "pcs property set --node {{ hostvars[item]['ansible_hostname'] }} osprole=compute"
|
||||
with_items: "{{ groups['compute'] }}"
|
||||
|
||||
- name: Add STONITH level definitions for compute nodes
|
||||
shell: |
|
||||
compute_stonith_name=$(cibadmin --query --xpath "//primitive[@class='stonith']/instance_attributes/nvpair[@value='{{ item }}']" | sed 's/.*id="\(.*\)-instance_attributes-pcmk_host_list".*/\1/g')
|
||||
pcs stonith level add 1 {{ item }} $compute_stonith_name,fence-nova
|
||||
with_items: "{{ groups['compute'] }}"
|
||||
|
||||
- name: Enable keystone resource
|
||||
shell: "pcs resource enable openstack-keystone"
|
||||
when: release in [ 'liberty', 'rhos-8' ]
|
||||
|
||||
- name: Enable openstack-core resource
|
||||
shell: "pcs resource enable openstack-core"
|
||||
when: release in [ 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Wait for httpd service to be started
|
||||
shell: "systemctl show httpd --property=ActiveState"
|
||||
register: httpd_status_result
|
||||
until: httpd_status_result.stdout.find('inactive') == -1 and httpd_status_result.stdout.find('activating') == -1
|
||||
retries: 30
|
||||
delay: 10
|
||||
when: release not in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Enable compute nodes resources (nova)
|
||||
shell: "pcs resource enable {{ item }}"
|
||||
with_items:
|
||||
- nova-compute-checkevacuate
|
||||
- nova-compute
|
||||
when: release not in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Create compute unfence resource to override default resource requires (Pike/RHOS-12)
|
||||
shell: |
|
||||
pcs resource create compute-unfence-trigger ocf:pacemaker:Dummy op start requires="unfencing" --clone --disabled
|
||||
pcs constraint location compute-unfence-trigger-clone rule resource-discovery=never score=-INFINITY osprole ne compute
|
||||
pcs resource enable compute-unfence-trigger
|
||||
when: release in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Enable compute nodes resources (others)
|
||||
shell: "pcs resource enable {{ item }}"
|
||||
with_items:
|
||||
- neutron-openvswitch-agent-compute
|
||||
- libvirtd-compute
|
||||
- ceilometer-compute
|
||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
environment:
|
||||
OS_USERNAME: "{{ OS_USERNAME.stdout }}"
|
||||
OS_PASSWORD: "{{ OS_PASSWORD.stdout }}"
|
||||
OS_AUTH_URL: "{{ OS_AUTH_URL.stdout }}"
|
||||
OS_TENANT_NAME: "{{ OS_TENANT_NAME.stdout }}"
|
||||
OS_USER_DOMAIN_NAME: "{{ OS_USER_DOMAIN_NAME.stdout }}"
|
||||
OS_PROJECT_DOMAIN_NAME: "{{ OS_PROJECT_DOMAIN_NAME.stdout }}"
|
||||
become: yes
|
||||
delegate_to: "{{ groups.controller[0] }}"
|
||||
|
||||
- name: Cleanup (if any) failed resources
|
||||
shell: |
|
||||
for resource in $(pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort |uniq)
|
||||
do
|
||||
pcs resource cleanup $resource
|
||||
done
|
||||
become: yes
|
||||
delegate_to: "{{ groups.controller[0] }}"
|
||||
|
||||
# Poll `pcs status` from the first controller until its "Failed Actions"
# section no longer lists any resource.
# The registered stdout is a string, so it is compared with the empty string:
# comparing it with a list ([]) is always "not equal", which ends the retry
# loop on the very first attempt without waiting for anything.
# The task gives up (and fails) after 10 attempts spaced 10 seconds apart.
- name: Wait for (if any) failed resources to recover
  shell: pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort |uniq
  register: failed_resources
  until: failed_resources.stdout == ""
  retries: 10
  delay: 10
  become: yes
  delegate_to: "{{ groups.controller[0] }}"
|
@ -1,31 +0,0 @@
|
||||
---
|
||||
- name: Check if Instance HA steps were already applied
|
||||
include: pre-checks.yml
|
||||
when:
|
||||
- instance_ha_action == "install"
|
||||
|
||||
- name: Apply STONITH for controller nodes
|
||||
include_role:
|
||||
name: stonith-config
|
||||
when:
|
||||
- instance_ha_action == "install"
|
||||
- stonith_devices in ["all","controllers"]
|
||||
|
||||
- name: Apply Instance High Availability steps
|
||||
include: apply.yml
|
||||
when:
|
||||
- instance_ha_action == "install"
|
||||
|
||||
- name: Undo Instance High Availability steps
|
||||
include: undo.yml
|
||||
when:
|
||||
- instance_ha_action == "uninstall"
|
||||
|
||||
- name: Remove STONITH for controller nodes
|
||||
include_role:
|
||||
name: stonith-config
|
||||
vars:
|
||||
stonith_action: "uninstall"
|
||||
when:
|
||||
- instance_ha_action == "uninstall"
|
||||
- stonith_devices in ["all","controllers"]
|
@ -1,25 +0,0 @@
|
||||
---
|
||||
- block:
|
||||
- name: Check if STONITH resources already exist
|
||||
shell: |
|
||||
pcs stonith show | grep {{ item }}
|
||||
with_items:
|
||||
- fence-nova
|
||||
register: pre_existing_stonith
|
||||
failed_when: pre_existing_stonith.rc == 0
|
||||
|
||||
- name: Check if IHA resources already exist
|
||||
shell: |
|
||||
pcs resource show | grep "{{ item }}"
|
||||
with_items:
|
||||
- compute-unfence-trigger
|
||||
- nova-compute-checkevacuate
|
||||
- nova-compute
|
||||
- nova-evacuate
|
||||
- neutron-openvswitch-agent-compute
|
||||
- libvirtd-compute
|
||||
- ceilometer-compute
|
||||
register: pre_existing_resources
|
||||
failed_when: pre_existing_resources.rc == 0
|
||||
become: yes
|
||||
delegate_to: "{{ groups.controller[0] }}"
|
@ -1,168 +0,0 @@
|
||||
---
|
||||
- block:
|
||||
- name: Remove STONITH level definitions for compute nodes
|
||||
shell: |
|
||||
compute_stonith_name=$(cibadmin --query --xpath "//primitive[@class='stonith']/instance_attributes/nvpair[@value='{{ item }}']" | sed 's/.*id="\(.*\)-instance_attributes-pcmk_host_list".*/\1/g')
|
||||
for stonith_level in $(cibadmin --query --xpath "//configuration/fencing-topology/fencing-level[@devices='$compute_stonith_name,fence-nova'][@index='1'][@target='{{ item }}']" --node-path)
|
||||
do
|
||||
pcs stonith level delete 1 {{ item }} $compute_stonith_name,fence-nova
|
||||
done
|
||||
with_items: "{{ groups['compute'] }}"
|
||||
|
||||
- name: Remove fence-nova STONITH device
|
||||
shell: |
|
||||
for stonithid in $(pcs stonith show | awk '/fence_compute/ {print $1}')
|
||||
do
|
||||
pcs stonith delete fence-nova
|
||||
done
|
||||
|
||||
- name: Remove resources associated to remote nodes
|
||||
shell: |
|
||||
for resourceid in $(pcs resource show | grep compute | grep 'Clone Set:' | awk '{print $3}')
|
||||
do
|
||||
pcs resource cleanup $resourceid
|
||||
pcs --force resource delete $resourceid
|
||||
done
|
||||
|
||||
- name: Remove NovaEvacuate resource
|
||||
shell: |
|
||||
for resourceid in $(pcs resource show | grep NovaEvacuate | awk '/NovaEvacuate/ {print $1}')
|
||||
do
|
||||
pcs resource cleanup $resourceid
|
||||
pcs --force resource delete $resourceid
|
||||
done
|
||||
|
||||
- name: Remove pacemaker remote resource
|
||||
shell: |
|
||||
for resourceid in $(pcs resource show | awk '/:remote/ {print $1}')
|
||||
do
|
||||
pcs resource cleanup $resourceid
|
||||
pcs --force resource delete $resourceid
|
||||
done
|
||||
|
||||
- name: Remove constraints related to role controller
|
||||
shell: |
|
||||
for constraintid in $(pcs config show | grep -B 3 "osprole eq controller" | awk '/Constraint/ {print $2}')
|
||||
do
|
||||
pcs constraint delete $constraintid
|
||||
done
|
||||
|
||||
- name: Unset controller pacemaker property on controllers
|
||||
shell: |
|
||||
for nodeid in $(pcs property | awk '/osprole/ { print $1 }' | cut -d: -f1)
|
||||
do
|
||||
pcs property unset --node $nodeid osprole
|
||||
done
|
||||
|
||||
- name: Unset cluster recheck interval to 1 minute
|
||||
shell: |
|
||||
for propertyid in $(pcs property | awk '/cluster-recheck-interval/ { print $1 }' | cut -d: -f1)
|
||||
do
|
||||
pcs property unset cluster-recheck-interval
|
||||
done
|
||||
become: yes
|
||||
delegate_to: "{{ groups.controller[0] }}"
|
||||
|
||||
- name: Cleanup failed resources (if any)
|
||||
shell: |
|
||||
for resource in $(pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort |uniq)
|
||||
do
|
||||
pcs resource cleanup $resource
|
||||
done
|
||||
become: yes
|
||||
delegate_to: "{{ groups.controller[0] }}"
|
||||
|
||||
# Poll `pcs status` from the first controller until its "Failed Actions"
# section no longer lists any resource.
# The registered stdout is a string, so it is compared with the empty string:
# comparing it with a list ([]) is always "not equal", which ends the retry
# loop on the very first attempt without waiting for anything.
# The task gives up (and fails) after 10 attempts spaced 10 seconds apart.
- name: Wait for failed resources to recover (if any)
  shell: pcs status | sed -n -e '/Failed Actions:/,/^$/p' | egrep 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort |uniq
  register: failed_resources
  until: failed_resources.stdout == ""
  retries: 10
  delay: 10
  become: yes
  delegate_to: "{{ groups.controller[0] }}"
|
||||
|
||||
- name: Enable openstack-nova-compute on compute
|
||||
service:
|
||||
name: openstack-nova-compute
|
||||
state: started
|
||||
enabled: yes
|
||||
become: yes
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['compute'] }}"
|
||||
when: release not in [ 'pike', 'rhos-12' ]
|
||||
|
||||
- name: Enable neutron-openvswitch-agent on compute
|
||||
service:
|
||||
name: neutron-openvswitch-agent
|
||||
state: started
|
||||
enabled: yes
|
||||
become: yes
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['compute'] }}"
|
||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Enable openstack-ceilometer-compute on compute
|
||||
service:
|
||||
name: openstack-ceilometer-compute
|
||||
state: started
|
||||
enabled: yes
|
||||
become: yes
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['compute'] }}"
|
||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Enable libvirtd on compute
|
||||
become: yes
|
||||
service:
|
||||
name: libvirtd
|
||||
state: started
|
||||
enabled: yes
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['compute'] }}"
|
||||
when: release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ]
|
||||
|
||||
- name: Stop pacemaker remote service on compute nodes
|
||||
become: yes
|
||||
service:
|
||||
name: pacemaker_remote
|
||||
enabled: no
|
||||
state: stopped
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['compute'] }}"
|
||||
|
||||
- name: Disable iptables traffic for pacemaker_remote
|
||||
become: yes
|
||||
shell: |
|
||||
while [ $(iptables-save | grep -c "\-A INPUT \-p tcp \-m state \-\-state NEW \-m tcp \-\-dport 3121 \-j ACCEPT") -ne 0 ]
|
||||
do
|
||||
iptables -D INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT
|
||||
done
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['controller'] }}"
|
||||
- "{{ groups['compute'] }}"
|
||||
|
||||
- name: Remove iptables pacemaker_remote permanent rule
|
||||
become: yes
|
||||
lineinfile:
|
||||
path: /etc/sysconfig/iptables
|
||||
line: "-A INPUT -p tcp -m state --state NEW -m tcp --dport 3121 -j ACCEPT"
|
||||
state: absent
|
||||
delegate_to: "{{ item }}"
|
||||
with_items:
|
||||
- "{{ groups['controller'] }}"
|
||||
- "{{ groups['compute'] }}"
|
||||
|
||||
- name: Undo STONITH for compute nodes
|
||||
include_role:
|
||||
name: stonith-config
|
||||
vars:
|
||||
stonith_action: "uninstall"
|
||||
stonith_devices: "computes"
|
||||
when:
|
||||
- stonith_devices in ["all","computes"]
|
@ -1,90 +0,0 @@
|
||||
stonith-config
|
||||
==============
|
||||
|
||||
This role acts on an already deployed tripleo environment, setting up STONITH
|
||||
(Shoot The Other Node In The Head) inside the Pacemaker configuration for all
|
||||
the hosts that are part of the overcloud.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
The TripleO environment must be prepared as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).
|
||||
|
||||
STONITH
|
||||
-------
|
||||
|
||||
STONITH is the mechanism a Pacemaker cluster uses to be certain that a node is powered
|
||||
off. STONITH is the only way to use a shared storage environment without
|
||||
worrying about concurrent writes on disks. Inside TripleO environments STONITH
|
||||
is a requisite also for activating features like Instance HA because, before
|
||||
moving any machine, the system needs to be sure that the "move from" machine is
|
||||
off.
|
||||
STONITH configuration relies on the **instackenv.json** file, used by TripleO
|
||||
also to configure Ironic and all the provision stuff.
|
||||
Basically this role enables STONITH on the Pacemaker cluster and takes all the
|
||||
information from the mentioned file, creating a STONITH resource for each host
|
||||
on the overcloud.
|
||||
After running this playbook the cluster configuration will have these properties:
|
||||
|
||||
$ sudo pcs property
|
||||
Cluster Properties:
|
||||
cluster-infrastructure: corosync
|
||||
cluster-name: tripleo_cluster
|
||||
...
|
||||
...
|
||||
**stonith-enabled: true**
|
||||
|
||||
And something like this, depending on how many nodes there are in the overcloud:
|
||||
|
||||
sudo pcs stonith
|
||||
ipmilan-overcloud-compute-0 (stonith:fence_ipmilan): Started overcloud-controller-1
|
||||
ipmilan-overcloud-controller-2 (stonith:fence_ipmilan): Started overcloud-controller-0
|
||||
ipmilan-overcloud-controller-0 (stonith:fence_ipmilan): Started overcloud-controller-0
|
||||
ipmilan-overcloud-controller-1 (stonith:fence_ipmilan): Started overcloud-controller-1
|
||||
ipmilan-overcloud-compute-1 (stonith:fence_ipmilan): Started overcloud-controller-1
|
||||
|
||||
Having all this in place is a requirement for a reliable HA solution and for
|
||||
configuring special OpenStack features like [Instance HA](https://github.com/openstack/tripleo-ha-utils/tree/master/roles/instance-ha).
|
||||
|
||||
**Note**: by default this role configures STONITH for the controller nodes,
|
||||
but it is possible to configure all the nodes or to limit it just to
|
||||
computes, by setting the **stonith_devices** variable, which by default is set
|
||||
to "controllers", but can also be "*all*" or "*computes*".
|
||||
|
||||
Limitations
|
||||
-----------
|
||||
|
||||
The only kind of STONITH device supported **for the moment** is IPMI.
|
||||
|
||||
Examples on how to invoke the playbook via ansible
|
||||
--------------------------------------------------
|
||||
|
||||
This command line will install the STONITH devices for the controller nodes:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml
|
||||
|
||||
If a user wants to install the STONITH devices for all the nodes:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml -e stonith_devices="all"
|
||||
|
||||
To uninstall the STONITH devices for the controllers:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml -e stonith_action="uninstall"
|
||||
|
||||
To uninstall the STONITH devices just for the computes:
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-stonith-config.yml -e stonith_action="uninstall" -e stonith_devices="computes"
|
||||
|
||||
The STONITH role also supports "none" as a valid value for *stonith_devices*
|
||||
which can become useful when configuring instance HA in an environment already
|
||||
configured with STONITH for both controllers and computes.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
GPL
|
||||
|
||||
Author Information
|
||||
------------------
|
||||
|
||||
Raoul Scarazzini <rasca@redhat.com>
|
@ -1,13 +0,0 @@
|
||||
---
|
||||
|
||||
overcloud_working_dir: "/home/heat-admin"
|
||||
working_dir: "/home/stack"
|
||||
instack_env_file: "{{ working_dir }}/instackenv.json"
|
||||
|
||||
config_stonith_python_script: config-stonith-from-instackenv.py.j2
|
||||
|
||||
# Can be install, uninstall or none
|
||||
stonith_action: "install"
|
||||
|
||||
# Can be all, controllers, computes or none
|
||||
stonith_devices: controllers
|
@ -1,32 +0,0 @@
|
||||
---
|
||||
- name: Load the STONITH creation script on the undercloud
|
||||
template:
|
||||
src: "{{ config_stonith_python_script }}"
|
||||
dest: "{{ working_dir }}/config_stonith_from_instackenv.py"
|
||||
mode: 0755
|
||||
|
||||
- name: Generate STONITH script
|
||||
shell: |
|
||||
source {{ working_dir }}/stackrc
|
||||
{{ working_dir }}/config_stonith_from_instackenv.py {{ instack_env_file }} {{ stonith_action }} {{ stonith_devices }}
|
||||
register: stonith_script
|
||||
|
||||
- name: Delete the STONITH script on the overcloud (if exists)
|
||||
file:
|
||||
path: "{{ overcloud_working_dir }}/config-stonith.sh"
|
||||
state: absent
|
||||
delegate_to: "{{ groups.controller[0] }}"
|
||||
|
||||
- name: Create the STONITH script on the overcloud
|
||||
lineinfile:
|
||||
destfile: "{{ overcloud_working_dir }}/config-stonith.sh"
|
||||
line: "{{ stonith_script.stdout }}"
|
||||
create: yes
|
||||
mode: 0755
|
||||
delegate_to: "{{ groups.controller[0] }}"
|
||||
|
||||
- name: Execute STONITH script
|
||||
become: true
|
||||
delegate_to: "{{ groups.controller[0] }}"
|
||||
shell: >
|
||||
{{ overcloud_working_dir }}/config-stonith.sh &> config_stonith.log
|
@ -1,94 +0,0 @@
|
||||
#!/bin/python
"""Print the pcs commands that install or remove IPMI STONITH devices.

Usage:
    config_stonith_from_instackenv.py <instackenv.json> <action> <devices>

    instackenv.json  JSON file with a top-level "nodes" list; every node
                     provides "mac", "pm_addr", "pm_user", "pm_password" and,
                     optionally, "pm_port"
    action           install, uninstall or none
    devices          controllers, computes, all or none

Nothing is executed here: the commands are written to stdout so that the
caller can store them in a script and run it on a cluster node.

Required environment: OS_USERNAME, OS_PASSWORD, OS_AUTH_URL and either
OS_TENANT_NAME or OS_PROJECT_NAME.  OS_PROJECT_DOMAIN_NAME and
OS_USER_DOMAIN_NAME are used when present.

NOTE(review): this file is presumably rendered through a Jinja2 template
before being run, so keep Jinja2 delimiters out of both code and comments.
"""

import os
import json
import sys
# NOTE(review): v2, session and pprint are not referenced below; the imports
# are kept untouched because removing them is outside the scope of this fix.
from keystoneauth1.identity import v2
from keystoneauth1 import session
from pprint import pprint
from novaclient import client


def connect_to_nova(username, password, auth_url, tenant_name, project_name,
                    project_domain_name, user_domain_name):
    """Return a nova client that can actually talk to the API.

    The client signatures differ between releases, so each known form is
    tried in turn and validated with a cheap hypervisors.list() call.  The
    exception raised by the last form is propagated to the caller.
    """
    attempts = []
    if tenant_name is not None:
        # Liberty/OSP-8,Mitaka/OSP-9,Newton/OSP-10
        attempts.append(lambda: client.Client(2,
                                              username,
                                              password,
                                              tenant_name,
                                              auth_url))
        # Ocata/OSP-11
        attempts.append(lambda: client.Client(2,
                                              username=username,
                                              password=password,
                                              project_name=tenant_name,
                                              auth_url=auth_url))
    # Pike/OSP-12
    attempts.append(lambda: client.Client(2,
                                          auth_url=auth_url,
                                          username=username,
                                          password=password,
                                          project_name=project_name,
                                          project_domain_name=project_domain_name,
                                          user_domain_name=user_domain_name))

    last = len(attempts) - 1
    for index, build in enumerate(attempts):
        try:
            nova = build()
            nova.hypervisors.list()
            return nova
        except Exception:
            # Only the final form is allowed to fail loudly.
            if index == last:
                raise


def print_stonith_commands(nova, nodes, fence_config, fence_devices):
    """Print the pcs commands for every overcloud server matching a node.

    A server matches a node when the MAC address of its first ctlplane port
    equals the node's first MAC and its role (guessed from the server name)
    is selected by fence_devices.
    """
    for instance in nova.servers.list():
        for node in nodes:
            instance_mac = instance.addresses['ctlplane'][0]['OS-EXT-IPS-MAC:mac_addr']
            if node["mac"][0].lower() != instance_mac.lower():
                continue

            is_selected = (
                ('controller' in instance.name and fence_devices in ['controllers', 'all'])
                or
                ('compute' in instance.name and fence_devices in ['computes', 'all'])
            )
            if not is_selected:
                continue

            if fence_config == 'uninstall':
                print('pcs stonith delete ipmilan-{} || /bin/true'.format(instance.name))
            elif fence_config == 'install':
                # NOTE(review): credentials are interpolated verbatim inside
                # double quotes; a password containing '"' or '$' would break
                # the generated shell command.
                if "pm_port" in node:
                    print('pcs stonith create ipmilan-{} fence_ipmilan pcmk_host_list="{}" ipaddr="{}" login="{}" passwd="{}" ipport={} lanplus="true" delay=20 op monitor interval=60s'
                          .format(instance.name, instance.name, node["pm_addr"], node["pm_user"], node["pm_password"], node["pm_port"]))
                else:
                    print('pcs stonith create ipmilan-{} fence_ipmilan pcmk_host_list="{}" ipaddr="{}" login="{}" passwd="{}" lanplus="true" delay=20 op monitor interval=60s'
                          .format(instance.name, instance.name, node["pm_addr"], node["pm_user"], node["pm_password"]))
                # Keep the fence device away from the node it is meant to fence.
                print('pcs constraint location ipmilan-{} avoids {}'
                      .format(instance.name, instance.name))


def main():
    if len(sys.argv) < 4:
        sys.stderr.write('Usage: {} <instackenv.json> <install|uninstall|none> '
                         '<controllers|computes|all|none>\n'.format(sys.argv[0]))
        sys.exit(2)

    # JSON file as first parameter; the handle is closed as soon as it is parsed
    with open(sys.argv[1]) as jdata:
        data = json.load(jdata)

    # install, uninstall, none
    fence_config = sys.argv[2]
    # controllers, computes, all or none
    fence_devices = sys.argv[3]

    # Define variables to connect to nova
    os_username = os.environ['OS_USERNAME']
    os_password = os.environ['OS_PASSWORD']
    os_auth_url = os.environ['OS_AUTH_URL']
    # Older rc files export OS_TENANT_NAME, newer ones OS_PROJECT_NAME: accept
    # either so that every connection form below has the names it needs.
    os_tenant_name = os.environ.get('OS_TENANT_NAME')
    os_project_name = os.environ.get('OS_PROJECT_NAME', os_tenant_name)
    if os_project_name is None:
        sys.stderr.write('Either OS_TENANT_NAME or OS_PROJECT_NAME must be set\n')
        sys.exit(1)
    os_project_domain_name = os.environ.get('OS_PROJECT_DOMAIN_NAME')
    os_user_domain_name = os.environ.get('OS_USER_DOMAIN_NAME')

    # If fence_devices includes controllers then we act on the overall stonith-enabled property of the cluster
    if fence_devices in ['controllers', 'all']:
        # If we're uninstalling then we disable stonith
        if fence_config == 'uninstall':
            print('pcs property set stonith-enabled=false')
        # If we're installing then we enable it
        elif fence_config == 'install':
            print('pcs property set stonith-enabled=true')

    # Connect to nova
    nt = connect_to_nova(os_username, os_password, os_auth_url,
                         os_tenant_name, os_project_name,
                         os_project_domain_name, os_user_domain_name)

    # Parse instances
    print_stonith_commands(nt, data["nodes"], fence_config, fence_devices)


if __name__ == '__main__':
    main()
|
@ -1,60 +0,0 @@
|
||||
################
|
||||
# Python imports
|
||||
################
|
||||
import os
|
||||
import json
|
||||
import sys
|
||||
# The below will be enabled once OS_AUTH_URL=http://192.0.2.1:5000/v3
|
||||
#from keystoneauth1.identity import v3
|
||||
from keystoneauth1.identity import v2
|
||||
from keystoneauth1 import session
|
||||
from pprint import pprint
|
||||
from novaclient import client
|
||||
|
||||
##########################################################
|
||||
# Environment variables (need to source before launching):
|
||||
##########################################################
|
||||
export NOVA_VERSION=1.1
|
||||
export OS_PASSWORD=$(sudo hiera admin_password)
|
||||
# If v3:
|
||||
export OS_AUTH_URL=http://192.0.2.1:5000/v3
|
||||
# else
|
||||
export OS_AUTH_URL=http://192.0.2.1:5000/v2.0
|
||||
export OS_USERNAME=admin
|
||||
export OS_TENANT_NAME=admin
|
||||
export COMPUTE_API_VERSION=1.1
|
||||
export OS_NO_CACHE=True
|
||||
|
||||
##############
|
||||
# JSON format:
|
||||
##############
|
||||
{ "nodes": [
|
||||
{
|
||||
"mac": [
|
||||
"b8:ca:3a:66:e3:82"
|
||||
],
|
||||
"_comment":"host12-rack03.scale.openstack.engineering.redhat.com",
|
||||
"cpu": "",
|
||||
"memory": "",
|
||||
"disk": "",
|
||||
"arch": "x86_64",
|
||||
"pm_type":"pxe_ipmitool",
|
||||
"pm_user":"qe-scale",
|
||||
"pm_password":"d0ckingSt4tion",
|
||||
"pm_addr":"10.1.8.102"
|
||||
},
|
||||
...
|
||||
|
||||
########################################################################
|
||||
# To make the below working os_auth_url must be http://192.0.2.1:5000/v3
|
||||
########################################################################
|
||||
auth = v3.Password(auth_url=os_auth_url,
|
||||
username=os_username,
|
||||
password=os_password,
|
||||
{% if release in [ 'liberty', 'rhos-8', 'mitaka', 'rhos-9' ] %}
|
||||
tenant_name=os_tenant_name,
|
||||
{% else %}
|
||||
project_name=os_tenant_name,
|
||||
{% endif %}
|
||||
user_domain_id='default',
|
||||
project_domain_id='default')
|
@ -1,119 +0,0 @@
|
||||
validate-ha
|
||||
===========
|
||||
|
||||
This role acts on an already deployed tripleo environment, testing HA related
|
||||
functionalities of the installation.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
The TripleO environment must be prepared as described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).
|
||||
|
||||
This role also tests instance spawning and, to make this work, the
|
||||
definition of the floating network must be passed.
|
||||
It can be contained in a config file, like this:
|
||||
|
||||
private_net_cidr: "192.168.1.0/24"
|
||||
public_physical_network: "floating"
|
||||
floating_ip_cidr: "10.0.0.0/24"
|
||||
public_net_pool_start: "10.0.0.191"
|
||||
public_net_pool_end: "10.0.0.198"
|
||||
public_net_gateway: "10.0.0.254"
|
||||
|
||||
Or passed directly to the ansible command line (see examples below).
|
||||
|
||||
HA tests
|
||||
--------
|
||||
|
||||
HA tests are meant to check the behavior of the environment in the face of
|
||||
circumstances that involve service interruption, loss of a node and, in general,
|
||||
actions that stress the OpenStack installation with unexpected failures.
|
||||
Each test is associated with a global variable that, if true, makes the test
|
||||
happen.
|
||||
Tests are grouped and performed by default depending on the OpenStack release.
|
||||
This is the list of the supported variables, with test description and name of
|
||||
the release on which the test is performed:
|
||||
|
||||
- **test_ha_failed_actions**: Look for failed actions (**all**)
|
||||
- **test_ha_master_slave**: Stop master slave resources (galera and redis), all
|
||||
the resources should come down (**all**)
|
||||
- **test_ha_keystone_constraint_removal**: Stop keystone resource (by stopping
|
||||
httpd), check no other resource is stopped (**mitaka**)
|
||||
- Next generation cluster checks (**newton**, **ocata**, **master**):
|
||||
- **test_ha_ng_a**: Stop every systemd resource, stop Galera and Rabbitmq,
|
||||
Start every systemd resource
|
||||
- **test_ha_ng_b**: Stop Galera and Rabbitmq, stop every systemd resource,
|
||||
Start every systemd resource
|
||||
- **test_ha_ng_c**: Stop Galera and Rabbitmq, wait 20 minutes to see if
|
||||
something fails
|
||||
|
||||
It is also possible to omit (or add) tests not made for the specific release,
|
||||
using the above vars, by passing to the command line variables like this:
|
||||
|
||||
...
|
||||
-e test_ha_failed_actions=false \
|
||||
-e test_ha_ng_a=true \
|
||||
...
|
||||
|
||||
In this case we will not check for failed actions, a test that otherwise would
|
||||
have been done in mitaka, and we will force the execution of the "ng_a" test
|
||||
described earlier, which is originally executed just in newton versions or
|
||||
above.
|
||||
|
||||
All tests are performed using the tool [ha-test-suite](https://github.com/openstack/tripleo-ha-utils/tree/master/tools/ha-test-suite).
|
||||
|
||||
Applying latency
|
||||
----------------
|
||||
|
||||
It is possible to add an arbitrary amount of milliseconds of latency on each
|
||||
overcloud node to check whether the environment can pass the HA validation in
|
||||
any case.
|
||||
Adding the latency will be a matter of passing two variables:
|
||||
|
||||
* **latency_ms**: which will be the number of additional milliseconds to be
|
||||
added to the interface;
|
||||
* **latency_eth_interface**: the physical interface to which the user wants to
|
||||
apply the latency, this must be present in all the overcloud nodes;
|
||||
|
||||
So a typical command line in which a user wants to add 20ms of latency on the
|
||||
ethernet device eth0 will contain something like this:
|
||||
|
||||
...
|
||||
-e latency_ms=20 \
|
||||
-e latency_eth_interface=eth0 \
|
||||
...
|
||||
|
||||
The latency will be applied before the tests are executed and removed right after.
|
||||
|
||||
Examples on how to invoke the playbook via ansible
|
||||
--------------------------------------------------
|
||||
|
||||
Here's a way to invoke the tests from an *undercloud* machine prepared as
|
||||
described [here](https://github.com/openstack/tripleo-ha-utils/tree/master/README.md).
|
||||
|
||||
ansible-playbook /home/stack/tripleo-ha-utils/playbooks/overcloud-validate-ha.yml \
|
||||
-e release=ocata \
|
||||
-e local_working_dir=/home/stack \
|
||||
-e private_net_cidr="192.168.1.0/24" \
|
||||
-e public_physical_network="floating" \
|
||||
-e floating_ip_cidr="10.0.0.0/24" \
|
||||
-e public_net_pool_start="10.0.0.191" \
|
||||
-e public_net_pool_end="10.0.0.198" \
|
||||
-e public_net_gateway="10.0.0.254"
|
||||
|
||||
Note that the variables above can be declared inside a config.yml file that can
|
||||
be passed to the ansible-playbook command like this:
|
||||
|
||||
ansible-playbook -vvvv /home/stack/tripleo-ha-utils/playbooks/overcloud-validate-ha.yml -e @/home/stack/config.yml
|
||||
|
||||
The result will be the same.
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
GPL
|
||||
|
||||
Author Information
|
||||
------------------
|
||||
|
||||
Raoul Scarazzini <rasca@redhat.com>
|
@ -1,25 +0,0 @@
|
||||
---
|
||||
|
||||
working_dir: "/home/stack"
|
||||
validate_ha_logs_dir: "{{ working_dir }}/validate_ha_logs"
|
||||
overcloud_working_dir: "/home/heat-admin"
|
||||
|
||||
validate_ha_heat_environment: "validate-ha-heat-environment.yaml.j2"
|
||||
validate_ha_heat_template: "validate-ha-heat-template.yaml.j2"
|
||||
validate_ha_heat_instance_image_format: "qcow2"
|
||||
validate_ha_heat_instance_image_location: "http://download.cirros-cloud.net/0.4.0/cirros-0.4.0-x86_64-disk.img"
|
||||
validate_ha_heat_instance_volume_gb: 1
|
||||
|
||||
private_net_name: "private-network"
|
||||
private_subnet_name: "private-subnet"
|
||||
public_net_name: "public-network"
|
||||
public_subnet_name: "public-subnet"
|
||||
private_net_cidr: "10.1.1.0/24"
|
||||
public_physical_network: "datacentre"
|
||||
public_network_type: "flat"
|
||||
floating_ip_cidr: "{{ undercloud_network_cidr|default('192.0.2.0/24') }}"
|
||||
floating_ip_start: "{{ floating_ip_cidr|nthhost(100) }}"
|
||||
floating_ip_end: "{{ floating_ip_cidr|nthhost(120) }}"
|
||||
external_network_gateway: "{{ floating_ip_cidr|nthhost(1) }}"
|
||||
|
||||
latency_ms: 0
|
@ -1,26 +0,0 @@
|
||||
---
|
||||
|
||||
# Execute ha-test-suite test
|
||||
- block:
|
||||
# The task name is quoted as a whole; it previously ended with a stray '"'.
- name: "Testing {{ ha_test_name }} with recovery {{ ha_recovery_name }}"
|
||||
delegate_to: "{{ groups.controller[0] }}"
|
||||
shell: >
|
||||
{{ overcloud_working_dir }}/ha-test-suite/ha-test-suite.sh \
|
||||
-t {{ overcloud_working_dir }}/ha-test-suite/test/{{ ha_test_name }} \
|
||||
-r {{ overcloud_working_dir }}/ha-test-suite/recovery/{{ ha_recovery_name }}
|
||||
register: ha_test_cmd
|
||||
|
||||
- include_tasks: heat-validation-create.yml
|
||||
- include_tasks: heat-validation-check.yml
|
||||
- include_tasks: heat-validation-delete.yml
|
||||
|
||||
vars:
|
||||
stack_name: "stack_{{ ha_test_name }}"
|
||||
|
||||
always:
|
||||
- name: Copy stdout for test {{ ha_test_name }} to undercloud
|
||||
copy: content="{{ ha_test_cmd.stdout }}" dest="{{ validate_ha_logs_dir }}/{{ ha_test_name }}_stdout.log"
|
||||
rescue:
|
||||
- name: Copy stderr for test {{ ha_test_name }} to undercloud
|
||||
copy: content="{{ ha_test_cmd.stderr }}" dest="{{ validate_ha_logs_dir }}/{{ ha_test_name }}_stderr.log"
|
||||
- fail: msg="{{ ha_test_cmd.stderr }}"
|
@ -1,7 +0,0 @@
|
||||
---
|
||||
|
||||
- name: Wait up to five minutes for the instance to be reachable
|
||||
wait_for:
|
||||
host: "{{ vars[ stack_name + '_instance_ip'].stdout }}"
|
||||
port: 22
|
||||
timeout: 300
|
@ -1,30 +0,0 @@
|
||||
---
|
||||
|
||||
- name: Load image in Glance to be used by Heat
|
||||
shell: |
|
||||
source {{ working_dir }}/overcloudrc
|
||||
openstack image create \
|
||||
--disk-format {{ validate_ha_heat_instance_image_format }} \
|
||||
--file {{ working_dir }}/{{ heat_image_name }} \
|
||||
--format value \
|
||||
--column "id" \
|
||||
validate_ha_image > \
|
||||
{{ validate_ha_logs_dir }}/{{ ha_test_name }}_image-create.log 2>&1
|
||||
|
||||
- name: Execute environment validation via Heat
|
||||
shell: |
|
||||
source {{ working_dir }}/overcloudrc
|
||||
openstack stack create \
|
||||
--environment validate-ha-heat-environment.yaml \
|
||||
--template validate-ha-heat-template.yaml \
|
||||
--wait \
|
||||
{{ stack_name }} > \
|
||||
{{ validate_ha_logs_dir }}/{{ ha_test_name }}_heat-create.log 2>&1
|
||||
|
||||
- name: Get instance IP
|
||||
shell: |
|
||||
source {{ working_dir }}/overcloudrc
|
||||
openstack stack show -c outputs -f json {{ stack_name }} | \
|
||||
jq --raw-output '.outputs[] | select( .output_key == "server_public_ip") | .output_value' 2>&1 | \
|
||||
tee {{ validate_ha_logs_dir }}/{{ ha_test_name }}_heat-instance-ip.log
|
||||
register: "{{ stack_name }}_instance_ip"
|
@ -1,16 +0,0 @@
|
||||
---
|
||||
|
||||
- name: Clean the created stack
|
||||
shell: |
|
||||
source {{ working_dir }}/overcloudrc
|
||||
openstack stack delete \
|
||||
--yes \
|
||||
--wait \
|
||||
{{ stack_name }} > \
|
||||
{{ validate_ha_logs_dir }}/{{ ha_test_name }}_heat-delete.log 2>&1
|
||||
|
||||
- name: Clean image in Glance
|
||||
shell: |
|
||||
source {{ working_dir }}/overcloudrc
|
||||
openstack image delete validate_ha_image > \
|
||||
{{ validate_ha_logs_dir }}/{{ ha_test_name }}_image-delete.log 2>&1
|
@ -1,147 +0,0 @@
|
||||
---
|
||||
|
||||
- name: Include test sequence depending on release
|
||||
include_vars:
|
||||
dir: "vars"
|
||||
files_matching: "test_list_{{ release }}.yml"
|
||||
|
||||
- name: Create directory on the undercloud to store test results
|
||||
file: path={{ validate_ha_logs_dir }} state=directory
|
||||
|
||||
- name: Copy ha-test-suite on controllers
|
||||
shell: >
|
||||
{% if (undercloud_user == 'zuul') and (zuul.projects is defined) -%}
|
||||
/usr/bin/rsync --delay-updates -F --compress --archive -e 'ssh -F {{ local_working_dir }}/ssh.config.ansible' /home/{{ undercloud_user }}/src/opendev.org/openstack/tripleo-ha-utils/tools/ha-test-suite {{ hostvars[item]['ansible_hostname'] }}:
|
||||
{%- else -%}
|
||||
/usr/bin/rsync --delay-updates -F --compress --archive -e 'ssh -F {{ local_working_dir }}/ssh.config.ansible' {{ local_working_dir }}/tripleo-ha-utils/tools/ha-test-suite {{ hostvars[item]['ansible_hostname'] }}:
|
||||
{%- endif -%}
|
||||
delegate_to: "localhost"
|
||||
with_items:
|
||||
- "{{ groups['controller'] }}"
|
||||
|
||||
- name: Apply latency (if defined)
|
||||
vars:
|
||||
latency_action: "add"
|
||||
include_tasks: manage-latency.yml
|
||||
when: latency_ms|int > 0
|
||||
|
||||
- name: Create the environment template on undercloud
|
||||
template:
|
||||
src: "{{ validate_ha_heat_environment }}"
|
||||
dest: "{{ working_dir }}/validate-ha-heat-environment.yaml"
|
||||
mode: 0600
|
||||
|
||||
- name: Create the test template on undercloud
|
||||
template:
|
||||
src: "{{ validate_ha_heat_template }}"
|
||||
dest: "{{ working_dir }}/validate-ha-heat-template.yaml"
|
||||
mode: 0600
|
||||
|
||||
- name: Download and uncompress (if necessary) image file for Heat
|
||||
shell: |
|
||||
image_url="{{ validate_ha_heat_instance_image_location }}"
|
||||
image_file=$(basename $image_url)
|
||||
|
||||
curl -s -o $image_file $image_url
|
||||
|
||||
case "$image_file" in
|
||||
*.tar)
|
||||
image_name=$(tar xvf $image_file)
|
||||
;;
|
||||
*.tar.gz|*.tgz)
|
||||
image_name=$(tar xzvf $image_file)
|
||||
;;
|
||||
*.tar.bz2|*.tbz2)
|
||||
image_name=$(tar xjvf $image_file)
|
||||
;;
|
||||
*.tar.xz|*.txz)
|
||||
# 'v' is required: the extracted file name printed by tar is the image name
image_name=$(tar xJvf $image_file)
|
||||
;;
|
||||
*.bz2)
|
||||
bunzip2 --force --quiet $image_file
|
||||
image_name=${image_file%.*};
|
||||
;;
|
||||
*.gz)
|
||||
gunzip --force --quiet $image_file
|
||||
image_name=${image_file%.*};
|
||||
;;
|
||||
*.xz)
|
||||
xz --force --quiet --decompress $image_file
|
||||
image_name=${image_file%.*};
|
||||
;;
|
||||
*) image_name=$image_file
|
||||
;;
|
||||
esac
|
||||
|
||||
echo $image_name
|
||||
register: image_name
|
||||
|
||||
- set_fact:
|
||||
heat_image_name: "{{ image_name.stdout }}"
|
||||
|
||||
# Test: failed actions
|
||||
- name: HA test - Failed actions
|
||||
vars:
|
||||
ha_test_name: "test_ha_failed_actions"
|
||||
ha_recovery_name: ""
|
||||
include_tasks: ha-test-suite.yml
|
||||
when: test_ha_failed_actions|bool
|
||||
|
||||
# Test: Master/Slave
|
||||
- name: HA test - Master/Slave core resource stop and start
|
||||
vars:
|
||||
ha_test_name: "test_master-slave"
|
||||
ha_recovery_name: "recovery_master-slave"
|
||||
include_tasks: ha-test-suite.yml
|
||||
when: test_ha_master_slave|bool
|
||||
|
||||
# Test: Keystone stop
|
||||
- name: HA test - Keystone stop
|
||||
vars:
|
||||
ha_test_name: "test_keystone-stop"
|
||||
ha_recovery_name: "recovery_keystone-stop"
|
||||
include_tasks: ha-test-suite.yml
|
||||
when: test_ha_keystone_stop|bool
|
||||
|
||||
# Test: Keystone removal
|
||||
- name: HA test - Keystone constraint removal
|
||||
vars:
|
||||
ha_test_name: "test_keystone-constraint-removal"
|
||||
ha_recovery_name: "recovery_keystone-constraint-removal"
|
||||
include_tasks: ha-test-suite.yml
|
||||
when: test_ha_keystone_constraint_removal|bool
|
||||
|
||||
# Test: NG A
|
||||
- name: HA test - Pacemaker light test A
|
||||
vars:
|
||||
ha_test_name: "test_pacemaker-light-a"
|
||||
ha_recovery_name: "recovery_pacemaker-light"
|
||||
include_tasks: ha-test-suite.yml
|
||||
when: test_ha_ng_a|bool
|
||||
|
||||
# Test: NG B
|
||||
- name: HA test - Pacemaker light test B
|
||||
vars:
|
||||
ha_test_name: "test_pacemaker-light-b"
|
||||
ha_recovery_name: "recovery_pacemaker-light"
|
||||
include_tasks: ha-test-suite.yml
|
||||
when: test_ha_ng_b|bool
|
||||
|
||||
# Test: NG C
|
||||
- name: HA test - Pacemaker light test C
|
||||
vars:
|
||||
ha_test_name: "test_pacemaker-light-c"
|
||||
ha_recovery_name: "recovery_pacemaker-light"
|
||||
include_tasks: ha-test-suite.yml
|
||||
when: test_ha_ng_c|bool
|
||||
|
||||
- name: Remove image file
|
||||
file:
|
||||
path: "{{ working_dir }}/{{ heat_image_name }}"
|
||||
state: absent
|
||||
|
||||
- name: Remove latency (if defined)
|
||||
vars:
|
||||
latency_action: "del"
|
||||
include_tasks: manage-latency.yml
|
||||
when: latency_ms|int > 0
|
@ -1,12 +0,0 @@
|
||||
# Manage latency on all nodes
|
||||
- name: "Manage latency on all nodes"
|
||||
shell: |
|
||||
/usr/sbin/tc qdisc {{ latency_action }} dev {{ latency_eth_interface }} root netem delay {{ latency_ms }}ms
|
||||
delegate_to: "{{ item }}"
|
||||
become: true
|
||||
with_items:
|
||||
- "{{ groups['overcloud'] }}"
|
||||
when:
|
||||
- latency_action in [ "add", "del" ]
|
||||
- latency_eth_interface is defined
|
||||
- latency_ms|int > 0
|
@ -1,13 +0,0 @@
|
||||
# Heat template parameters
|
||||
parameters:
|
||||
private_net_name: "{{ private_net_name }}"
|
||||
private_subnet_name: "{{ private_subnet_name }}"
|
||||
private_net_cidr: "{{ private_net_cidr }}"
|
||||
public_net_name: "{{ public_net_name }}"
|
||||
public_subnet_name: "{{ public_subnet_name }}"
|
||||
public_physical_network: "{{ public_physical_network }}"
|
||||
public_network_type: "{{ public_network_type }}"
|
||||
public_net_cidr: "{{ floating_ip_cidr }}"
|
||||
public_net_gateway: "{{ public_net_gateway }}"
|
||||
public_net_pool_start: "{{ public_net_pool_start }}"
|
||||
public_net_pool_end: "{{ public_net_pool_end }}"
|
@ -1,192 +0,0 @@
|
||||
heat_template_version: 2016-10-14
|
||||
description: spawning a server
|
||||
|
||||
parameters:
|
||||
private_net_name:
|
||||
type: string
|
||||
default: "private"
|
||||
description: Name of private network into which servers get deployed
|
||||
private_subnet_name:
|
||||
type: string
|
||||
default: private_subnet
|
||||
description: Name of private subnet into which servers get deployed
|
||||
private_net_cidr:
|
||||
type: string
|
||||
description: Private network address (CIDR notation)
|
||||
public_physical_network:
|
||||
type: string
|
||||
default: "datacentre"
|
||||
description: Physical network name
|
||||
public_network_type:
|
||||
type: string
|
||||
default: "flat"
|
||||
description: Type of the physical network (flat or vlan)
|
||||
constraints:
|
||||
- allowed_values:
|
||||
- vlan
|
||||
- flat
|
||||
public_net_name:
|
||||
type: string
|
||||
default: public
|
||||
description: Name of public network into which servers get deployed
|
||||
public_subnet_name:
|
||||
type: string
|
||||
default: public_subnet
|
||||
description: Name of public subnet into which servers get deployed
|
||||
public_net_cidr:
|
||||
type: string
|
||||
description: Public network address (CIDR notation)
|
||||
public_net_gateway:
|
||||
type: string
|
||||
description: Public network gateway address
|
||||
public_net_pool_start:
|
||||
type: string
|
||||
description: Start of public network IP address allocation pool
|
||||
public_net_pool_end:
|
||||
type: string
|
||||
description: End of public network IP address allocation pool
|
||||
|
||||
resources:
|
||||
|
||||
###########
|
||||
# Network #
|
||||
###########
|
||||
|
||||
private_net:
|
||||
type: OS::Neutron::Net
|
||||
properties:
|
||||
name: { get_param: private_net_name }
|
||||
|
||||
private_subnet:
|
||||
type: OS::Neutron::Subnet
|
||||
properties:
|
||||
name: { get_param: private_subnet_name }
|
||||
network_id: { get_resource: private_net }
|
||||
cidr: { get_param: private_net_cidr }
|
||||
|
||||
public_net:
|
||||
type: OS::Neutron::ProviderNet
|
||||
properties:
|
||||
name: { get_param: public_net_name }
|
||||
router_external: true
|
||||
physical_network: { get_param: public_physical_network }
|
||||
network_type: { get_param: public_network_type }
|
||||
|
||||
public_subnet:
|
||||
type: OS::Neutron::Subnet
|
||||
properties:
|
||||
name: { get_param: public_subnet_name }
|
||||
network_id: { get_resource: public_net }
|
||||
cidr: { get_param: public_net_cidr }
|
||||
gateway_ip: { get_param: public_net_gateway }
|
||||
allocation_pools:
|
||||
- start: { get_param: public_net_pool_start }
|
||||
end: { get_param: public_net_pool_end }
|
||||
|
||||
router:
|
||||
type: OS::Neutron::Router
|
||||
properties:
|
||||
external_gateway_info:
|
||||
network: { get_resource: public_net }
|
||||
|
||||
router_interface:
|
||||
type: OS::Neutron::RouterInterface
|
||||
properties:
|
||||
router_id: { get_resource: router }
|
||||
subnet_id: { get_resource: private_subnet }
|
||||
|
||||
public_net_port:
|
||||
type: OS::Neutron::Port
|
||||
properties:
|
||||
network: { get_resource: private_net }
|
||||
fixed_ips:
|
||||
- subnet: { get_resource: private_subnet }
|
||||
security_groups: [{ get_resource: public_security_group }]
|
||||
|
||||
public_floating_ip:
|
||||
type: OS::Neutron::FloatingIP
|
||||
properties:
|
||||
floating_network: { get_resource: public_net }
|
||||
port_id: { get_resource: public_net_port }
|
||||
|
||||
public_security_group:
|
||||
type: OS::Neutron::SecurityGroup
|
||||
properties:
|
||||
description: Add security group rules for the multi-tier architecture
|
||||
name: pingandssh
|
||||
rules:
|
||||
- remote_ip_prefix: 0.0.0.0/0
|
||||
protocol: tcp
|
||||
port_range_min: 22
|
||||
port_range_max: 22
|
||||
- remote_ip_prefix: 0.0.0.0/0
|
||||
protocol: tcp
|
||||
port_range_min: 80
|
||||
port_range_max: 80
|
||||
- remote_ip_prefix: 0.0.0.0/0
|
||||
protocol: icmp
|
||||
|
||||
###########
|
||||
# Volume #
|
||||
###########
|
||||
|
||||
instance_volume:
|
||||
type: OS::Cinder::Volume
|
||||
properties:
|
||||
name: "instance_volume"
|
||||
size: {{ validate_ha_heat_instance_volume_gb }}
|
||||
image: "validate_ha_image"
|
||||
|
||||
###########
|
||||
# Keypair #
|
||||
###########
|
||||
|
||||
instance_keypair:
|
||||
type: OS::Nova::KeyPair
|
||||
properties:
|
||||
name: "instance_keypair"
|
||||
save_private_key: "true"
|
||||
|
||||
###########
|
||||
# Flavor #
|
||||
###########
|
||||
|
||||
instance_flavor:
|
||||
type: OS::Nova::Flavor
|
||||
properties:
|
||||
name: "instance_flavor"
|
||||
ephemeral: 0
|
||||
ram: 2048
|
||||
disk: 10
|
||||
vcpus: 2
|
||||
|
||||
###########
|
||||
# Server #
|
||||
###########
|
||||
|
||||
instance:
|
||||
type: OS::Nova::Server
|
||||
properties:
|
||||
name: "validate_ha_instance"
|
||||
flavor: { get_resource: instance_flavor }
|
||||
key_name: { get_resource: instance_keypair }
|
||||
networks:
|
||||
- port: { get_resource: public_net_port }
|
||||
block_device_mapping: [{ device_name: "vda", volume_id : { get_resource : instance_volume }, delete_on_termination : "true" }]
|
||||
|
||||
outputs:
|
||||
server_private_ip:
|
||||
description: IP address of first web server in private network
|
||||
value: { get_attr: [ instance, first_address ] }
|
||||
|
||||
server_public_ip:
|
||||
description: Floating IP address of the web server
|
||||
value: { get_attr: [ public_floating_ip, floating_ip_address ] }
|
||||
|
||||
public_key:
|
||||
description: The public key of the keypair.
|
||||
value: { get_attr: [instance_keypair, public_key] }
|
||||
|
||||
private_key:
|
||||
description: The private key of the keypair.
|
||||
value: { get_attr: [instance_keypair, private_key] }
|
@ -1,7 +0,0 @@
|
||||
test_ha_failed_actions: true
|
||||
test_ha_master_slave: true
|
||||
test_ha_keystone_stop: true
|
||||
test_ha_keystone_constraint_removal: false
|
||||
test_ha_ng_a: false
|
||||
test_ha_ng_b: false
|
||||
test_ha_ng_c: false
|
@ -1 +0,0 @@
|
||||
test_list_rocky.yml
|
@ -1,7 +0,0 @@
|
||||
test_ha_failed_actions: true
|
||||
test_ha_master_slave: true
|
||||
test_ha_keystone_stop: false
|
||||
test_ha_keystone_constraint_removal: true
|
||||
test_ha_ng_a: false
|
||||
test_ha_ng_b: false
|
||||
test_ha_ng_c: false
|
@ -1,7 +0,0 @@
|
||||
test_ha_failed_actions: true
|
||||
test_ha_master_slave: true
|
||||
test_ha_keystone_stop: false
|
||||
test_ha_keystone_constraint_removal: false
|
||||
test_ha_ng_a: true
|
||||
test_ha_ng_b: true
|
||||
test_ha_ng_c: true
|
@ -1,7 +0,0 @@
|
||||
test_ha_failed_actions: true
|
||||
test_ha_master_slave: true
|
||||
test_ha_keystone_stop: false
|
||||
test_ha_keystone_constraint_removal: false
|
||||
test_ha_ng_a: true
|
||||
test_ha_ng_b: true
|
||||
test_ha_ng_c: true
|
@ -1,7 +0,0 @@
|
||||
test_ha_failed_actions: true
|
||||
test_ha_master_slave: true
|
||||
test_ha_keystone_stop: false
|
||||
test_ha_keystone_constraint_removal: false
|
||||
test_ha_ng_a: true
|
||||
test_ha_ng_b: true
|
||||
test_ha_ng_c: true
|
@ -1,7 +0,0 @@
|
||||
test_ha_failed_actions: true
|
||||
test_ha_master_slave: true
|
||||
test_ha_keystone_stop: false
|
||||
test_ha_keystone_constraint_removal: false
|
||||
test_ha_ng_a: true
|
||||
test_ha_ng_b: true
|
||||
test_ha_ng_c: true
|
@ -1 +0,0 @@
|
||||
test_list_newton.yml
|
@ -1 +0,0 @@
|
||||
test_list_ocata.yml
|
@ -1 +0,0 @@
|
||||
test_list_pike.yml
|
@ -1 +0,0 @@
|
||||
test_list_queens.yml
|
@ -1 +0,0 @@
|
||||
test_list_liberty.yml
|
@ -1 +0,0 @@
|
||||
test_list_mitaka.yml
|
@ -1,7 +0,0 @@
|
||||
test_ha_failed_actions: true
|
||||
test_ha_master_slave: true
|
||||
test_ha_keystone_stop: false
|
||||
test_ha_keystone_constraint_removal: false
|
||||
test_ha_ng_a: true
|
||||
test_ha_ng_b: true
|
||||
test_ha_ng_c: true
|
38
setup.cfg
38
setup.cfg
@ -1,38 +0,0 @@
|
||||
[metadata]
|
||||
name = tripleo-ha-utils
|
||||
summary = Give a set of tools to test TripleO HA capabilities
|
||||
# README.md is the value of description_file, so it must directly follow it
description_file =
    README.md
long_description_content_type = text/markdown
|
||||
author = Raoul Scarazzini
|
||||
author_email = rasca@redhat.com
|
||||
home_page = https://github.com/openstack/tripleo-ha-utils/
|
||||
classifier =
|
||||
License :: OSI Approved :: Apache Software License
|
||||
Development Status :: 4 - Beta
|
||||
Intended Audience :: Developers
|
||||
Intended Audience :: System Administrators
|
||||
Intended Audience :: Information Technology
|
||||
Topic :: Utilities
|
||||
|
||||
[build_sphinx]
|
||||
all_files = 1
|
||||
build-dir = doc/build
|
||||
source-dir = doc/source
|
||||
|
||||
[global]
|
||||
setup-hooks =
|
||||
pbr.hooks.setup_hook
|
||||
|
||||
[files]
|
||||
data_files =
|
||||
config = config/*
|
||||
playbooks = playbooks/*
|
||||
usr/local/share/ansible/roles = roles/*
|
||||
|
||||
[wheel]
|
||||
universal = 1
|
||||
|
||||
[pbr]
|
||||
skip_authors = True
|
||||
skip_changelog = True
|
20
setup.py
20
setup.py
@ -1,20 +0,0 @@
|
||||
# Copyright Red Hat, Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import setuptools
|
||||
|
||||
setuptools.setup(
|
||||
setup_requires=['pbr'],
|
||||
py_modules=[],
|
||||
pbr=True)
|
@ -1,145 +0,0 @@
|
||||
# OpenStack TripleO HA Test Suite
|
||||
|
||||
This project is a modular and customizable test suite to be applied in an
|
||||
Overcloud OpenStack environment deployed via TripleO upstream or Red Hat
|
||||
OpenStack Director (OSPd).
|
||||
|
||||
## Usage
|
||||
|
||||
The script needs at least a test file (-t) which must contain the sequence of
|
||||
the operations to be done. A recovery file (-r), with the sequence of the
|
||||
operations needed to recover the environment can also be passed. So a typical
|
||||
invocation will be something like this:
|
||||
|
||||
```console
|
||||
[heat-admin@overcloud-controller-0 overcloud-ha-test-suite]$ ./overcloud-ha-test-suite.sh -t test/test_keystone-constraint-removal -r recovery/recovery_keystone-constraint-removal
|
||||
Fri May 20 15:27:19 UTC 2016 - Populationg overcloud elements...OK
|
||||
Fri May 20 15:27:22 UTC 2016 - Test: Stop keystone resource (by stopping httpd), check no other resource is stopped
|
||||
Fri May 20 15:27:22 UTC 2016 * Step 1: disable keystone resource via httpd stop
|
||||
Fri May 20 15:27:22 UTC 2016 - Performing action disable on resource httpd ..OK
|
||||
Fri May 20 15:27:26 UTC 2016 - List of cluster's failed actions:
|
||||
Cluster is OK.
|
||||
Fri May 20 15:27:29 UTC 2016 * Step 2: check resource status
|
||||
Fri May 20 15:27:29 UTC 2016 - Cycling for 10 minutes polling every minute the status of the resources
|
||||
Fri May 20 15:28:29 UTC 2016 - Polling...
|
||||
delay -> OK
|
||||
galera -> OK
|
||||
...
|
||||
...
|
||||
openstack-sahara-engine -> OK
|
||||
rabbitmq -> OK
|
||||
redis -> OK
|
||||
Fri May 20 15:41:00 UTC 2016 - List of cluster's failed actions:
|
||||
Cluster is OK.
|
||||
Fri May 20 15:41:03 UTC 2016 - Waiting 10 seconds to recover environment
|
||||
Fri May 20 15:41:13 UTC 2016 - Recovery: Enable keystone via httpd and check for failed actions
|
||||
Fri May 20 15:41:13 UTC 2016 * Step 1: enable keystone resource via httpd
|
||||
Fri May 20 15:41:13 UTC 2016 - Performing action enable on resource httpd-clone OK
|
||||
Fri May 20 15:41:15 UTC 2016 - List of cluster's failed actions:
|
||||
Cluster is OK.
|
||||
Fri May 20 15:41:17 UTC 2016 - End
|
||||
```
|
||||
|
||||
The exit status will depend on the result of the operations. If a disable
|
||||
operation fails, if failed actions appear, or if recovery does not end with
|
||||
success, the exit status will not be 0.
|
||||
|
||||
## Test and recoveries
|
||||
|
||||
Test and recovery are bash script portions that are
|
||||
included inside the main script. Some functions and variables are available to
|
||||
help on recurring operations. These functions are listed here:
|
||||
|
||||
- **check_failed_actions**: will print failed actions and return error in case
|
||||
some of them are present;
|
||||
- **check_resources_process_status**: will check for the process status of the
|
||||
resources on the system (not in the cluster), i.e. will check if there is a
|
||||
process for mysql daemon;
|
||||
- **wait_resource_status**: will wait until a default timeout
|
||||
($RESOURCE_CHANGE_STATUS_TIMEOUT) for a resource to reach a status;
|
||||
- **check_resource_status**: will check a resource status, i.e. if you want to
|
||||
check if httpd resource is started;
|
||||
- **wait_cluster_start**: will wait until a timeout
|
||||
($RESOURCE_CHANGE_STATUS_TIMEOUT) to be started, specifically will wait for
|
||||
all resources to be in state "Started";
|
||||
- **play_on_resources**: will set the status of a resource;
|
||||
|
||||
The variables are:
|
||||
|
||||
- **OVERCLOUD_CORE_RESOURCES**: which are galera, redis and rabbitmq
|
||||
- **OVERCLOUD_RESOURCES**: which are *all* the resources
|
||||
- **OVERCLOUD_SYSTEMD_RESOURCES**: which are the resources managed via systemd
|
||||
by pacemaker;
|
||||
|
||||
They can be used in combination to write test and recovery files.
|
||||
|
||||
### Test file contents
|
||||
|
||||
A typical test file, say test/test_keystone-constraint-removal, will contain
|
||||
something like this:
|
||||
|
||||
```bash
|
||||
# Test: Stop keystone resource (by stopping httpd), check no other resource is stopped
|
||||
|
||||
echo "$(date) * Step 1: disable keystone resource via httpd stop"
|
||||
play_on_resources "disable" "httpd"
|
||||
|
||||
echo "$(date) - List of cluster's failed actions:"
|
||||
check_failed_actions
|
||||
|
||||
echo "$(date) * Step 2: check resource status"
|
||||
# Define resource list without httpd
|
||||
OVERCLOUD_RESOURCES_NO_KEYSTONE="$(echo $OVERCLOUD_RESOURCES | sed 's/httpd/ /g')"
|
||||
# Define number of minutes to look for status
|
||||
MINUTES=10
|
||||
# Cycling for $MINUTES minutes polling every minute the status of the resources
|
||||
echo "$(date) - Cycling for 10 minutes polling every minute the status of the resources"
|
||||
i=0
|
||||
while [ $i -lt $MINUTES ]
|
||||
do
|
||||
# Wait a minute
|
||||
sleep 60
|
||||
echo "$(date) - Polling..."
|
||||
for resource in $OVERCLOUD_RESOURCES_NO_KEYSTONE
|
||||
do
|
||||
echo -n "$resource -> "
|
||||
check_resource_status "$resource" "Started"
|
||||
[ $? -eq 0 ] && echo "OK" || (FAILURES=1; echo "Error!")
|
||||
done
|
||||
let "i++"
|
||||
done
|
||||
|
||||
echo "$(date) - List of cluster's failed actions:"
|
||||
check_failed_actions
|
||||
```
|
||||
|
||||
Code is commented and should be self explaining, but in short:
|
||||
- the first commented line, after "# Test: " is read as test title;
|
||||
- using play_on_resources it disables httpd resource;
|
||||
- it checks for failed actions;
|
||||
- it defines a variable named OVERCLOUD_RESOURCES_NO_KEYSTONE containing
|
||||
all the resources but httpd;
|
||||
- it cycles for 10 minutes, polling every minute the status of all the
|
||||
resources;
|
||||
|
||||
If any of these steps for some reason fails, then the overall test will be
|
||||
considered failed and the exit status will not be 0.
|
||||
|
||||
### Recovery file contents
|
||||
|
||||
A typical recovery file, say recovery/recovery_keystone-constraint-removal,
|
||||
will contain something like this:
|
||||
|
||||
```bash
|
||||
# Recovery: Enable keystone via httpd and check for failed actions
|
||||
|
||||
echo "$(date) * Step 1: enable keystone resource via httpd"
|
||||
play_on_resources "enable" "httpd-clone"
|
||||
|
||||
echo "$(date) - List of cluster's failed actions:"
check_failed_actions
|
||||
```
|
||||
|
||||
Again:
|
||||
- the first commented line, after "# Recovery: " is read as recovery title;
|
||||
- using play_on_resources it enables httpd resource;
|
||||
- it checks for failed actions;
|
@ -1,80 +0,0 @@
|
||||
#!/bin/bash

# Raoul Scarazzini (rasca@redhat.com)
# This script provides a testing suite for TripleO HA environments
#
# It sources the function library from include/functions (relative to this
# script), collects the cluster resource names through pcs and then sources,
# in this same shell, the test file (-t) and the optional recovery file (-r).

# Define main workdir
WORKDIR=$(dirname "$0")

# Source function library.
. "$WORKDIR/include/functions"

# Fixed parameters
# How long to wait, in seconds, for a resource to change status (i.e. from started to stopped)
RESOURCE_CHANGE_STATUS_TIMEOUT=600
# How long to wait, in seconds, before starting recovery
DEFAULT_RECOVERY_WAIT_TIME=10

# Command line parameters
if [ $# -gt 0 ]
then
    while :; do
        # "${1-}" expands to an empty word once all arguments are consumed,
        # which falls through to the default arm and ends the loop.
        case "${1-}" in
            -h|-\?|--help)
                usage
                exit
                ;;
            -t|--test)
                test_sequence="${2-}"
                shift
                ;;
            -r|--recover)
                recovery_sequence="${2-}"
                shift
                ;;
            --)
                shift
                break
                ;;
            -?*)
                # Unknown option
                usage
                exit 1
                ;;
            *)
                break
                ;;
        esac

        shift
    done
else
    usage
    exit 1
fi

# Populating overcloud elements
echo -n "$(date) - Populationg overcloud elements..."
OVERCLOUD_CORE_RESOURCES="galera redis rabbitmq"
# [[:alpha:]] replaces the range [a-Z], which is rejected as an invalid range
# in the C locale and with recent glibc versions.
OVERCLOUD_RESOURCES=$(sudo pcs resource show | grep -E '^ (C|[[:alpha:]])' | sed 's/.* \[\(.*\)\]/\1/g' | sed 's/ \(.*\)(.*):.*/\1/g' | sort)
OVERCLOUD_SYSTEMD_RESOURCES=$(sudo pcs config show | grep -E "Resource:.*systemd" | grep -v "haproxy" | awk '{print $2}')
echo "OK"

if [ -f "$test_sequence" ]
then
    # The title is taken from the "# Test: ..." comment of the test file
    echo "$(date) - Test: $(grep '^#.*Test:' "$test_sequence" | sed 's/^#.*Test: //')"
    . "$test_sequence"
else
    echo "No test file passed or unable to read test file."
fi

if [ -f "$recovery_sequence" ]
then
    echo "$(date) - Waiting $DEFAULT_RECOVERY_WAIT_TIME seconds to recover environment"
    sleep "$DEFAULT_RECOVERY_WAIT_TIME"

    # The title is taken from the "# Recovery: ..." comment of the recovery file
    echo "$(date) - Recovery: $(grep '^#.*Recovery:' "$recovery_sequence" | sed 's/^#.*Recovery: //')"
    . "$recovery_sequence"
else
    echo "No recovery file passed or unable to read recovery file."
fi

echo "$(date) - End"
|
@ -1,151 +0,0 @@
|
||||
# Raoul Scarazzini (rasca@redhat.com)
|
||||
# This script provides a testing suite for TripleO/Director OpenStack HA (so
|
||||
# with Pacemaker) environments functions to be used inside TripleO/Director
|
||||
# OpenStack HA environments
|
||||
|
||||
# Print the command line help on stdout.
# Only the options handled by the ha-test-suite.sh argument parser are listed:
# the formerly advertised -u/--undercloud is rejected by that parser as an
# unknown option, while -h/--help is accepted.
function usage {
    cat <<EOF
Usage $0 -t <testfile> [-r <recover file>]
 -t, --test <testfile>        Specify which file contains the test to run
 -r, --recover <recoverfile>  Specify which file (if any) should be used for recovery
 -h, --help                   Show this help and exit

EOF
}
|
||||
|
||||
# Report the failed actions listed in the "Failed Actions:" section of
# "pcs status".
#
# Arguments:
#   $1 - optional resource name; when given, only entries of the section
#        matching this name (grep pattern) are considered.
# Outputs:
#   A human readable report on stdout.
# Returns:
#   0 when there is no (matching) failed action. Otherwise the function does
#   not return: it terminates the calling shell with "exit 1".
function check_failed_actions {
    # Locals, so that the caller's own $resource is never overwritten
    local resource=$1
    local failed_section errors

    # Keep only the section going from "Failed Actions:" to the first empty line
    failed_section=$(sudo pcs status | sed -n -e '/Failed Actions:/,/^$/p')

    if [ -z "$failed_section" ]
    then
        if [ -z "$resource" ]
        then
            echo "Cluster is OK."
        else
            echo "No failed actions for $resource."
        fi
        return 0
    fi

    if [ -z "$resource" ]
    then
        echo "Cluster has failed actions:"
        # Second field is <resource>_<operation>_<interval>: keep the resource
        printf '%s\n' "$failed_section" | grep -E 'OCF_|not running|unknown' | awk '{print $2}' | cut -f1 -d_ | sort -u
        exit 1
    fi

    if errors=$(printf '%s\n' "$failed_section" | grep -A1 -e "$resource")
    then
        echo "Resource $resource has failed actions:"
        # Quoted, so the report keeps its line breaks
        printf '%s\n' "$errors"
        exit 1
    fi

    echo "No failed actions for $resource."
    return 0
}
|
||||
|
||||
# Print, for every resource named in $OVERCLOUD_RESOURCES, whether it looks
# active at system level (outside of the cluster manager).
#
# Globals:
#   OVERCLOUD_RESOURCES (read) - whitespace separated list of resource names
# Outputs:
#   One "<resource> -> active|inactive" line per resource on stdout.
function check_resources_process_status {
    # Locals, so that the caller's variables with the same names survive
    local resource ip_addr rc

    # Word splitting of the list is intended here
    for resource in $OVERCLOUD_RESOURCES
    do
        echo -n "$resource -> "

        case $resource in
            ip-*)
                # The address is the resource name without "ip-"
                ip_addr=${resource//ip-/}
                sudo ip a s | grep -e "$ip_addr" > /dev/null 2>&1
                ;;
            rabbitmq)
                sudo /usr/sbin/rabbitmqctl cluster_status > /dev/null 2>&1
                ;;
            redis)
                pidof /usr/bin/redis-server > /dev/null 2>&1
                ;;
            galera)
                pidof /usr/libexec/mysqld > /dev/null 2>&1
                ;;
            *cleanup*|delay)
                # Nothing is checked: echo succeeds, so "active" is appended
                echo -n "no need to check if it's "
                ;;
            *)
                systemctl is-active "$resource" > /dev/null 2>&1
                ;;
        esac
        # Status of the last command run by the selected arm
        rc=$?

        if [ "$rc" -eq 0 ]
        then
            echo "active"
        else
            echo "inactive"
        fi
    done
}
|
||||
|
||||
# Poll "pcs status resources" once per second until every status line of a
# Clone or Master/Slave set reports the wanted status.
#
# Globals:
#   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - upper bound of the polling counter
# Arguments:
#   $1 - resource name as shown between square brackets in the set header
#   $2 - expected status label (e.g. Started)
# Outputs:
#   One "." on stdout per unsuccessful poll.
# Returns:
#   0 as soon as no line with a different status is left. When the counter
#   reaches the timeout, check_failed_actions is called and the calling shell
#   is terminated with "exit 1".
function wait_resource_status {
    # Locals: a global "i" would overwrite the loop counter of the caller
    local resource=$1
    local status=$2
    local i=1
    local output

    while [ "$i" -lt "$RESOURCE_CHANGE_STATUS_TIMEOUT" ]
    do
        # Select the block of the set (header up to the next resource line),
        # drop both delimiting lines and keep what is NOT in the wanted status.
        # [[:alpha:]] replaces [a-Z], an invalid range in the C locale that
        # made sed fail and the check succeed on empty output.
        output=$(sudo pcs status resources | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [[:alpha:]]/p" | head -n -1 | tail -n +2 | grep -E -v "${status}:")
        if [ -z "$output" ]
        then
            return 0
        fi

        echo -n "."
        sleep 1
        i=$((i + 1))
    done

    check_failed_actions
    exit 1
}
|
||||
|
||||
# Check, once, that every status line of a Clone or Master/Slave set reports
# the wanted status.
#
# Arguments:
#   $1 - resource name as shown between square brackets in the set header
#   $2 - expected status label (e.g. Started)
# Outputs:
#   The report of check_failed_actions on stdout when the check fails.
# Returns:
#   0 when no line with a different status is found, 1 otherwise. The calling
#   shell is never terminated by this function.
function check_resource_status {
    local resource=$1
    local status=$2
    local output

    # Same selection as in wait_resource_status; [[:alpha:]] replaces [a-Z],
    # which is an invalid range in the C locale.
    output=$(sudo pcs status resources | sed -n -e "/\(Clone\|Master\/Slave\) Set: .*\[$resource\]/,/^ [[:alpha:]]/p" | head -n -1 | tail -n +2 | grep -E -v "${status}:")

    # Since we are checking a specific status, if we have output from above it
    # means that for some reason the resource is not in the state we are expecting
    if [ -z "$output" ]
    then
        return 0
    fi

    # Subshell on purpose: the "exit" calls only end the subshell, so the
    # function returns 1 instead of ending the calling script.
    (check_failed_actions; exit 1)
}
|
||||
|
||||
# Poll "pcs status" once per second until no resource is reported as Stopped.
#
# Globals:
#   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - upper bound of the polling counter
# Outputs:
#   One "." on stdout per poll that still shows stopped resources, then the
#   final outcome.
# Returns:
#   0 when the status shows neither "Failed" nor "Stopped". When "Failed"
#   shows up or the counter reaches the timeout, check_failed_actions is
#   called and the calling shell is terminated with "exit 1".
function wait_cluster_start {
    # Locals: a global "i" would overwrite the loop counter of the caller
    local i=1
    local pcs_status

    while [ "$i" -lt "$RESOURCE_CHANGE_STATUS_TIMEOUT" ]
    do
        # One snapshot per iteration, used by both checks
        pcs_status=$(sudo pcs status)

        # Check for failed actions
        if grep -q "Failed" <<<"$pcs_status"
        then
            break
        fi

        # If we have stopped resources let's wait
        if ! grep -q "Stopped" <<<"$pcs_status"
        then
            echo "All cluster resources are started."
            return 0
        fi

        echo -n "."
        sleep 1
        i=$((i + 1))
    done

    # If we are here then we have problems: we hit timeout or we still have
    # stopped resources
    echo "Problems found. There are stopped or failed resources!"
    check_failed_actions
    exit 1
}
|
||||
|
||||
#######################################
# Run one "pcs resource" action on each resource of a list, waiting for the
# action to complete.
# Globals:
#   RESOURCE_CHANGE_STATUS_TIMEOUT (read) - value passed to pcs --wait
# Arguments:
#   $1 - pcs resource action (e.g. "enable", "disable")
#   $2 - whitespace-separated list of resource names
# Outputs:
#   one progress line per resource on stdout, ending in "OK" or "FAILURE!"
# Returns:
#   0 when every action succeeded. On the first failing action it calls
#   check_failed_actions for that resource and terminates the shell with
#   exit status 1.
#######################################
function play_on_resources {
  local action=$1
  local resources=$2
  local resource

  # $resources is deliberately unquoted: it is split into resource names.
  for resource in $resources; do
    echo -n "$(date) - Performing action $action on resource $resource "
    # Do the action on the resource
    if ! sudo pcs resource "$action" "$resource" --wait="$RESOURCE_CHANGE_STATUS_TIMEOUT"; then
      echo "FAILURE!"
      check_failed_actions "$resource"
      exit 1
    fi
    echo "OK"
  done

  return 0
}
|
@ -1,13 +0,0 @@
|
||||
# Recovery: enable every resource listed in OVERCLOUD_RESOURCES, clean up the
# resources that show up in the cluster's failed actions, then wait for the
# cluster to be fully started.

echo "$(date) * Step 1: enable all the cluster resources"
play_on_resources "enable" "$OVERCLOUD_RESOURCES"

echo "$(date) * Step 2: Cleaning up failed resources"
# Take the "Failed Actions:" section of the status output, keep the entries
# reporting OCF_TIMEOUT or "not running", reduce each one to the resource
# name (second field, cut at the first underscore) and clean up each
# distinct resource once.
sudo pcs status \
  | sed -n -e '/Failed Actions:/,/^$/p' \
  | grep -E 'OCF_TIMEOUT|not running' \
  | awk '{print $2}' \
  | cut -f1 -d_ \
  | sort -u \
  | while read -r RES; do
      echo "Cleaning $RES"
      sudo pcs resource cleanup "$RES"
    done

echo "$(date) * Step 3: Waiting all resources to start"
wait_cluster_start

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
|
@ -1,7 +0,0 @@
|
||||
# Recovery: re-enable the httpd-clone resource (which serves keystone) and
# report the cluster's failed actions.

printf '%s * Step 1: enable keystone resource via httpd\n' "$(date)"
play_on_resources "enable" "httpd-clone"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,10 +0,0 @@
|
||||
# Recovery: re-enable the openstack-keystone-clone resource, wait until no
# resource is stopped, then report the cluster's failed actions.

printf '%s * Step 1: enable openstack-keystone resource\n' "$(date)"
play_on_resources "enable" "openstack-keystone-clone"

printf '%s - Checking for Stopped resources:\n' "$(date)"
wait_cluster_start

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,7 +0,0 @@
|
||||
# Recovery: re-enable the resources listed in OVERCLOUD_CORE_RESOURCES and
# report the cluster's failed actions.

printf '%s * Step 1: enable galera, redis and rabbitmq\n' "$(date)"
play_on_resources "enable" "$OVERCLOUD_CORE_RESOURCES"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,7 +0,0 @@
|
||||
# Recovery: re-enable the mongo resource and report the cluster's failed
# actions.

printf '%s * Step 1: enable mongo\n' "$(date)"
play_on_resources "enable" "mongo"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,13 +0,0 @@
|
||||
# Recovery: enable the core resources first, then the systemd resources,
# wait for the cluster to be fully started and report failed actions.

printf '%s * Step 1: enable core resources\n' "$(date)"
play_on_resources "enable" "$OVERCLOUD_CORE_RESOURCES"

printf '%s * Step 2: enable all the systemd resources\n' "$(date)"
play_on_resources "enable" "$OVERCLOUD_SYSTEMD_RESOURCES"

printf '%s * Step 3: Waiting all resources to start\n' "$(date)"
wait_cluster_start

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,10 +0,0 @@
|
||||
# Recovery: start the cluster on all nodes, wait for every resource to come
# up and report the cluster's failed actions.

printf '%s * Step 1: start the cluster\n' "$(date)"
sudo pcs cluster start --all

printf '%s * Step 2: Waiting all resources to start\n' "$(date)"
wait_cluster_start

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,3 +0,0 @@
|
||||
# Test: wait for the cluster to start; wait_cluster_start reports failed or
# stopped resources itself.
printf '%s - Waiting for cluster start and checking for failed resources:\n' "$(date)"
wait_cluster_start
|
@ -1,40 +0,0 @@
|
||||
# Test: Stop keystone resource (by stopping httpd), check no other resource is stopped

echo "$(date) * Step 1: disable keystone resource via httpd stop"
play_on_resources "disable" "httpd"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: check resource status"
# Define resource list without httpd.
# NOTE(review): the substitution removes every occurrence of the substring
# "httpd", not only a resource named exactly "httpd" - confirm no other
# resource name contains it.
OVERCLOUD_RESOURCES_NO_KEYSTONE="$(echo $OVERCLOUD_RESOURCES | sed 's/httpd/ /g')"
# Define number of minutes to look for status
MINUTES=10
# Cycling for $MINUTES minutes polling every minute the status of the resources
echo "$(date) - Cycling for $MINUTES minutes polling every minute the status of the resources"
i=0
while [ "$i" -lt "$MINUTES" ]; do
  # Wait a minute
  sleep 60
  echo "$(date) - Polling..."
  for resource in $OVERCLOUD_RESOURCES_NO_KEYSTONE; do
    echo -n "$resource -> "
    # If the resource is a multi state like galera or redis, do a different check
    case $resource in
      "galera")
        check_resource_status "$resource" "Masters"
        ;;
      "redis")
        check_resource_status "$resource" "(Masters|Slaves)"
        ;;
      *)
        check_resource_status "$resource" "Started"
        ;;
    esac
    # Handle the failure in the current shell: inside a ( ... ) subshell the
    # FAILURES assignment would be lost and "break" would not leave this loop.
    if [ $? -eq 0 ]; then
      echo "OK"
    else
      FAILURES=1
      echo "Error!"
      break
    fi
  done
  i=$((i + 1))
done

echo "$(date) - List of cluster's failed actions:"
check_failed_actions
|
@ -1,7 +0,0 @@
|
||||
# Test: disable the openstack-keystone-clone resource and report the
# cluster's failed actions.

printf '%s * Step 1: disable openstack-keystone resource\n' "$(date)"
play_on_resources "disable" "openstack-keystone-clone"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,7 +0,0 @@
|
||||
# Test: disable the resources listed in OVERCLOUD_CORE_RESOURCES and report
# the cluster's failed actions.

printf '%s * Step 1: disable galera, redis and rabbitmq\n' "$(date)"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,43 +0,0 @@
|
||||
# Test: Stop mongo resource, check related systemd resources are fine

echo "$(date) * Step 1: disable mongo"
play_on_resources "disable" "mongo"

echo "$(date) - List of cluster's failed actions:"
check_failed_actions

echo "$(date) * Step 2: check resource status"
# Define related resources
# NOTE(review): this overwrites OVERCLOUD_RESOURCES for the rest of the
# shell session - confirm nothing running afterwards expects the full list.
OVERCLOUD_RESOURCES="openstack-aodh-evaluator openstack-aodh-listener openstack-aodh-notifier openstack-ceilometer-central.service openstack-ceilometer-collector.service openstack-ceilometer-notification.service"
# Define number of minutes to look for status
MINUTES=10
# Cycling for $MINUTES minutes polling every minute the status of the resources
echo "$(date) - Cycling for $MINUTES minutes polling every minute the status of the resources"
i=0
while [ "$i" -lt "$MINUTES" ]; do
  # Wait a minute
  sleep 60
  echo "$(date) - Polling..."
  for resource in $OVERCLOUD_RESOURCES; do
    echo -n "$resource -> "
    # Check if the resource is active for the system; systemctl prints the
    # unit state itself, which completes the "resource -> " line above.
    if ! systemctl is-active "$resource"; then
      # Show status of the resource
      echo "Error! Resource $resource is not active anymore."
      systemctl status "$resource"
      # Check in any case cluster's failed actions
      echo "$(date) - List of cluster's failed actions:"
      check_failed_actions
      # Now exit with an error
      exit 1
    fi
  done
  i=$((i + 1))
done

# If we are here, test was successful
echo "$(date) - Test was successful"
|
@ -1,19 +0,0 @@
|
||||
# Test: disable the systemd resources, then the core resources, and finally
# enable the systemd resources again; failed actions are listed after every
# step.

printf '%s * Step 1: disable all the systemd resources\n' "$(date)"
play_on_resources "disable" "$OVERCLOUD_SYSTEMD_RESOURCES"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions

printf '%s * Step 2: disable core services\n' "$(date)"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions

printf '%s * Step 3: enable each resource one by one and check the status\n' "$(date)"
play_on_resources "enable" "$OVERCLOUD_SYSTEMD_RESOURCES"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,19 +0,0 @@
|
||||
# Test: disable the core resources, then the systemd resources, and finally
# enable the systemd resources again; failed actions are listed after every
# step.

printf '%s * Step 1: disable core services\n' "$(date)"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions

printf '%s * Step 2: disable all the systemd resources\n' "$(date)"
play_on_resources "disable" "$OVERCLOUD_SYSTEMD_RESOURCES"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions

printf '%s * Step 3: enable all the systemd resources\n' "$(date)"
play_on_resources "enable" "$OVERCLOUD_SYSTEMD_RESOURCES"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,22 +0,0 @@
|
||||
# Test: disable the core resources, then look at the failed actions once a
# minute for up to twenty rounds, stopping at the first round in which
# check_failed_actions returns a non-zero status.

printf '%s * Step 1: disable core services\n' "$(date)"
play_on_resources "disable" "$OVERCLOUD_CORE_RESOURCES"

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions

printf '%s * Step 2: poll every minute for twenty minutes the status of the resources\n' "$(date)"
for i in {1..20}; do
  if ! check_failed_actions; then
    echo "Errors found, test is over."
    break
  fi
  sleep 60
done

printf "%s - List of cluster's failed actions:\n" "$(date)"
check_failed_actions
|
@ -1,10 +0,0 @@
|
||||
# Test: report the process status of the resources, stop the cluster on all
# nodes, then report the process status again.

printf '%s * Step 1: checking actual process status\n' "$(date)"
check_resources_process_status

printf '%s * Step 2: stopping cluster\n' "$(date)"
sudo pcs cluster stop --all

printf '%s * Step 3: checking actual process status\n' "$(date)"
check_resources_process_status
|
Loading…
x
Reference in New Issue
Block a user