Join cluster without using a failed task

Currently, joining a new node to the cluster is implemented with
a block/rescue operation whose status check always fails for a new
node, causing a lot of confusion due to the failed task report.

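This patch replaces that approach: the cluster name is read first,
and the join tasks are only run when it does not yet match the
expected name, so no task has to fail.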

In order to prevent lint check failures, each join task
is tagged with 'skip_ansible_lint'.

Change-Id: Ic1da9f3ad1016831fe37643165880e7ff98ca923
This commit is contained in:
Jesse Pretorius 2018-02-10 14:01:43 +00:00
parent ad29910e6c
commit 83b398e18b
2 changed files with 65 additions and 43 deletions

View File

@@ -13,24 +13,35 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# The output of 'rabbitmqctl -q cluster_status' looks like this:
# [{nodes,[{disc,[rabbit@container1,rabbit@container2,rabbit@container3]}]},
# {running_nodes,[rabbit@container3,rabbit@container1,rabbit@container2]},
# {cluster_name,<<"rabbitmq_cluster1">>},
# {partitions,[]},
# {alarms,[{rabbit@container3,[]},
# {rabbit@container1,[]},
# {rabbit@container2,[]}]}]
#
# Our solution to get the cluster name out cleanly is inspired by
# https://unix.stackexchange.com/a/13472
- name: Get rabbitmq cluster name - name: Get rabbitmq cluster name
shell: | shell: |
return_code=0 rabbitmqctl -q cluster_status | grep -oP '(?<={cluster_name,<<").*(?=">>})'
if ! rabbitmqctl cluster_status | grep -w '<<"{{ rabbitmq_cluster_name }}">>'; then args:
rabbitmqctl set_cluster_name {{ rabbitmq_cluster_name }} executable: /bin/bash
return_code=2 changed_when: false
fi register: _cluster_name
exit ${return_code}
when: ansible_hostname == rabbitmq_primary_cluster_node
register: _set_cluster_name
changed_when: _set_cluster_name.rc == 2
failed_when: _set_cluster_name.rc not in [0, 2]
# We skip ansible lint testing for this task as it fails with
# ANSIBLE0014 Environment variables don't work as part of command
# which is nonsense.
tags:
- skip_ansible_lint
- include: rabbitmq_cluster_join.yml - name: Set rabbitmq cluster name on primary node
command: |
rabbitmqctl set_cluster_name {{ rabbitmq_cluster_name }}
when:
- "ansible_hostname == rabbitmq_primary_cluster_node"
- "_cluster_name.stdout != rabbitmq_cluster_name"
- name: Join cluster on secondary nodes
include: rabbitmq_cluster_join.yml
static: no static: no
when: ansible_hostname != rabbitmq_primary_cluster_node when:
- "ansible_hostname != rabbitmq_primary_cluster_node"
- "_cluster_name.stdout != rabbitmq_cluster_name"

View File

@@ -13,31 +13,42 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# If cluster name is our own hostname, we assume we're not properly clustered - name: Stop rabbitmq app
# TODO(someone): implement a more robust way of checking
# if node is clustered or not
- block:
- name: Check cluster status
shell: | shell: |
rabbitmqctl -q cluster_status | grep '{cluster_name,<<"{{ rabbitmq_cluster_name }}">>}' rabbitmqctl stop_app
changed_when: false sleep 5
args:
executable: /bin/bash
tags:
# This task must use shell, otherwise the rabbitmqctl
# command somehow thinks that the sleep command is a
# parameter. Due to this, we skip ansible-lint checks
# on this task.
- skip_ansible_lint
rescue: - name: Join rabbitmq cluster
- name: Stop rabbitmq app
shell: |
rabbitmqctl stop_app; sleep 5
- name: Join rabbitmq cluster
command: > command: >
rabbitmqctl join_cluster "rabbit@{{ rabbitmq_primary_cluster_node.split('.')[0] }}" rabbitmqctl join_cluster "rabbit@{{ rabbitmq_primary_cluster_node.split('.')[0] }}"
register: rabbit_join_cluster register: rabbit_join_cluster
until: rabbit_join_cluster|success until: rabbit_join_cluster|success
retries: 5 retries: 5
delay: 2 delay: 2
tags:
# This task only gets executed on a condition
# in the rabbitmq_cluster.yml file, but ansible-lint
# does not seem to realise this and fails this task.
# Due to this, we skip ansible-lint checks on this task.
- skip_ansible_lint
- name: Start rabbitmq app - name: Start rabbitmq app
command: rabbitmqctl start_app command: rabbitmqctl start_app
register: rabbit_start_app register: rabbit_start_app
until: rabbit_start_app|success until: rabbit_start_app|success
retries: 5 retries: 5
delay: 2 delay: 2
tags:
# This task only gets executed on a condition
# in the rabbitmq_cluster.yml file, but ansible-lint
# does not seem to realise this and fails this task.
# Due to this, we skip ansible-lint checks on this task.
- skip_ansible_lint