Join cluster without using a failed task
Currently a new node joining the cluster is done by using a try/rescue operation which always fails for a new node, causing a lot of confusion due to the failed task report. This patch ensures that the implementation no longer does that. In order to prevent lint check failures, each join task has a 'changed_when: true' added. Change-Id: Ic1da9f3ad1016831fe37643165880e7ff98ca923
This commit is contained in:
parent
ad29910e6c
commit
83b398e18b
@ -13,24 +13,35 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
|
# The output of 'rabbitmqctl -q cluster_status' looks like this:
|
||||||
|
# [{nodes,[{disc,[rabbit@container1,rabbit@container2,rabbit@container3]}]},
|
||||||
|
# {running_nodes,[rabbit@container3,rabbit@container1,rabbit@container2]},
|
||||||
|
# {cluster_name,<<"rabbitmq_cluster1">>},
|
||||||
|
# {partitions,[]},
|
||||||
|
# {alarms,[{rabbit@container3,[]},
|
||||||
|
# {rabbit@container1,[]},
|
||||||
|
# {rabbit@container2,[]}]}]
|
||||||
|
#
|
||||||
|
# Our solution to get the cluster name out cleanly is inspired by
|
||||||
|
# https://unix.stackexchange.com/a/13472
|
||||||
- name: Get rabbitmq cluster name
|
- name: Get rabbitmq cluster name
|
||||||
shell: |
|
shell: |
|
||||||
return_code=0
|
rabbitmqctl -q cluster_status | grep -oP '(?<={cluster_name,<<").*(?=">>})'
|
||||||
if ! rabbitmqctl cluster_status | grep -w '<<"{{ rabbitmq_cluster_name }}">>'; then
|
args:
|
||||||
rabbitmqctl set_cluster_name {{ rabbitmq_cluster_name }}
|
executable: /bin/bash
|
||||||
return_code=2
|
changed_when: false
|
||||||
fi
|
register: _cluster_name
|
||||||
exit ${return_code}
|
|
||||||
when: ansible_hostname == rabbitmq_primary_cluster_node
|
|
||||||
register: _set_cluster_name
|
|
||||||
changed_when: _set_cluster_name.rc == 2
|
|
||||||
failed_when: _set_cluster_name.rc not in [0, 2]
|
|
||||||
# We skip ansible lint testing for this task as it fails with
|
|
||||||
# ANSIBLE0014 Environment variables don't work as part of command
|
|
||||||
# which is nonsense.
|
|
||||||
tags:
|
|
||||||
- skip_ansible_lint
|
|
||||||
|
|
||||||
- include: rabbitmq_cluster_join.yml
|
- name: Set rabbitmq cluster name on primary node
|
||||||
|
command: |
|
||||||
|
rabbitmqctl set_cluster_name {{ rabbitmq_cluster_name }}
|
||||||
|
when:
|
||||||
|
- "ansible_hostname == rabbitmq_primary_cluster_node"
|
||||||
|
- "_cluster_name.stdout != rabbitmq_cluster_name"
|
||||||
|
|
||||||
|
- name: Join cluster on secondary nodes
|
||||||
|
include: rabbitmq_cluster_join.yml
|
||||||
static: no
|
static: no
|
||||||
when: ansible_hostname != rabbitmq_primary_cluster_node
|
when:
|
||||||
|
- "ansible_hostname != rabbitmq_primary_cluster_node"
|
||||||
|
- "_cluster_name.stdout != rabbitmq_cluster_name"
|
||||||
|
@ -13,31 +13,42 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
# If cluster name is our own hostname, we assume we're not properly clustered
|
- name: Stop rabbitmq app
|
||||||
# TODO(someone): implement a more robust way of checking
|
shell: |
|
||||||
# if node is clustered or not
|
rabbitmqctl stop_app
|
||||||
- block:
|
sleep 5
|
||||||
- name: Check cluster status
|
args:
|
||||||
shell: |
|
executable: /bin/bash
|
||||||
rabbitmqctl -q cluster_status | grep '{cluster_name,<<"{{ rabbitmq_cluster_name }}">>}'
|
tags:
|
||||||
changed_when: false
|
# This task must use shell, otherwise the rabbitmqctl
|
||||||
|
# command somehow thinks that the sleep command is a
|
||||||
|
# parameter. Due to this, we skip ansible-lint checks
|
||||||
|
# on this task.
|
||||||
|
- skip_ansible_lint
|
||||||
|
|
||||||
rescue:
|
- name: Join rabbitmq cluster
|
||||||
- name: Stop rabbitmq app
|
command: >
|
||||||
shell: |
|
rabbitmqctl join_cluster "rabbit@{{ rabbitmq_primary_cluster_node.split('.')[0] }}"
|
||||||
rabbitmqctl stop_app; sleep 5
|
register: rabbit_join_cluster
|
||||||
|
until: rabbit_join_cluster|success
|
||||||
|
retries: 5
|
||||||
|
delay: 2
|
||||||
|
tags:
|
||||||
|
# This task only gets executed on a condition
|
||||||
|
# in the rabbitmq_cluster.yml file, but ansible-lint
|
||||||
|
# does not seem to realise this and fails this task.
|
||||||
|
# Due to this, we skip ansible-lint checks on this task.
|
||||||
|
- skip_ansible_lint
|
||||||
|
|
||||||
- name: Join rabbitmq cluster
|
- name: Start rabbitmq app
|
||||||
command: >
|
command: rabbitmqctl start_app
|
||||||
rabbitmqctl join_cluster "rabbit@{{ rabbitmq_primary_cluster_node.split('.')[0] }}"
|
register: rabbit_start_app
|
||||||
register: rabbit_join_cluster
|
until: rabbit_start_app|success
|
||||||
until: rabbit_join_cluster|success
|
retries: 5
|
||||||
retries: 5
|
delay: 2
|
||||||
delay: 2
|
tags:
|
||||||
|
# This task only gets executed on a condition
|
||||||
- name: Start rabbitmq app
|
# in the rabbitmq_cluster.yml file, but ansible-lint
|
||||||
command: rabbitmqctl start_app
|
# does not seem to realise this and fails this task.
|
||||||
register: rabbit_start_app
|
# Due to this, we skip ansible-lint checks on this task.
|
||||||
until: rabbit_start_app|success
|
- skip_ansible_lint
|
||||||
retries: 5
|
|
||||||
delay: 2
|
|
||||||
|
Loading…
Reference in New Issue
Block a user