c94cc4a61a
There seems to be a bug in Galera that causes TASK [mariadb : Check MariaDB service WSREP sync status] to fail. One (in case of 3-node cluster) or more (possible with more-than-3-node clusters) nodes may "lose the race" and get stuck in the "initialized" state of WSREP. This is entirely random as is the case with most race issues. MariaDB service restart on that node will fix the situation but it's unwieldy. The above may happen because Kolla Ansible starts and waits for all new nodes at once. This did not bother the old galera (galera 3) which figured out the ordering for itself and let each node join the cluster properly. The proposed workaround is to start and wait for nodes serially. Change-Id: I449d4c2073d4e3953e9f09725577d2e1c9d563c9 Closes-Bug: #1947485
101 lines
3.0 KiB
YAML
101 lines
3.0 KiB
YAML
---
|
|
# Starts the "mariadb" service container with BOOTSTRAP_ARGS set to
# --wsrep-new-cluster. Triggered by the "Bootstrap MariaDB cluster"
# notification.
- name: Starting first MariaDB container
  vars:
    service_name: "mariadb"
    service: "{{ mariadb_services[service_name] }}"
  become: true
  kolla_docker:
    action: "start_container"
    common_options: "{{ docker_common_options }}"
    environment:
      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
      BOOTSTRAP_ARGS: "--wsrep-new-cluster"
    image: "{{ service.image }}"
    labels:
      # Only the label key is set; its value is left as YAML null.
      BOOTSTRAP:
    name: "{{ service.container_name }}"
    # Quoted on purpose: a bare `no` is read by YAML 1.1 parsers as the
    # boolean false, not the string "no".
    restart_policy: "no"
    volumes: "{{ service.volumes }}"
    dimensions: "{{ service.dimensions }}"
  listen: Bootstrap MariaDB cluster
|
|
|
|
# NOTE(yoctozepto): The check is retried in a loop so that a connection reset
# does not break the handler on the first attempt.
# Each attempt waits up to 60 seconds for the MariaDB port on the API
# interface address to return data matching "MariaDB"; up to 10 retries are
# made, 6 seconds apart.
- name: Wait for first MariaDB service port liveness
  listen: Bootstrap MariaDB cluster
  wait_for:
    host: "{{ api_interface_address }}"
    port: "{{ mariadb_port }}"
    search_regex: MariaDB
    connect_timeout: 1
    timeout: 60
  register: check_mariadb_port
  until: check_mariadb_port is success
  retries: 10
  delay: 6
|
|
|
|
# Polls the wsrep_local_state_comment status variable inside the MariaDB
# container until it reports "Synced" (up to 10 retries, 6 seconds apart).
# no_log keeps the root password in the command line out of Ansible output.
- name: Wait for first MariaDB service to sync WSREP
  become: true
  # The password goes through the `quote` filter so that whitespace or quote
  # characters in it cannot split or alter the argument list when the command
  # string is tokenised.
  command: >-
    docker exec {{ mariadb_service.container_name }}
    mysql -uroot -p{{ database_password | quote }}
    --silent --skip-column-names
    -e 'SHOW STATUS LIKE "wsrep_local_state_comment"'
  # Read-only status query: never report a change.
  changed_when: false
  register: result
  until: result.stdout == "wsrep_local_state_comment\tSynced"
  retries: 10
  delay: 6
  no_log: true
  listen: Bootstrap MariaDB cluster
|
|
|
|
# Includes restart_services.yml for hosts listed in the
# "<mariadb_shard_group>_port_alive_True" group, skipped entirely when
# kolla_action is "config". The loop makes four passes (item 0..3); on each
# pass only hosts whose index in that group, modulo 4, equals the current
# item run the included tasks.
- name: Restart MariaDB on existing cluster members
  listen: restart mariadb
  include_tasks: restart_services.yml
  loop: [0, 1, 2, 3]
  # Condition order matters: the group must be defined and contain this host
  # before index() is called on it.
  when:
    - groups[mariadb_shard_group + '_port_alive_True'] is defined
    - inventory_hostname in groups[mariadb_shard_group + '_port_alive_True']
    - groups[mariadb_shard_group + '_port_alive_True'].index(inventory_hostname) % 4 == item
    - kolla_action != "config"
|
|
|
|
# Includes restart_services.yml for hosts listed in the
# "<mariadb_shard_group>_port_alive_False" group, excluding the host named by
# bootstrap_host (when that variable is defined) and skipping everything when
# kolla_action is "config". The loop makes four passes (item 0..3); on each
# pass only hosts whose index in that group, modulo 4, equals the current
# item run the included tasks.
- name: Start MariaDB on new nodes
  listen: restart mariadb
  include_tasks: restart_services.yml
  loop: [0, 1, 2, 3]
  # Condition order matters: the group must be defined and contain this host
  # before index() is called on it.
  when:
    - bootstrap_host is not defined or bootstrap_host != inventory_hostname
    - groups[mariadb_shard_group + '_port_alive_False'] is defined
    - inventory_hostname in groups[mariadb_shard_group + '_port_alive_False']
    - groups[mariadb_shard_group + '_port_alive_False'].index(inventory_hostname) % 4 == item
    - kolla_action != "config"
|
|
|
|
# Final step triggered by the "Bootstrap MariaDB cluster" notification:
# includes restart_services.yml unconditionally.
- name: Ensure MariaDB is running normally on bootstrap host
  listen: Bootstrap MariaDB cluster
  include_tasks: restart_services.yml
|
|
|
|
# Recreates or restarts the "mariadb-clustercheck" container through
# kolla_docker, using the image, volumes, dimensions and environment from its
# entry in mariadb_services. Skipped when kolla_action is "config".
- name: Restart mariadb-clustercheck container
  listen:
    - restart mariadb-clustercheck
  when:
    - kolla_action != "config"
  become: true
  vars:
    service_name: mariadb-clustercheck
    service: "{{ mariadb_services[service_name] }}"
  kolla_docker:
    action: recreate_or_restart_container
    name: "{{ service.container_name }}"
    image: "{{ service.image }}"
    common_options: "{{ docker_common_options }}"
    environment: "{{ service.environment }}"
    volumes: "{{ service.volumes }}"
    dimensions: "{{ service.dimensions }}"
|