From 6c037790f2802d626902d377486ec2f8ce2c8cda Mon Sep 17 00:00:00 2001
From: Mark Goddard <mark@stackhpc.com>
Date: Wed, 24 May 2023 12:21:03 +0100
Subject: [PATCH] Refactor MariaDB and RabbitMQ restart procedure

Ansible 2.14.3 introduced a change that broke the method used for
restarting MariaDB and RabbitMQ serially [1][2]. In
I57425680a4cdbf0daeb9b2cc35920f1b933aa4a8 we limited to 2.14.2 to work
around this. Ansible upstream claim this behaviour was unintentional,
and will not fix it.

This change moves to a different approach where we use separate plays
with a 'serial' keyword to execute the restart.

This change also removes the restriction on the maximum supported
version of 2.14.2 on ansible-core - any 2.14 release is now supported.

[1] https://github.com/ansible/ansible/commit/65366f663de7d044f42ae6dd53368fd4c1f88b35
[2] https://github.com/ansible/ansible/issues/80848

Depends-On: https://review.opendev.org/c/openstack/kolla/+/884208

Change-Id: I5a12670d07077d24047aaff57ce8d33ccf7156ff
---
 ansible/mariadb.yml                           |  72 +++++++++++
 ansible/rabbitmq.yml                          | 115 ++++++++++++++++++
 ansible/roles/mariadb/handlers/main.yml       |  27 ++--
 ansible/roles/mariadb/tasks/deploy.yml        |   7 --
 ansible/roles/mariadb/tasks/post-deploy.yml   |   4 +
 ansible/roles/mariadb/tasks/post-upgrade.yml  |  24 ++++
 ansible/roles/mariadb/tasks/upgrade.yml       |  24 ----
 ansible/roles/prechecks/vars/main.yml         |   2 +-
 ansible/roles/rabbitmq/handlers/main.yml      |  26 +---
 ansible/roles/rabbitmq/tasks/deploy.yml       |   5 -
 ansible/roles/rabbitmq/tasks/post-deploy.yml  |   2 +
 ansible/roles/rabbitmq/tasks/upgrade.yml      |   3 -
 ansible/site.yml                              |  50 +-------
 doc/source/user/quickstart.rst                |   4 +-
 doc/source/user/virtual-environments.rst      |   1 -
 ...adb-restart-refactor-529502bf5d12b0f4.yaml |  10 ++
 tests/run.yml                                 |   2 +-
 17 files changed, 246 insertions(+), 132 deletions(-)
 create mode 100644 ansible/mariadb.yml
 create mode 100644 ansible/rabbitmq.yml
 create mode 100644 ansible/roles/mariadb/tasks/post-deploy.yml
 create mode 100644 ansible/roles/mariadb/tasks/post-upgrade.yml
 create mode 100644 ansible/roles/rabbitmq/tasks/post-deploy.yml
 create mode 100644 releasenotes/notes/mariadb-restart-refactor-529502bf5d12b0f4.yaml

diff --git a/ansible/mariadb.yml b/ansible/mariadb.yml
new file mode 100644
index 0000000000..0301adf6a9
--- /dev/null
+++ b/ansible/mariadb.yml
@@ -0,0 +1,72 @@
+---
+# For MariaDB we need to be careful about restarting services, to avoid losing quorum.
+- name: Apply role mariadb
+  gather_facts: false
+  hosts:
+    - mariadb
+    - '&enable_mariadb_True'
+  tags:
+    - mariadb
+  tasks:
+    - import_role:
+        name: mariadb
+
+- name: Restart mariadb services
+  gather_facts: false
+  hosts:
+    - mariadb_restart
+    - '&enable_mariadb_True'
+  # Restart in batches
+  serial: "33%"
+  tags:
+    - mariadb
+  tasks:
+    - import_role:
+        name: mariadb
+        tasks_from: restart_services.yml
+
+- name: Start mariadb services
+  gather_facts: false
+  hosts:
+    - mariadb_start
+    - '&enable_mariadb_True'
+  # Start in batches
+  serial: "33%"
+  tags:
+    - mariadb
+  tasks:
+    - import_role:
+        name: mariadb
+        tasks_from: restart_services.yml
+
+- name: Restart bootstrap mariadb service
+  gather_facts: false
+  hosts:
+    - mariadb_bootstrap_restart
+    - '&enable_mariadb_True'
+  tags:
+    - mariadb
+  tasks:
+    - import_role:
+        name: mariadb
+        tasks_from: restart_services.yml
+
+- name: Apply mariadb post-configuration
+  gather_facts: false
+  hosts:
+    - mariadb
+    - '&enable_mariadb_True'
+  tags:
+    - mariadb
+  tasks:
+    - name: Include mariadb post-deploy.yml
+      include_role:
+        name: mariadb
+        tasks_from: post-deploy.yml
+      when: kolla_action in ['deploy', 'reconfigure', 'upgrade']
+
+    - name: Include mariadb post-upgrade.yml
+      include_role:
+        name: mariadb
+        tasks_from: post-upgrade.yml
+      when: kolla_action == 'upgrade'
diff --git a/ansible/rabbitmq.yml b/ansible/rabbitmq.yml
new file mode 100644
index 0000000000..370600c8eb
--- /dev/null
+++ b/ansible/rabbitmq.yml
@@ -0,0 +1,115 @@
+---
+# For RabbitMQ we need to be careful about restarting services, to avoid losing quorum.
+- name: Apply role rabbitmq
+  gather_facts: false
+  hosts:
+    - rabbitmq
+    - '&enable_rabbitmq_True'
+  tags:
+    - rabbitmq
+  tasks:
+    - import_role:
+        name: rabbitmq
+      vars:
+        role_rabbitmq_cluster_cookie: '{{ rabbitmq_cluster_cookie }}'
+        role_rabbitmq_cluster_port: '{{ rabbitmq_cluster_port }}'
+        role_rabbitmq_epmd_port: '{{ rabbitmq_epmd_port }}'
+        role_rabbitmq_groups: rabbitmq
+        role_rabbitmq_management_port: '{{ rabbitmq_management_port }}'
+        role_rabbitmq_monitoring_password: '{{ rabbitmq_monitoring_password }}'
+        role_rabbitmq_monitoring_user: '{{ rabbitmq_monitoring_user }}'
+        role_rabbitmq_password: '{{ rabbitmq_password }}'
+        role_rabbitmq_port: '{{ rabbitmq_port }}'
+        role_rabbitmq_prometheus_port: '{{ rabbitmq_prometheus_port }}'
+        role_rabbitmq_user: '{{ rabbitmq_user }}'
+
+- name: Restart rabbitmq services
+  gather_facts: false
+  hosts:
+    - rabbitmq_restart
+    - '&enable_rabbitmq_True'
+  # Restart in batches
+  serial: "33%"
+  tags:
+    - rabbitmq
+  tasks:
+    - import_role:
+        name: rabbitmq
+        tasks_from: restart_services.yml
+      vars:
+        role_rabbitmq_cluster_cookie: '{{ rabbitmq_cluster_cookie }}'
+        role_rabbitmq_groups: rabbitmq
+
+- name: Apply rabbitmq post-configuration
+  gather_facts: false
+  hosts:
+    - rabbitmq
+    - '&enable_rabbitmq_True'
+  tags:
+    - rabbitmq
+  tasks:
+    - name: Include rabbitmq post-deploy.yml
+      include_role:
+        name: rabbitmq
+        tasks_from: post-deploy.yml
+      when: kolla_action in ['deploy', 'reconfigure']
+      vars:
+        role_rabbitmq_cluster_cookie: '{{ rabbitmq_cluster_cookie }}'
+        role_rabbitmq_groups: rabbitmq
+
+- name: Apply role rabbitmq (outward)
+  gather_facts: false
+  hosts:
+    - outward-rabbitmq
+    - '&enable_outward_rabbitmq_True'
+  tags:
+    - rabbitmq
+  tasks:
+    - import_role:
+        name: rabbitmq
+      vars:
+        project_name: outward_rabbitmq
+        role_rabbitmq_cluster_cookie: '{{ outward_rabbitmq_cluster_cookie }}'
+        role_rabbitmq_cluster_port: '{{ outward_rabbitmq_cluster_port }}'
+        role_rabbitmq_epmd_port: '{{ outward_rabbitmq_epmd_port }}'
+        role_rabbitmq_groups: outward-rabbitmq
+        role_rabbitmq_management_port: '{{ outward_rabbitmq_management_port }}'
+        role_rabbitmq_password: '{{ outward_rabbitmq_password }}'
+        role_rabbitmq_port: '{{ outward_rabbitmq_port }}'
+        role_rabbitmq_prometheus_port: '{{ outward_rabbitmq_prometheus_port }}'
+        role_rabbitmq_user: '{{ outward_rabbitmq_user }}'
+
+- name: Restart rabbitmq (outward) services
+  gather_facts: false
+  hosts:
+    - outward_rabbitmq_restart
+    - '&enable_outward_rabbitmq_True'
+  # Restart in batches
+  serial: "33%"
+  tags:
+    - rabbitmq
+  tasks:
+    - import_role:
+        name: rabbitmq
+        tasks_from: restart_services.yml
+      vars:
+        project_name: outward_rabbitmq
+        role_rabbitmq_cluster_cookie: '{{ outward_rabbitmq_cluster_cookie }}'
+        role_rabbitmq_groups: outward-rabbitmq
+
+- name: Apply rabbitmq (outward) post-configuration
+  gather_facts: false
+  hosts:
+    - outward-rabbitmq
+    - '&enable_outward_rabbitmq_True'
+  tags:
+    - rabbitmq
+  tasks:
+    - name: Include rabbitmq (outward) post-deploy.yml
+      include_role:
+        name: rabbitmq
+      when: kolla_action in ['deploy', 'reconfigure']
+      vars:
+        project_name: outward_rabbitmq
+        role_rabbitmq_cluster_cookie: '{{ outward_rabbitmq_cluster_cookie }}'
+        role_rabbitmq_groups: outward-rabbitmq
diff --git a/ansible/roles/mariadb/handlers/main.yml b/ansible/roles/mariadb/handlers/main.yml
index 1c76fc48ff..a327668235 100644
--- a/ansible/roles/mariadb/handlers/main.yml
+++ b/ansible/roles/mariadb/handlers/main.yml
@@ -48,38 +48,29 @@
   no_log: true
   listen: Bootstrap MariaDB cluster
 
+- name: Ensure MariaDB is running normally on bootstrap host
+  group_by:
+    key: mariadb_bootstrap_restart
+  listen: Bootstrap MariaDB cluster
+
 - name: Restart MariaDB on existing cluster members
-  include_tasks: 'restart_services.yml'
+  group_by:
+    key: mariadb_restart
   when:
     - groups[mariadb_shard_group + '_port_alive_True'] is defined
     - inventory_hostname in groups[mariadb_shard_group + '_port_alive_True']
-    - groups[mariadb_shard_group + '_port_alive_True'].index(inventory_hostname) % 4 == item
     - kolla_action != "config"
   listen: restart mariadb
-  loop:
-    - 0
-    - 1
-    - 2
-    - 3
 
 - name: Start MariaDB on new nodes
-  include_tasks: 'restart_services.yml'
+  group_by:
+    key: mariadb_start
   when:
     - bootstrap_host is not defined or bootstrap_host != inventory_hostname
     - groups[mariadb_shard_group + '_port_alive_False'] is defined
     - inventory_hostname in groups[mariadb_shard_group + '_port_alive_False']
-    - groups[mariadb_shard_group + '_port_alive_False'].index(inventory_hostname) % 4 == item
     - kolla_action != "config"
   listen: restart mariadb
-  loop:
-    - 0
-    - 1
-    - 2
-    - 3
-
-- name: Ensure MariaDB is running normally on bootstrap host
-  include_tasks: 'restart_services.yml'
-  listen: Bootstrap MariaDB cluster
 
 - name: Restart mariadb-clustercheck container
   vars:
diff --git a/ansible/roles/mariadb/tasks/deploy.yml b/ansible/roles/mariadb/tasks/deploy.yml
index 348f61b84c..e1f3ad8557 100644
--- a/ansible/roles/mariadb/tasks/deploy.yml
+++ b/ansible/roles/mariadb/tasks/deploy.yml
@@ -4,10 +4,3 @@
 - import_tasks: check-containers.yml
 
 - import_tasks: bootstrap.yml
-
-- name: Flush handlers
-  meta: flush_handlers
-
-- import_tasks: register.yml
-
-- import_tasks: check.yml
diff --git a/ansible/roles/mariadb/tasks/post-deploy.yml b/ansible/roles/mariadb/tasks/post-deploy.yml
new file mode 100644
index 0000000000..c487e00be4
--- /dev/null
+++ b/ansible/roles/mariadb/tasks/post-deploy.yml
@@ -0,0 +1,4 @@
+---
+- import_tasks: register.yml
+
+- import_tasks: check.yml
diff --git a/ansible/roles/mariadb/tasks/post-upgrade.yml b/ansible/roles/mariadb/tasks/post-upgrade.yml
new file mode 100644
index 0000000000..cb07e7d569
--- /dev/null
+++ b/ansible/roles/mariadb/tasks/post-upgrade.yml
@@ -0,0 +1,24 @@
+---
+- name: Run upgrade in MariaDB container
+  vars:
+    service_name: "mariadb"
+    service: "{{ mariadb_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "start_container"
+    common_options: "{{ docker_common_options }}"
+    detach: False
+    dimensions: "{{ service.dimensions }}"
+    environment:
+      KOLLA_UPGRADE:
+      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
+      DB_HOST: "{{ api_interface_address }}"
+      DB_PORT: "{{ mariadb_port }}"
+      DB_ROOT_PASSWORD: "{{ database_password }}"
+    image: "{{ service.image }}"
+    labels:
+      UPGRADE:
+    name: "upgrade_mariadb"
+    restart_policy: no
+    volumes: "{{ service.volumes }}"
+  no_log: true
diff --git a/ansible/roles/mariadb/tasks/upgrade.yml b/ansible/roles/mariadb/tasks/upgrade.yml
index 967d57bdac..5b10a7e111 100644
--- a/ansible/roles/mariadb/tasks/upgrade.yml
+++ b/ansible/roles/mariadb/tasks/upgrade.yml
@@ -1,26 +1,2 @@
 ---
 - import_tasks: deploy.yml
-
-- name: Run upgrade in MariaDB container
-  vars:
-    service_name: "mariadb"
-    service: "{{ mariadb_services[service_name] }}"
-  become: true
-  kolla_docker:
-    action: "start_container"
-    common_options: "{{ docker_common_options }}"
-    detach: False
-    dimensions: "{{ service.dimensions }}"
-    environment:
-      KOLLA_UPGRADE:
-      KOLLA_CONFIG_STRATEGY: "{{ config_strategy }}"
-      DB_HOST: "{{ api_interface_address }}"
-      DB_PORT: "{{ mariadb_port }}"
-      DB_ROOT_PASSWORD: "{{ database_password }}"
-    image: "{{ service.image }}"
-    labels:
-      UPGRADE:
-    name: "upgrade_mariadb"
-    restart_policy: no
-    volumes: "{{ service.volumes }}"
-  no_log: true
diff --git a/ansible/roles/prechecks/vars/main.yml b/ansible/roles/prechecks/vars/main.yml
index 6e267f3e70..580bb126e1 100644
--- a/ansible/roles/prechecks/vars/main.yml
+++ b/ansible/roles/prechecks/vars/main.yml
@@ -2,7 +2,7 @@
 docker_version_min: '18.09'
 docker_py_version_min: '3.4.1'
 ansible_version_min: '2.13'
-ansible_version_max: '2.14.2'
+ansible_version_max: '2.14'
 
 # Top level keys should match ansible_facts.distribution.
 # These map to lists of supported releases (ansible_facts.distribution_release) or
diff --git a/ansible/roles/rabbitmq/handlers/main.yml b/ansible/roles/rabbitmq/handlers/main.yml
index cd5e39eb57..4f1b600be9 100644
--- a/ansible/roles/rabbitmq/handlers/main.yml
+++ b/ansible/roles/rabbitmq/handlers/main.yml
@@ -1,26 +1,6 @@
 ---
-# NOTE(mgoddard): These tasks perform a 'full stop upgrade', which is necessary when moving between
-# major releases. In future kolla-ansible releases we may be able to change this to a rolling
-# restart. For info on this process see https://www.rabbitmq.com/upgrade.html
-
-- name: Restart first rabbitmq container
-  vars:
-    service_name: "rabbitmq"
-    service: "{{ rabbitmq_services[service_name] }}"
-  include_tasks: 'restart_services.yml'
+- name: Restart rabbitmq container
+  group_by:
+    key: "{{ project_name }}_restart"
   when:
     - kolla_action != "config"
-    - inventory_hostname == groups[service.group] | first
-  listen: Restart rabbitmq container
-
-- name: Restart remaining rabbitmq containers
-  vars:
-    service_name: "rabbitmq"
-    service: "{{ rabbitmq_services[service_name] }}"
-  include_tasks: 'restart_services.yml'
-  when:
-    - kolla_action != "config"
-    - inventory_hostname == item
-    - inventory_hostname != groups[service.group] | first
-  loop: "{{ groups[service.group] }}"
-  listen: Restart rabbitmq container
diff --git a/ansible/roles/rabbitmq/tasks/deploy.yml b/ansible/roles/rabbitmq/tasks/deploy.yml
index e790621ad7..7be978c440 100644
--- a/ansible/roles/rabbitmq/tasks/deploy.yml
+++ b/ansible/roles/rabbitmq/tasks/deploy.yml
@@ -8,8 +8,3 @@
 - import_tasks: check-containers.yml
 
 - import_tasks: bootstrap.yml
-
-- name: Flush handlers
-  meta: flush_handlers
-
-- import_tasks: feature-flags.yml
diff --git a/ansible/roles/rabbitmq/tasks/post-deploy.yml b/ansible/roles/rabbitmq/tasks/post-deploy.yml
new file mode 100644
index 0000000000..9c04d85c26
--- /dev/null
+++ b/ansible/roles/rabbitmq/tasks/post-deploy.yml
@@ -0,0 +1,2 @@
+---
+- import_tasks: feature-flags.yml
diff --git a/ansible/roles/rabbitmq/tasks/upgrade.yml b/ansible/roles/rabbitmq/tasks/upgrade.yml
index ed672c6aa2..58e5bde46a 100644
--- a/ansible/roles/rabbitmq/tasks/upgrade.yml
+++ b/ansible/roles/rabbitmq/tasks/upgrade.yml
@@ -8,6 +8,3 @@
 - import_tasks: feature-flags.yml
 
 - import_tasks: check-containers.yml
-
-- name: Flush handlers
-  meta: flush_handlers
diff --git a/ansible/site.yml b/ansible/site.yml
index 37fee5926b..4ba77cdc71 100644
--- a/ansible/site.yml
+++ b/ansible/site.yml
@@ -380,14 +380,9 @@
     - { role: redis,
         tags: redis }
 
-- name: Apply role mariadb
-  gather_facts: false
-  hosts:
-    - mariadb
-    - '&enable_mariadb_True'
-  roles:
-    - { role: mariadb,
-        tags: mariadb }
+# MariaDB deployment is more complicated than other services, so is covered in
+# its own playbook.
+- import_playbook: mariadb.yml
 
 - name: Apply role memcached
   gather_facts: false
@@ -440,44 +435,7 @@
     - { role: multipathd,
         tags: multipathd }
 
-- name: Apply role rabbitmq
-  gather_facts: false
-  hosts:
-    - rabbitmq
-    - '&enable_rabbitmq_True'
-  roles:
-    - { role: rabbitmq,
-        tags: rabbitmq,
-        role_rabbitmq_cluster_cookie: '{{ rabbitmq_cluster_cookie }}',
-        role_rabbitmq_cluster_port: '{{ rabbitmq_cluster_port }}',
-        role_rabbitmq_epmd_port: '{{ rabbitmq_epmd_port }}',
-        role_rabbitmq_groups: rabbitmq,
-        role_rabbitmq_management_port: '{{ rabbitmq_management_port }}',
-        role_rabbitmq_monitoring_password: '{{ rabbitmq_monitoring_password }}',
-        role_rabbitmq_monitoring_user: '{{ rabbitmq_monitoring_user }}',
-        role_rabbitmq_password: '{{ rabbitmq_password }}',
-        role_rabbitmq_port: '{{ rabbitmq_port }}',
-        role_rabbitmq_prometheus_port: '{{ rabbitmq_prometheus_port }}',
-        role_rabbitmq_user: '{{ rabbitmq_user }}' }
-
-- name: Apply role rabbitmq (outward)
-  gather_facts: false
-  hosts:
-    - outward-rabbitmq
-    - '&enable_outward_rabbitmq_True'
-  roles:
-    - { role: rabbitmq,
-        tags: rabbitmq,
-        project_name: outward_rabbitmq,
-        role_rabbitmq_cluster_cookie: '{{ outward_rabbitmq_cluster_cookie }}',
-        role_rabbitmq_cluster_port: '{{ outward_rabbitmq_cluster_port }}',
-        role_rabbitmq_epmd_port: '{{ outward_rabbitmq_epmd_port }}',
-        role_rabbitmq_groups: outward-rabbitmq,
-        role_rabbitmq_management_port: '{{ outward_rabbitmq_management_port }}',
-        role_rabbitmq_password: '{{ outward_rabbitmq_password }}',
-        role_rabbitmq_port: '{{ outward_rabbitmq_port }}',
-        role_rabbitmq_prometheus_port: '{{ outward_rabbitmq_prometheus_port }}',
-        role_rabbitmq_user: '{{ outward_rabbitmq_user }}' }
+- import_playbook: rabbitmq.yml
 
 - name: Apply role etcd
   gather_facts: false
diff --git a/doc/source/user/quickstart.rst b/doc/source/user/quickstart.rst
index d71d559c6e..15dfd261ea 100644
--- a/doc/source/user/quickstart.rst
+++ b/doc/source/user/quickstart.rst
@@ -90,12 +90,10 @@ Install dependencies for the virtual environment
       pip install -U pip
 
 #. Install `Ansible <http://www.ansible.com>`__. Kolla Ansible requires at least
-   Ansible ``6`` and supports up to ``7``. Ansible-core must not be greater
-   than 2.14.2 due to a regression.
+   Ansible ``6`` and supports up to ``7``.
 
    .. code-block:: console
 
-      pip install 'ansible-core>=2.13,<=2.14.2'
       pip install 'ansible>=6,<8'
 
 
diff --git a/doc/source/user/virtual-environments.rst b/doc/source/user/virtual-environments.rst
index 9324429a67..5d52a7e61f 100644
--- a/doc/source/user/virtual-environments.rst
+++ b/doc/source/user/virtual-environments.rst
@@ -26,7 +26,6 @@ python virtual environment on the Ansible control host. For example:
    source /path/to/venv/bin/activate
    pip install -U pip
    pip install kolla-ansible
-   pip install 'ansible-core>=2.13,<=2.14.2'
    pip install 'ansible>=6,<8'
    deactivate
 
diff --git a/releasenotes/notes/mariadb-restart-refactor-529502bf5d12b0f4.yaml b/releasenotes/notes/mariadb-restart-refactor-529502bf5d12b0f4.yaml
new file mode 100644
index 0000000000..28b1faacfc
--- /dev/null
+++ b/releasenotes/notes/mariadb-restart-refactor-529502bf5d12b0f4.yaml
@@ -0,0 +1,10 @@
+---
+upgrade:
+  - |
+    Removes the restriction on the maximum supported version of 2.14.2 for
+    ``ansible-core``. Any 2.14 series release is now supported.
+other:
+  - |
+    Refactors the MariaDB and RabbitMQ restart procedures to be compatible with
+    Ansible 2.14.3+.  See `Ansible issue 80848
+    <https://github.com/ansible/ansible/issues/80848>`__ for details.
diff --git a/tests/run.yml b/tests/run.yml
index e693db446a..9ab3dbc6ef 100644
--- a/tests/run.yml
+++ b/tests/run.yml
@@ -245,7 +245,7 @@
     - name: install kolla-ansible and dependencies
       vars:
         ansible_core_version_min: "==2.13.*"
-        ansible_core_version_max: "==2.14.2"
+        ansible_core_version_max: "==2.14.*"
         # Test latest ansible version on Ubuntu, minimum supported on others.
         ansible_core_version_constraint: >-
           {{ ansible_core_version_min if is_upgrade or base_distro != 'ubuntu' else ansible_core_version_max }}