From e502b65ba1ef7ae0b321ed001948d96d29c57f08 Mon Sep 17 00:00:00 2001
From: Matt Crees <mattc@stackhpc.com>
Date: Wed, 17 Jan 2024 10:54:05 +0000
Subject: [PATCH] Fix OpenSearch upgrade tasks idempotency

Shard allocation is disabled at the start of the OpenSearch upgrade
task. This is set as a transient setting, meaning it will be removed
once the containers are restarted. However, if there is no change in
the OpenSearch container, it will not be restarted, so the cluster is
left in a broken state: unable to allocate shards.

This patch moves the pre-upgrade tasks into the handlers, so that shard
allocation is only disabled, and the flush only performed, when the
OpenSearch container is going to be restarted.

Closes-Bug: #2049512
Change-Id: Ia03ba23bfbde7d50a88dc16e4f117dec3c98a448
---
 ansible/roles/opensearch/handlers/main.yml    | 41 +++++++++++++++++++
 ansible/roles/opensearch/tasks/upgrade.yml    | 35 ----------------
 ...-upgrade-idempotency-4d20a8102717cc56.yaml |  6 +++
 3 files changed, 47 insertions(+), 35 deletions(-)
 create mode 100644 releasenotes/notes/fix-opensearch-upgrade-idempotency-4d20a8102717cc56.yaml

diff --git a/ansible/roles/opensearch/handlers/main.yml b/ansible/roles/opensearch/handlers/main.yml
index a32287297d..e5f07dfca3 100644
--- a/ansible/roles/opensearch/handlers/main.yml
+++ b/ansible/roles/opensearch/handlers/main.yml
@@ -1,4 +1,45 @@
 ---
+- name: Disable shard allocation
+  become: true
+  vars:
+    opensearch_shard_body: {"transient": {"cluster.routing.allocation.enable": "none"}}
+  kolla_toolbox:
+    container_engine: "{{ kolla_container_engine }}"
+    module_name: uri
+    module_args:
+      url: "{{ opensearch_internal_endpoint }}/_cluster/settings"
+      method: PUT
+      status_code: 200
+      return_content: yes
+      body: "{{ opensearch_shard_body | to_json }}"  # noqa jinja[invalid]
+      body_format: json
+  delegate_to: "{{ groups['opensearch'][0] }}"
+  run_once: true
+  listen: "Restart opensearch container"
+  when:
+    - kolla_action == "upgrade"
+
+- name: Perform a flush
+  become: true
+  kolla_toolbox:
+    container_engine: "{{ kolla_container_engine }}"
+    module_name: uri
+    module_args:
+      url: "{{ opensearch_internal_endpoint }}/_flush"
+      method: POST
+      status_code: 200
+      return_content: yes
+      body_format: json
+  delegate_to: "{{ groups['opensearch'][0] }}"
+  run_once: true
+  retries: 10
+  delay: 5
+  register: result
+  until: ('status' in result) and result.status == 200
+  listen: "Restart opensearch container"
+  when:
+    - kolla_action == "upgrade"
+
 - name: Restart opensearch container
   vars:
     service_name: "opensearch"
diff --git a/ansible/roles/opensearch/tasks/upgrade.yml b/ansible/roles/opensearch/tasks/upgrade.yml
index da343e8b75..cb376892a9 100644
--- a/ansible/roles/opensearch/tasks/upgrade.yml
+++ b/ansible/roles/opensearch/tasks/upgrade.yml
@@ -1,39 +1,4 @@
 ---
-- name: Disable shard allocation
-  become: true
-  vars:
-    opensearch_shard_body: {"transient": {"cluster.routing.allocation.enable": "none"}}
-  kolla_toolbox:
-    container_engine: "{{ kolla_container_engine }}"
-    module_name: uri
-    module_args:
-      url: "{{ opensearch_internal_endpoint }}/_cluster/settings"
-      method: PUT
-      status_code: 200
-      return_content: yes
-      body: "{{ opensearch_shard_body | to_json }}"  # noqa jinja[invalid]
-      body_format: json
-  delegate_to: "{{ groups['opensearch'][0] }}"
-  run_once: true
-
-- name: Perform a flush
-  become: true
-  kolla_toolbox:
-    container_engine: "{{ kolla_container_engine }}"
-    module_name: uri
-    module_args:
-      url: "{{ opensearch_internal_endpoint }}/_flush"
-      method: POST
-      status_code: 200
-      return_content: yes
-      body_format: json
-  delegate_to: "{{ groups['opensearch'][0] }}"
-  run_once: true
-  retries: 10
-  delay: 5
-  register: result
-  until: ('status' in result) and result.status == 200
-
 - import_tasks: config-host.yml
 
 - import_tasks: config.yml
diff --git a/releasenotes/notes/fix-opensearch-upgrade-idempotency-4d20a8102717cc56.yaml b/releasenotes/notes/fix-opensearch-upgrade-idempotency-4d20a8102717cc56.yaml
new file mode 100644
index 0000000000..3d63703347
--- /dev/null
+++ b/releasenotes/notes/fix-opensearch-upgrade-idempotency-4d20a8102717cc56.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    Fixes an idempotency issue in the OpenSearch upgrade tasks where subsequent
+    runs of kolla-ansible upgrade would leave shard allocation disabled.
+    `LP#2049512 <https://launchpad.net/bugs/2049512>`__