Fix OpenSearch upgrade tasks idempotency

Shard allocation is disabled at the start of the OpenSearch upgrade
task. This is set as a transient setting, meaning it will be removed
once the containers are restarted. However, if there is not change in
the OpenSearch container it will not be restarted so the cluster is left
in a broken state: unable to allocate shards.

This patch moves the pre-upgrade tasks to within the handlers, so shard
allocation and the flush are only performed when the OpenSearch
container is going to be restarted.

Closes-Bug: #2049512
Change-Id: Ia03ba23bfbde7d50a88dc16e4f117dec3c98a448
This commit is contained in:
Matt Crees
2024-01-17 10:54:05 +00:00
parent 77c18fa615
commit e502b65ba1
3 changed files with 47 additions and 35 deletions

View File

@@ -1,4 +1,45 @@
---
- name: Disable shard allocation
become: true
vars:
opensearch_shard_body: {"transient": {"cluster.routing.allocation.enable": "none"}}
kolla_toolbox:
container_engine: "{{ kolla_container_engine }}"
module_name: uri
module_args:
url: "{{ opensearch_internal_endpoint }}/_cluster/settings"
method: PUT
status_code: 200
return_content: yes
body: "{{ opensearch_shard_body | to_json }}" # noqa jinja[invalid]
body_format: json
delegate_to: "{{ groups['opensearch'][0] }}"
run_once: true
listen: "Restart opensearch container"
when:
- kolla_action == "upgrade"
- name: Perform a flush
become: true
kolla_toolbox:
container_engine: "{{ kolla_container_engine }}"
module_name: uri
module_args:
url: "{{ opensearch_internal_endpoint }}/_flush"
method: POST
status_code: 200
return_content: yes
body_format: json
delegate_to: "{{ groups['opensearch'][0] }}"
run_once: true
retries: 10
delay: 5
register: result
until: ('status' in result) and result.status == 200
listen: "Restart opensearch container"
when:
- kolla_action == "upgrade"
- name: Restart opensearch container
vars:
service_name: "opensearch"

View File

@@ -1,39 +1,4 @@
---
- name: Disable shard allocation
become: true
vars:
opensearch_shard_body: {"transient": {"cluster.routing.allocation.enable": "none"}}
kolla_toolbox:
container_engine: "{{ kolla_container_engine }}"
module_name: uri
module_args:
url: "{{ opensearch_internal_endpoint }}/_cluster/settings"
method: PUT
status_code: 200
return_content: yes
body: "{{ opensearch_shard_body | to_json }}" # noqa jinja[invalid]
body_format: json
delegate_to: "{{ groups['opensearch'][0] }}"
run_once: true
- name: Perform a flush
become: true
kolla_toolbox:
container_engine: "{{ kolla_container_engine }}"
module_name: uri
module_args:
url: "{{ opensearch_internal_endpoint }}/_flush"
method: POST
status_code: 200
return_content: yes
body_format: json
delegate_to: "{{ groups['opensearch'][0] }}"
run_once: true
retries: 10
delay: 5
register: result
until: ('status' in result) and result.status == 200
- import_tasks: config-host.yml
- import_tasks: config.yml

View File

@@ -0,0 +1,6 @@
---
fixes:
- |
Fixes an idempotency issue in the OpenSearch upgrade tasks where subsequent
runs of kolla-ansible upgrade would leave shard allocation disabled.
`LP#2049512 <https://launchpad.net/bugs/2049512>`__