diff --git a/ansible/library/kolla_docker.py b/ansible/library/kolla_docker.py
index edead35a52..224c424587 100644
--- a/ansible/library/kolla_docker.py
+++ b/ansible/library/kolla_docker.py
@@ -936,8 +936,10 @@ class DockerWorker(object):
             graceful_timeout = 10
         container = self.check_container()
         if not container:
-            self.module.fail_json(
-                msg="No such container: {} to stop".format(name))
+            ignore_missing = self.params.get('ignore_missing')
+            if not ignore_missing:
+                self.module.fail_json(
+                    msg="No such container: {} to stop".format(name))
         elif not container['Status'].startswith('Exited '):
             self.changed = True
             self.dc.stop(name, timeout=graceful_timeout)
@@ -1069,6 +1071,7 @@ def generate_module():
         dimensions=dict(required=False, type='dict', default=dict()),
         tty=dict(required=False, type='bool', default=False),
         client_timeout=dict(required=False, type='int', default=120),
+        ignore_missing=dict(required=False, type='bool', default=False),
     )
     required_if = [
         ['action', 'pull_image', ['image']],
diff --git a/ansible/roles/haproxy/handlers/main.yml b/ansible/roles/haproxy/handlers/main.yml
index 60fa5a03e3..ca94f3480f 100644
--- a/ansible/roles/haproxy/handlers/main.yml
+++ b/ansible/roles/haproxy/handlers/main.yml
@@ -1,5 +1,51 @@
 ---
-- name: Restart haproxy container
+# NOTE(yoctozepto): this handler dance is to ensure we delay restarting the master
+# keepalived and haproxy, which control the VIP address, until we have working backups.
+# This could be improved by checking that backup keepalived instances do not report FAULT state.
+# The master node is handled specially to let it close down connections and only then
+# drop the VIP address by stopping the keepalived service.
+
+# NOTE(yoctozepto): we need fresh VIP address placement info (facts may be old)
+- name: Check IP addresses on the API interface
+  vars:
+    version: "{{ '6' if api_address_family == 'ipv6' else '4' }}"
+  become: true
+  command: ip -{{ version }} -o addr show dev {{ api_interface }}
+  register: ip_addr_output
+  changed_when: false
+  when:
+    - kolla_action != "config"
+  listen:
+    - Restart haproxy container
+    - Restart keepalived container
+
+- name: Group HA nodes by status
+  vars:
+    re_safe_address: "{{ kolla_internal_vip_address | regex_escape }}"
+  group_by:
+    key: kolla_ha_is_master_{{ ip_addr_output.stdout is regex('\b' + re_safe_address + '\b') }}
+  when:
+    - kolla_action != "config"
+  listen:
+    - Restart haproxy container
+    - Restart keepalived container
+
+- name: Stop backup keepalived container
+  become: true
+  kolla_docker:
+    action: "stop_container"
+    # NOTE(yoctozepto): the backup node might not have keepalived yet - ignore it
+    ignore_missing: true
+    common_options: "{{ docker_common_options }}"
+    name: "keepalived"
+  when:
+    - kolla_action != "config"
+    - groups.kolla_ha_is_master_False is defined
+    - inventory_hostname in groups.kolla_ha_is_master_False
+  listen:
+    - Restart keepalived container
+
+- name: Restart backup haproxy container
   vars:
     service_name: "haproxy"
     service: "{{ haproxy_services[service_name] }}"
@@ -14,12 +60,20 @@
     dimensions: "{{ service.dimensions }}"
   when:
     - kolla_action != "config"
-    - inventory_hostname in groups[service.group]
-    - service.enabled | bool
+    - groups.kolla_ha_is_master_False is defined
+    - inventory_hostname in groups.kolla_ha_is_master_False
+  listen:
+    - Restart haproxy container
+    - Restart keepalived container
   notify:
-    - Waiting for haproxy to start
+    - Wait for backup haproxy to start
 
-- name: Restart keepalived container
+- name: Wait for backup haproxy to start
+  wait_for:
+    host: "{{ api_interface_address }}"
+    port: "{{ haproxy_monitor_port }}"
+
+- name: Start backup keepalived container
   vars:
     service_name: "keepalived"
     service: "{{ haproxy_services[service_name] }}"
@@ -34,17 +88,92 @@
     dimensions: "{{ service.dimensions }}"
   when:
     - kolla_action != "config"
-    - inventory_hostname in groups[service.group]
-    - service.enabled | bool
+    - groups.kolla_ha_is_master_False is defined
+    - inventory_hostname in groups.kolla_ha_is_master_False
+  listen:
+    - Restart keepalived container
   notify:
-    - Waiting for virtual IP to appear
+    - Wait for virtual IP to appear
 
-- name: Waiting for haproxy to start
+# NOTE(yoctozepto): This is to ensure haproxy can close any open connections
+# to the VIP address.
+- name: Stop master haproxy container
+  become: true
+  kolla_docker:
+    action: "stop_container"
+    common_options: "{{ docker_common_options }}"
+    name: "haproxy"
+  when:
+    - kolla_action != "config"
+    - groups.kolla_ha_is_master_True is defined
+    - inventory_hostname in groups.kolla_ha_is_master_True
+  listen:
+    - Restart keepalived container
+
+- name: Stop master keepalived container
+  become: true
+  kolla_docker:
+    action: "stop_container"
+    common_options: "{{ docker_common_options }}"
+    name: "keepalived"
+  when:
+    - kolla_action != "config"
+    - groups.kolla_ha_is_master_True is defined
+    - inventory_hostname in groups.kolla_ha_is_master_True
+  listen:
+    - Restart keepalived container
+
+- name: Start master haproxy container
+  vars:
+    service_name: "haproxy"
+    service: "{{ haproxy_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "recreate_or_restart_container"
+    common_options: "{{ docker_common_options }}"
+    name: "{{ service.container_name }}"
+    image: "{{ service.image }}"
+    privileged: "{{ service.privileged | default(False) }}"
+    volumes: "{{ service.volumes }}"
+    dimensions: "{{ service.dimensions }}"
+  when:
+    - kolla_action != "config"
+    - groups.kolla_ha_is_master_True is defined
+    - inventory_hostname in groups.kolla_ha_is_master_True
+  listen:
+    - Restart haproxy container
+    - Restart keepalived container
+  notify:
+    - Wait for master haproxy to start
+
+- name: Wait for master haproxy to start
   wait_for:
     host: "{{ api_interface_address }}"
     port: "{{ haproxy_monitor_port }}"
 
-- name: Waiting for virtual IP to appear
+- name: Start master keepalived container
+  vars:
+    service_name: "keepalived"
+    service: "{{ haproxy_services[service_name] }}"
+  become: true
+  kolla_docker:
+    action: "recreate_or_restart_container"
+    common_options: "{{ docker_common_options }}"
+    name: "{{ service.container_name }}"
+    image: "{{ service.image }}"
+    privileged: "{{ service.privileged | default(False) }}"
+    volumes: "{{ service.volumes }}"
+    dimensions: "{{ service.dimensions }}"
+  when:
+    - kolla_action != "config"
+    - groups.kolla_ha_is_master_True is defined
+    - inventory_hostname in groups.kolla_ha_is_master_True
+  listen:
+    - Restart keepalived container
+  notify:
+    - Wait for virtual IP to appear
+
+- name: Wait for virtual IP to appear
   wait_for:
     host: "{{ kolla_internal_vip_address }}"
     port: "{{ haproxy_monitor_port }}"
diff --git a/ansible/roles/haproxy/tasks/check-containers.yml b/ansible/roles/haproxy/tasks/check-containers.yml
index a7ce8b4d81..f572177e91 100644
--- a/ansible/roles/haproxy/tasks/check-containers.yml
+++ b/ansible/roles/haproxy/tasks/check-containers.yml
@@ -1,5 +1,5 @@
 ---
-- name: Deploy haproxy containers
+- name: Check haproxy containers
   become: true
   kolla_docker:
     action: "compare_container"
diff --git a/ansible/roles/haproxy/tasks/upgrade.yml b/ansible/roles/haproxy/tasks/upgrade.yml
index 17fcbe07c0..5b10a7e111 100644
--- a/ansible/roles/haproxy/tasks/upgrade.yml
+++ b/ansible/roles/haproxy/tasks/upgrade.yml
@@ -1,22 +1,2 @@
 ---
-- import_tasks: config-host.yml
-
-- import_tasks: config.yml
-
-- name: Stopping all slave keepalived containers
-  vars:
-    key: "{{ 'ipv6' if api_address_family == 'ipv6' else 'ipv4_secondaries' }}"
-    addresses: "{{ hostvars[inventory_hostname]['ansible_' + api_interface].get(key, []) | map(attribute='address') | list }}"
-  become: true
-  kolla_docker:
-    action: "stop_container"
-    common_options: "{{ docker_common_options }}"
-    name: "keepalived"
-  when: kolla_internal_vip_address not in addresses
-  notify:
-    - Restart keepalived container
-
-# NOTE(yoctozepto): haproxy role handlers should not be flushed early.
-# site.yml handles all haproxy things in a dedicated play.
-# This is to avoid extra haproxy service restart.
-# See: https://bugs.launchpad.net/kolla-ansible/+bug/1875228
+- import_tasks: deploy.yml
diff --git a/releasenotes/notes/bug-1863510-part-haproxy-0a76829d48818f92.yaml b/releasenotes/notes/bug-1863510-part-haproxy-0a76829d48818f92.yaml
new file mode 100644
index 0000000000..f452e7fe61
--- /dev/null
+++ b/releasenotes/notes/bug-1863510-part-haproxy-0a76829d48818f92.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    Makes haproxy and keepalived restarts during Kolla-Ansible actions more
+    robust, especially in multinode (HA) scenarios.
diff --git a/tests/test_kolla_docker.py b/tests/test_kolla_docker.py
index fae1898be3..c8dca3572e 100644
--- a/tests/test_kolla_docker.py
+++ b/tests/test_kolla_docker.py
@@ -94,6 +94,7 @@ class ModuleArgsTest(base.BaseTestCase):
             tty=dict(required=False, type='bool', default=False),
             client_timeout=dict(required=False, type='int', default=120),
             healthcheck=dict(required=False, type='dict'),
+            ignore_missing=dict(required=False, type='bool', default=False),
         )
         required_if = [
             ['action', 'pull_image', ['image']],
@@ -175,7 +176,15 @@
          'Image': 'myregistrydomain.com:5000/ubuntu:16.04',
          'ImageID': 'sha256:c5f1cf30',
          'Labels': {},
-         'Names': '/my_container'}
+         'Names': '/my_container'},
+        {'Created': 1463578195,
+         'Status': 'Exited (0) 2 hours ago',
+         'HostConfig': {'NetworkMode': 'default'},
+         'Id': 'e40d8e7188',
+         'Image': 'myregistrydomain.com:5000/ubuntu:16.04',
+         'ImageID': 'sha256:c5f1cf30',
+         'Labels': {},
+         'Names': '/exited_container'},
     ],
 
     'container_inspect': {
@@ -396,6 +405,18 @@ class TestContainer(base.BaseTestCase):
         self.assertTrue(self.dw.changed)
         self.dw.dc.containers.assert_called_once_with(all=True)
         self.dw.dc.stop.assert_called_once_with('my_container', timeout=10)
+        self.dw.module.fail_json.assert_not_called()
+
+    def test_stop_container_already_stopped(self):
+        self.dw = get_DockerWorker({'name': 'exited_container',
+                                    'action': 'stop_container'})
+        self.dw.dc.containers.return_value = self.fake_data['containers']
+        self.dw.stop_container()
+
+        self.assertFalse(self.dw.changed)
+        self.dw.dc.containers.assert_called_once_with(all=True)
+        self.dw.module.fail_json.assert_not_called()
+        self.dw.dc.stop.assert_not_called()
 
     def test_stop_container_not_exists(self):
         self.dw = get_DockerWorker({'name': 'fake_container',
@@ -405,9 +426,22 @@
 
         self.assertFalse(self.dw.changed)
         self.dw.dc.containers.assert_called_once_with(all=True)
+        self.dw.dc.stop.assert_not_called()
         self.dw.module.fail_json.assert_called_once_with(
             msg="No such container: fake_container to stop")
 
+    def test_stop_container_not_exists_ignore_missing(self):
+        self.dw = get_DockerWorker({'name': 'fake_container',
+                                    'action': 'stop_container',
+                                    'ignore_missing': True})
+        self.dw.dc.containers.return_value = self.fake_data['containers']
+        self.dw.stop_container()
+
+        self.assertFalse(self.dw.changed)
+        self.dw.dc.containers.assert_called_once_with(all=True)
+        self.dw.dc.stop.assert_not_called()
+        self.dw.module.fail_json.assert_not_called()
+
     def test_stop_and_remove_container(self):
         self.dw = get_DockerWorker({'name': 'my_container',
                                     'action': 'stop_and_remove_container'})
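# Usage sketch (not part of the patch above): a minimal Ansible task showing
# how the new kolla_docker ignore_missing option can be used, mirroring the
# "Stop backup keepalived container" handler added in this change. The task
# name is illustrative; docker_common_options is the usual kolla-ansible
# variable.
- name: Stop keepalived container if present
  become: true
  kolla_docker:
    action: "stop_container"
    common_options: "{{ docker_common_options }}"
    name: "keepalived"
    # With ignore_missing: true a missing container is no longer an error:
    # stop_container simply reports no change instead of calling fail_json.
    ignore_missing: true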