Merge "Coordinate haproxy and keepalived restarts"

Zuul 2020-10-07 19:26:31 +00:00 committed by Gerrit Code Review
commit dd0d903d88
6 changed files with 186 additions and 35 deletions

@@ -936,8 +936,10 @@ class DockerWorker(object):
graceful_timeout = 10
container = self.check_container()
if not container:
self.module.fail_json(
msg="No such container: {} to stop".format(name))
ignore_missing = self.params.get('ignore_missing')
if not ignore_missing:
self.module.fail_json(
msg="No such container: {} to stop".format(name))
elif not container['Status'].startswith('Exited '):
self.changed = True
self.dc.stop(name, timeout=graceful_timeout)
@@ -1069,6 +1071,7 @@ def generate_module():
dimensions=dict(required=False, type='dict', default=dict()),
tty=dict(required=False, type='bool', default=False),
client_timeout=dict(required=False, type='int', default=120),
ignore_missing=dict(required=False, type='bool', default=False),
)
required_if = [
['action', 'pull_image', ['image']],

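For context, the new ignore_missing flag turns stopping a nonexistent container from a hard failure into a no-op. Below is a minimal standalone sketch of the resulting control flow, assuming a plain dict of container name -> status string in place of the Docker client; it is an illustration only, not the kolla_docker module itself.

def stop_container(name, containers, ignore_missing=False, graceful_timeout=10):
    # 'containers' maps container name -> Status string, standing in for the
    # Docker client used by the real module (assumption for this sketch).
    status = containers.get(name)
    if status is None:
        if not ignore_missing:
            raise RuntimeError("No such container: {} to stop".format(name))
        return False  # missing but tolerated: nothing to stop, not a change
    if not status.startswith('Exited '):
        # a running container would be stopped here with the graceful timeout
        return True   # changed
    return False      # already stopped, no change


# A backup node that has never run keepalived yet no longer fails:
print(stop_container('keepalived', {}, ignore_missing=True))             # False
print(stop_container('haproxy', {'haproxy': 'Up 2 hours'}))              # True
print(stop_container('haproxy', {'haproxy': 'Exited (0) 2 hours ago'}))  # False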
@@ -1,5 +1,51 @@
---
- name: Restart haproxy container
# NOTE(yoctozepto): this handler dance ensures we delay restarting the master
# keepalived and haproxy, which control the VIP address, until we have working backups.
# This could be improved by checking that the backup keepalived instances do not
# report the FAULT state.
# The master node is handled specially to let it close down connections and only
# then drop the VIP address by stopping the keepalived service.
# NOTE(yoctozepto): we need fresh VIP address placement info (cached facts may be stale)
- name: Check IP addresses on the API interface
vars:
version: "{{ '6' if api_address_family == 'ipv6' else '4' }}"
become: true
command: ip -{{ version }} -o addr show dev {{ api_interface }}
register: ip_addr_output
changed_when: false
when:
- kolla_action != "config"
listen:
- Restart haproxy container
- Restart keepalived container
- name: Group HA nodes by status
vars:
re_safe_address: "{{ kolla_internal_vip_address | regex_escape }}"
group_by:
key: kolla_ha_is_master_{{ ip_addr_output.stdout is regex('\b' + re_safe_address + '\b') }}
when:
- kolla_action != "config"
listen:
- Restart haproxy container
- Restart keepalived container
- name: Stop backup keepalived container
become: true
kolla_docker:
action: "stop_container"
# NOTE(yoctozepto): backup node might not have keepalived yet - ignore
ignore_missing: true
common_options: "{{ docker_common_options }}"
name: "keepalived"
when:
- kolla_action != "config"
- groups.kolla_ha_is_master_False is defined
- inventory_hostname in groups.kolla_ha_is_master_False
listen:
- Restart keepalived container
- name: Restart backup haproxy container
vars:
service_name: "haproxy"
service: "{{ haproxy_services[service_name] }}"
@@ -14,12 +60,20 @@
dimensions: "{{ service.dimensions }}"
when:
- kolla_action != "config"
- inventory_hostname in groups[service.group]
- service.enabled | bool
- groups.kolla_ha_is_master_False is defined
- inventory_hostname in groups.kolla_ha_is_master_False
listen:
- Restart haproxy container
- Restart keepalived container
notify:
- Waiting for haproxy to start
- Wait for backup haproxy to start
- name: Restart keepalived container
- name: Wait for backup haproxy to start
wait_for:
host: "{{ api_interface_address }}"
port: "{{ haproxy_monitor_port }}"
- name: Start backup keepalived container
vars:
service_name: "keepalived"
service: "{{ haproxy_services[service_name] }}"
@@ -34,17 +88,92 @@
dimensions: "{{ service.dimensions }}"
when:
- kolla_action != "config"
- inventory_hostname in groups[service.group]
- service.enabled | bool
- groups.kolla_ha_is_master_False is defined
- inventory_hostname in groups.kolla_ha_is_master_False
listen:
- Restart keepalived container
notify:
- Waiting for virtual IP to appear
- Wait for virtual IP to appear
- name: Waiting for haproxy to start
# NOTE(yoctozepto): This is to ensure haproxy can close any open connections
# to the VIP address.
- name: Stop master haproxy container
become: true
kolla_docker:
action: "stop_container"
common_options: "{{ docker_common_options }}"
name: "haproxy"
when:
- kolla_action != "config"
- groups.kolla_ha_is_master_True is defined
- inventory_hostname in groups.kolla_ha_is_master_True
listen:
- Restart keepalived container
- name: Stop master keepalived container
become: true
kolla_docker:
action: "stop_container"
common_options: "{{ docker_common_options }}"
name: "keepalived"
when:
- kolla_action != "config"
- groups.kolla_ha_is_master_True is defined
- inventory_hostname in groups.kolla_ha_is_master_True
listen:
- Restart keepalived container
- name: Start master haproxy container
vars:
service_name: "haproxy"
service: "{{ haproxy_services[service_name] }}"
become: true
kolla_docker:
action: "recreate_or_restart_container"
common_options: "{{ docker_common_options }}"
name: "{{ service.container_name }}"
image: "{{ service.image }}"
privileged: "{{ service.privileged | default(False) }}"
volumes: "{{ service.volumes }}"
dimensions: "{{ service.dimensions }}"
when:
- kolla_action != "config"
- groups.kolla_ha_is_master_True is defined
- inventory_hostname in groups.kolla_ha_is_master_True
listen:
- Restart haproxy container
- Restart keepalived container
notify:
- Wait for master haproxy to start
- name: Wait for master haproxy to start
wait_for:
host: "{{ api_interface_address }}"
port: "{{ haproxy_monitor_port }}"
- name: Waiting for virtual IP to appear
- name: Start master keepalived container
vars:
service_name: "keepalived"
service: "{{ haproxy_services[service_name] }}"
become: true
kolla_docker:
action: "recreate_or_restart_container"
common_options: "{{ docker_common_options }}"
name: "{{ service.container_name }}"
image: "{{ service.image }}"
privileged: "{{ service.privileged | default(False) }}"
volumes: "{{ service.volumes }}"
dimensions: "{{ service.dimensions }}"
when:
- kolla_action != "config"
- groups.kolla_ha_is_master_True is defined
- inventory_hostname in groups.kolla_ha_is_master_True
listen:
- Restart keepalived container
notify:
- Wait for virtual IP to appear
- name: Wait for virtual IP to appear
wait_for:
host: "{{ kolla_internal_vip_address }}"
port: "{{ haproxy_monitor_port }}"

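The handler dance above hinges on grouping nodes into kolla_ha_is_master_True/False according to whether the VIP is currently configured on the API interface, using a word-boundary regex against the ip addr output. A minimal Python sketch of an equivalent check follows; the interface output and addresses are made up for illustration.

import re

def holds_vip(ip_addr_output, vip_address):
    # Word boundaries keep 10.0.0.1 from matching inside 10.0.0.10,
    # mirroring the regex test in the "Group HA nodes by status" handler.
    pattern = r'\b' + re.escape(vip_address) + r'\b'
    return re.search(pattern, ip_addr_output) is not None


sample = '2: eth0    inet 10.0.0.10/24 brd 10.0.0.255 scope global eth0'
print(holds_vip(sample, '10.0.0.1'))   # False - this node is a backup
print(holds_vip(sample, '10.0.0.10'))  # True  - this node holds the VIP (master)

Backups are handled first (stop keepalived, restart haproxy, wait for it), and only afterwards are the master's haproxy and keepalived restarted, so the VIP is never dropped while no backup is ready to take it over.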
@@ -1,5 +1,5 @@
---
- name: Deploy haproxy containers
- name: Check haproxy containers
become: true
kolla_docker:
action: "compare_container"

@@ -1,22 +1,2 @@
---
- import_tasks: config-host.yml
- import_tasks: config.yml
- name: Stopping all slave keepalived containers
vars:
key: "{{ 'ipv6' if api_address_family == 'ipv6' else 'ipv4_secondaries' }}"
addresses: "{{ hostvars[inventory_hostname]['ansible_' + api_interface].get(key, []) | map(attribute='address') | list }}"
become: true
kolla_docker:
action: "stop_container"
common_options: "{{ docker_common_options }}"
name: "keepalived"
when: kolla_internal_vip_address not in addresses
notify:
- Restart keepalived container
# NOTE(yoctozepto): haproxy role handlers should not be flushed early.
# site.yml handles all haproxy things in a dedicated play.
# This is to avoid extra haproxy service restarts.
# See: https://bugs.launchpad.net/kolla-ansible/+bug/1875228
- import_tasks: deploy.yml

@@ -0,0 +1,5 @@
---
fixes:
- |
Makes haproxy and keepalived restarts during Kolla-Ansible actions more
robust, especially in multinode scenarios (HA).

@@ -94,6 +94,7 @@ class ModuleArgsTest(base.BaseTestCase):
tty=dict(required=False, type='bool', default=False),
client_timeout=dict(required=False, type='int', default=120),
healthcheck=dict(required=False, type='dict'),
ignore_missing=dict(required=False, type='bool', default=False),
)
required_if = [
['action', 'pull_image', ['image']],
@@ -175,7 +176,15 @@ FAKE_DATA = {
'Image': 'myregistrydomain.com:5000/ubuntu:16.04',
'ImageID': 'sha256:c5f1cf30',
'Labels': {},
'Names': '/my_container'}
'Names': '/my_container'},
{'Created': 1463578195,
'Status': 'Exited (0) 2 hours ago',
'HostConfig': {'NetworkMode': 'default'},
'Id': 'e40d8e7188',
'Image': 'myregistrydomain.com:5000/ubuntu:16.04',
'ImageID': 'sha256:c5f1cf30',
'Labels': {},
'Names': '/exited_container'},
],
'container_inspect': {
@@ -396,6 +405,18 @@ class TestContainer(base.BaseTestCase):
self.assertTrue(self.dw.changed)
self.dw.dc.containers.assert_called_once_with(all=True)
self.dw.dc.stop.assert_called_once_with('my_container', timeout=10)
self.dw.module.fail_json.assert_not_called()
def test_stop_container_already_stopped(self):
self.dw = get_DockerWorker({'name': 'exited_container',
'action': 'stop_container'})
self.dw.dc.containers.return_value = self.fake_data['containers']
self.dw.stop_container()
self.assertFalse(self.dw.changed)
self.dw.dc.containers.assert_called_once_with(all=True)
self.dw.module.fail_json.assert_not_called()
self.dw.dc.stop.assert_not_called()
def test_stop_container_not_exists(self):
self.dw = get_DockerWorker({'name': 'fake_container',
@@ -405,9 +426,22 @@
self.assertFalse(self.dw.changed)
self.dw.dc.containers.assert_called_once_with(all=True)
self.dw.dc.stop.assert_not_called()
self.dw.module.fail_json.assert_called_once_with(
msg="No such container: fake_container to stop")
def test_stop_container_not_exists_ignore_missing(self):
self.dw = get_DockerWorker({'name': 'fake_container',
'action': 'stop_container',
'ignore_missing': True})
self.dw.dc.containers.return_value = self.fake_data['containers']
self.dw.stop_container()
self.assertFalse(self.dw.changed)
self.dw.dc.containers.assert_called_once_with(all=True)
self.dw.dc.stop.assert_not_called()
self.dw.module.fail_json.assert_not_called()
def test_stop_and_remove_container(self):
self.dw = get_DockerWorker({'name': 'my_container',
'action': 'stop_and_remove_container'})