Refactor cellv2 host discovery logic to avoid races
The compute service list is polled until all expected hosts are reported or a timeout occurs (600s). Adds a cellv2_discovery flag to puppet services, which is used to generate a list of hosts that should have cellv2 host mappings. Adds a canonical fqdn that should match the fqdn reported by a host. Adds the ability to upload a config script for docker config instead of using complex bash one-liners. Closes-bug: 1720821 Change-Id: I33e2f296526c957cb5f96dff19682a4e60c6a0f0
This commit is contained in:
parent
9ca2c2a46a
commit
61fcfca045
@ -208,6 +208,7 @@ resources:
|
||||
kolla_config: {get_param: [role_data, {{role.name}}, kolla_config]}
|
||||
bootstrap_server_id: {get_param: [servers, {{primary_role_name}}, '0']}
|
||||
puppet_step_config: {get_param: [role_data, {{role.name}}, step_config]}
|
||||
docker_config_scripts: {get_param: [role_data, {{role.name}}, docker_config_scripts]}
|
||||
tasks:
|
||||
# Join host_prep_tasks with the other per-host configuration
|
||||
list_concat:
|
||||
@ -230,6 +231,11 @@ resources:
|
||||
# FIXME: can we move docker-puppet somewhere so it's installed via a package?
|
||||
- name: Write docker-puppet.py
|
||||
copy: content="{{ '{{' }}docker_puppet_script{{ '}}' }}" dest=/var/lib/docker-puppet/docker-puppet.py force=yes mode=0600
|
||||
- name: Create /var/lib/docker-config-scripts
|
||||
file: path=/var/lib/docker-config-scripts state=directory
|
||||
- name: Write docker config scripts
|
||||
copy: content="{{ '{{' }}item.value.content{{ '}}' }}" dest="/var/lib/docker-config-scripts/{{ '{{' }}item.key{{ '}}' }}" force=yes mode="{{ '{{' }}item.value.mode|default('0600', true){{ '}}' }}"
|
||||
with_dict: "{{ '{{' }}docker_config_scripts{{ '}}' }}"
|
||||
# Here we are dumping all the docker container startup configuration data
|
||||
# so that we can have access to how they are started outside of heat
|
||||
# and docker-cmd. This lets us create command line tools to test containers.
|
||||
|
@ -89,6 +89,31 @@ resources:
|
||||
service_names: {get_attr: [ServiceChain, role_data, service_names]}
|
||||
docker_config: {get_attr: [ServiceChain, role_data, docker_config]}
|
||||
|
||||
DockerConfigScripts:
|
||||
type: OS::Heat::Value
|
||||
properties:
|
||||
type: json
|
||||
value:
|
||||
yaql:
|
||||
expression:
|
||||
# select 'docker_config_scripts' only from services that have it
|
||||
coalesce($.data.service_names, []).zip(coalesce($.data.docker_config_scripts, [])).where($[1] != null).select($[1]).reduce($1.mergeWith($2), {})
|
||||
data:
|
||||
service_names: {get_attr: [ServiceChain, role_data, service_names]}
|
||||
docker_config_scripts: {get_attr: [ServiceChain, role_data, docker_config_scripts]}
|
||||
|
||||
CellV2Discovery:
|
||||
type: OS::Heat::Value
|
||||
properties:
|
||||
type: boolean
|
||||
value:
|
||||
yaql:
|
||||
expression:
|
||||
# If any service in this role requires cellv2_discovery then this value is true
|
||||
coalesce($.data.cellv2_discovery, []).contains(true)
|
||||
data:
|
||||
cellv2_discovery: {get_attr: [ServiceChain, role_data, cellv2_discovery]}
|
||||
|
||||
LoggingSourcesConfig:
|
||||
type: OS::Heat::Value
|
||||
properties:
|
||||
@ -293,5 +318,7 @@ outputs:
|
||||
puppet_config: {get_attr: [PuppetConfig, value]}
|
||||
kolla_config: {get_attr: [KollaConfig, value]}
|
||||
docker_config: {get_attr: [DockerConfig, value]}
|
||||
docker_config_scripts: {get_attr: [DockerConfigScripts, value]}
|
||||
docker_puppet_tasks: {get_attr: [DockerPuppetTasks, value]}
|
||||
host_prep_tasks: {get_attr: [HostPrepTasks, value]}
|
||||
cellv2_discovery: {get_attr: [CellV2Discovery, value]}
|
||||
|
@ -117,6 +117,58 @@ outputs:
|
||||
- path: /var/log/nova
|
||||
owner: nova:nova
|
||||
recurse: true
|
||||
docker_config_scripts:
|
||||
nova_api_discover_hosts.sh:
|
||||
mode: "0700"
|
||||
content: |
|
||||
#!/bin/bash
|
||||
export OS_PROJECT_DOMAIN_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken project_domain_name)
|
||||
export OS_USER_DOMAIN_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken user_domain_name)
|
||||
export OS_PROJECT_NAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken project_name)
|
||||
export OS_USERNAME=$(crudini --get /etc/nova/nova.conf keystone_authtoken username)
|
||||
export OS_PASSWORD=$(crudini --get /etc/nova/nova.conf keystone_authtoken password)
|
||||
export OS_AUTH_URL=$(crudini --get /etc/nova/nova.conf keystone_authtoken auth_url)
|
||||
export OS_AUTH_TYPE=password
|
||||
export OS_IDENTITY_API_VERSION=3
|
||||
|
||||
echo "(cellv2) Running cell_v2 host discovery"
|
||||
timeout=600
|
||||
loop_wait=30
|
||||
declare -A discoverable_hosts
|
||||
for host in $(hiera -c /etc/puppet/hiera.yaml cellv2_discovery_hosts | sed -e '/^nil$/d' | tr "," " "); do discoverable_hosts[$host]=1; done
|
||||
timeout_at=$(( $(date +"%s") + ${timeout} ))
|
||||
echo "(cellv2) Waiting ${timeout} seconds for hosts to register"
|
||||
finished=0
|
||||
while : ; do
|
||||
for host in $(openstack -q compute service list -c 'Host' -c 'Zone' -f value | awk '$2 != "internal" { print $1 }'); do
|
||||
if (( discoverable_hosts[$host] == 1 )); then
|
||||
echo "(cellv2) compute node $host has registered"
|
||||
unset discoverable_hosts[$host]
|
||||
fi
|
||||
done
|
||||
finished=1
|
||||
for host in "${!discoverable_hosts[@]}"; do
|
||||
if (( ${discoverable_hosts[$host]} == 1 )); then
|
||||
echo "(cellv2) compute node $host has not registered"
|
||||
finished=0
|
||||
fi
|
||||
done
|
||||
remaining=$(( $timeout_at - $(date +"%s") ))
|
||||
if (( $finished == 1 )); then
|
||||
echo "(cellv2) All nodes registered"
|
||||
break
|
||||
elif (( $remaining <= 0 )); then
|
||||
echo "(cellv2) WARNING: timeout waiting for nodes to register, running host discovery regardless"
|
||||
echo "(cellv2) Expected host list:" $(hiera -c /etc/puppet/hiera.yaml cellv2_discovery_hosts | sed -e '/^nil$/d' | sort -u | tr ',' ' ')
|
||||
echo "(cellv2) Detected host list:" $(openstack -q compute service list -c 'Host' -c 'Zone' -f value | awk '$2 != "internal" { print $1 }' | sort -u | tr '\n', ' ')
|
||||
break
|
||||
else
|
||||
echo "(cellv2) Waiting ${remaining} seconds for hosts to register"
|
||||
sleep $loop_wait
|
||||
fi
|
||||
done
|
||||
echo "(cellv2) Running host discovery..."
|
||||
su nova -s /bin/bash -c "/usr/bin/nova-manage cell_v2 discover_hosts --verbose"
|
||||
docker_config:
|
||||
step_2:
|
||||
get_attr: [NovaApiLogging, docker_config, step_2]
|
||||
@ -216,9 +268,16 @@ outputs:
|
||||
image: *nova_api_image
|
||||
net: host
|
||||
detach: false
|
||||
volumes: *nova_api_bootstrap_volumes
|
||||
volumes:
|
||||
list_concat:
|
||||
- *nova_api_bootstrap_volumes
|
||||
-
|
||||
- /var/lib/config-data/nova/etc/my.cnf.d/tripleo.cnf:/etc/my.cnf.d/tripleo.cnf:ro
|
||||
- /var/lib/config-data/nova/etc/nova/:/etc/nova/:ro
|
||||
- /var/log/containers/nova:/var/log/nova
|
||||
- /var/lib/docker-config-scripts/nova_api_discover_hosts.sh:/nova_api_discover_hosts.sh:ro
|
||||
user: root
|
||||
command: "/usr/bin/bootstrap_host_exec nova_api su nova -s /bin/bash -c '/usr/bin/nova-manage cell_v2 discover_hosts --verbose'"
|
||||
command: "/usr/bin/bootstrap_host_exec nova_api /nova_api_discover_hosts.sh"
|
||||
metadata_settings:
|
||||
get_attr: [NovaApiBase, role_data, metadata_settings]
|
||||
host_prep_tasks: {get_attr: [NovaApiLogging, host_prep_tasks]}
|
||||
|
@ -73,6 +73,7 @@ outputs:
|
||||
description: Role data for the Nova Compute service.
|
||||
value:
|
||||
service_name: {get_attr: [NovaComputeBase, role_data, service_name]}
|
||||
cellv2_discovery: true
|
||||
config_settings:
|
||||
get_attr: [NovaComputeBase, role_data, config_settings]
|
||||
logging_source: {get_attr: [NovaComputeBase, role_data, logging_source]}
|
||||
@ -111,7 +112,6 @@ outputs:
|
||||
owner: nova:nova
|
||||
recurse: true
|
||||
docker_config:
|
||||
# FIXME: run discover hosts here
|
||||
step_4:
|
||||
nova_compute:
|
||||
image: &nova_compute_image {get_param: DockerNovaComputeImage}
|
||||
|
@ -642,6 +642,21 @@ resources:
|
||||
{% for role in roles %}
|
||||
- {get_attr: [{{role.name}}ServiceNames, value]}
|
||||
{% endfor %}
|
||||
cellv2_discovery_hosts:
|
||||
# Collects compute hostnames for all roles with a service that requires cellv2 host discovery
|
||||
list_join:
|
||||
- ','
|
||||
- yaql:
|
||||
expression: coalesce($.data.e.zip($.data.l).where($[0]).select($[1]).flatten(), [])
|
||||
data:
|
||||
e: # list of true/false values for whether cellv2 host discovery is required for the roles
|
||||
{%- for role in roles %}
|
||||
- {get_attr: [{{role.name}}ServiceChainRoleData, value, cellv2_discovery]}
|
||||
{%- endfor %}
|
||||
l: # list of lists of compute hostnames for the roles
|
||||
{%- for role in roles %}
|
||||
- {get_attr: [{{role.name}}, hostname_map, canonical]}
|
||||
{%- endfor %}
|
||||
controller_ips: {get_attr: [{{primary_role_name}}, ip_address]}
|
||||
controller_names: {get_attr: [{{primary_role_name}}, hostname]}
|
||||
service_ips:
|
||||
|
@ -22,6 +22,8 @@ parameters:
|
||||
type: json
|
||||
controller_names:
|
||||
type: comma_delimited_list
|
||||
cellv2_discovery_hosts:
|
||||
type: comma_delimited_list
|
||||
NetVipMap:
|
||||
type: json
|
||||
RedisVirtualIP:
|
||||
@ -141,6 +143,10 @@ resources:
|
||||
list_join:
|
||||
- ','
|
||||
- {get_param: controller_names}
|
||||
- cellv2_discovery_hosts:
|
||||
list_join:
|
||||
- ','
|
||||
- {get_param: cellv2_discovery_hosts}
|
||||
deploy_identifier: {get_param: DeployIdentifier}
|
||||
update_identifier: {get_param: UpdateIdentifier}
|
||||
stack_action: {get_param: StackAction}
|
||||
|
@ -477,6 +477,14 @@ resources:
|
||||
- '.'
|
||||
- - {get_attr: [{{server_resource_name}}, name]}
|
||||
- ctlplane
|
||||
canonical:
|
||||
fqdn:
|
||||
list_join:
|
||||
- '.'
|
||||
- - {get_attr: [{{server_resource_name}}, name]}
|
||||
- {get_param: CloudDomain}
|
||||
short:
|
||||
- {get_attr: [{{server_resource_name}}, name]}
|
||||
|
||||
PreNetworkConfig:
|
||||
type: OS::TripleO::{{role.name}}::PreNetworkConfig
|
||||
@ -602,6 +610,7 @@ resources:
|
||||
fqdn_management: {get_attr: [NetHostMap, value, management, fqdn]}
|
||||
fqdn_ctlplane: {get_attr: [NetHostMap, value, ctlplane, fqdn]}
|
||||
fqdn_external: {get_attr: [NetHostMap, value, external, fqdn]}
|
||||
fqdn_canonical: {get_attr: [NetHostMap, value, canonical, fqdn]}
|
||||
|
||||
# Resource for site-specific injection of root certificate
|
||||
NodeTLSCAData:
|
||||
@ -696,6 +705,7 @@ outputs:
|
||||
{{network.name_lower|default(network.name.lower())}}: {get_attr: [NetHostMap, value, {{network.name_lower|default(network.name.lower()) }}, fqdn]}
|
||||
{%- endfor %}
|
||||
ctlplane: {get_attr: [NetHostMap, value, ctlplane, fqdn]}
|
||||
canonical: {get_attr: [NetHostMap, value, canonical, fqdn]}
|
||||
hosts_entry:
|
||||
value:
|
||||
str_replace:
|
||||
|
@ -40,7 +40,7 @@ envs_containing_endpoint_map = ['tls-endpoints-public-dns.yaml',
|
||||
'tls-endpoints-public-ip.yaml',
|
||||
'tls-everywhere-endpoints-dns.yaml']
|
||||
ENDPOINT_MAP_FILE = 'endpoint_map.yaml'
|
||||
OPTIONAL_SECTIONS = ['workflow_tasks']
|
||||
OPTIONAL_SECTIONS = ['workflow_tasks', 'cellv2_discovery']
|
||||
REQUIRED_DOCKER_SECTIONS = ['service_name', 'docker_config', 'puppet_config',
|
||||
'config_settings', 'step_config']
|
||||
OPTIONAL_DOCKER_SECTIONS = ['docker_puppet_tasks', 'upgrade_tasks',
|
||||
@ -48,7 +48,8 @@ OPTIONAL_DOCKER_SECTIONS = ['docker_puppet_tasks', 'upgrade_tasks',
|
||||
'service_config_settings', 'host_prep_tasks',
|
||||
'metadata_settings', 'kolla_config',
|
||||
'global_config_settings', 'logging_source',
|
||||
'logging_groups', 'external_deploy_tasks']
|
||||
'logging_groups', 'external_deploy_tasks',
|
||||
'docker_config_scripts']
|
||||
REQUIRED_DOCKER_PUPPET_CONFIG_SECTIONS = ['config_volume', 'step_config',
|
||||
'config_image']
|
||||
OPTIONAL_DOCKER_PUPPET_CONFIG_SECTIONS = [ 'puppet_tags', 'volumes' ]
|
||||
|
Loading…
x
Reference in New Issue
Block a user