Update nodes yaml to use cpu architecture

Migrate CPU-relevant configuration parameters from tempest.conf to the
nodes_yaml file. Previously, CPU information about the hosts, such as
the CPU topology and the dedicated/shared sets, was pulled from
tempest.conf. This change moves that information to the nodes yaml
approach [1] for accessing host-specific service information. The
format of the information can be seen below:

compute-0.redhat.local:
  services:
    libvirt:
      container_name: nova_virtqemud
      start_command: 'systemctl start tripleo_nova_virtqemud'
      stop_command: 'systemctl stop tripleo_nova_virtqemud'
    nova-compute:
      container_name: nova_compute
      config_path: '/var/lib/config-data/puppet-generated/nova_libvirt/etc/nova/nova.conf'
      start_command: 'systemctl start tripleo_nova_compute'
      stop_command: 'systemctl stop tripleo_nova_compute'
  cpu_shared_set: 0,1
  cpu_dedicated_set: 4,5,6,7
  numa:
    node-0:
      cpus: "0-3"
    node-1:
      cpus: "4-7"

[1] 3fe1d72fa6
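
For illustration only (not part of this change), a minimal Python sketch of
how a consumer could read the per-host CPU sets out of such a file; the
helper name below is hypothetical, while the file path matches the location
the playbook in this change renders to and the host name reuses the example
above:

import yaml

def host_dedicated_cpus(nodes_path, host):
    # Load the nodes yaml and expand a spec such as "4,5,6-7" into {4, 5, 6, 7}.
    with open(nodes_path) as f:
        nodes = yaml.safe_load(f)
    spec = str(nodes[host].get('cpu_dedicated_set', ''))
    cpus = set()
    for part in spec.split(','):
        if '-' in part:
            lo, hi = part.split('-')
            cpus.update(range(int(lo), int(hi) + 1))
        elif part:
            cpus.add(int(part))
    return cpus

# host_dedicated_cpus('/home/zuul/compute_nodes.yaml', 'compute-0.redhat.local')
# would return {4, 5, 6, 7} for the example entry above.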

Change-Id: I1f22131dc04a2d7a5f010da2dfa3f4e9524656a2
James Parker 2023-11-29 11:16:58 -05:00 committed by jamepark4
parent 82c307a034
commit 3ff555d1c8
7 changed files with 187 additions and 71 deletions


@@ -54,6 +54,7 @@
       # addition to test vTPM hosts need swtpm as well
       extra_packages: ovmf,swtpm-tools
       tempest_exclude_regex: ^whitebox_tempest_plugin\.api\.compute\.test_hugepages
+      compute_node_template_name: whitebox-devstack-multinode.yaml.j2
       devstack_localrc:
         MAX_COMPUTE_NODES: 2
         NOVA_SERVICE_REPORT_INTERVAL: 10
@@ -150,6 +151,7 @@
     vars:
       tox_envlist: all
       tempest_concurrency: 1
+      compute_node_template_name: whitebox-devstack-ceph-multinode.yaml.j2
       devstack_plugins:
         whitebox-tempest-plugin: https://opendev.org/openstack/whitebox-tempest-plugin.git
       tempest_test_regex: '^whitebox_tempest_plugin.api.compute.test_rbd_direct_download'


@@ -1,13 +0,0 @@
-{% for compute in computes -%}
-{{ compute }}:
-  services:
-    libvirt:
-      start_command: 'systemctl start libvirtd'
-      stop_command: 'systemctl stop libvirtd'
-      mask_command: 'systemctl mask libvirtd'
-      unmask_command: 'systemctl unmask libvirtd'
-    nova-compute:
-      config_path: '/etc/nova/nova-cpu.conf'
-      start_command: 'systemctl start devstack@n-cpu'
-      stop_command: 'systemctl stop devstack@n-cpu'
-{% endfor %}


@@ -0,0 +1,22 @@
+{{ hostvars['controller']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
+{{ hostvars['compute1']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'


@@ -0,0 +1,34 @@
+{{ hostvars['controller']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
+  cpu_shared_set: {{ hostvars['controller']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_shared_set'] }}
+  cpu_dedicated_set: {{ hostvars['controller']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_dedicated_set'] }}
+  topology:
+    socket-0:
+      numa-0:
+        cpus: 0-7
+{{ hostvars['compute-host']['ansible_fqdn'] }}:
+  services:
+    libvirt:
+      start_command: 'systemctl start libvirtd'
+      stop_command: 'systemctl stop libvirtd'
+      mask_command: 'systemctl mask libvirtd'
+      unmask_command: 'systemctl unmask libvirtd'
+    nova-compute:
+      config_path: '/etc/nova/nova-cpu.conf'
+      start_command: 'systemctl start devstack@n-cpu'
+      stop_command: 'systemctl stop devstack@n-cpu'
+  cpu_shared_set: {{ hostvars['compute-host']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_shared_set'] }}
+  cpu_dedicated_set: {{ hostvars['compute-host']['devstack_local_conf']['post-config']['$NOVA_CONF']['compute']['cpu_dedicated_set'] }}
+  topology:
+    socket-0:
+      numa-0:
+        cpus: 0-7


@@ -28,23 +28,21 @@
         ansible_become: yes
         copy_sshkey_target_user: 'tempest'

-    - name: Collect compute hostnames
-      set_fact:
-        computes: "{{ ansible_play_hosts_all|map('extract', hostvars, 'ansible_fqdn')|list }}"
-      run_once: true
-
-    - name: Render compute_nodes.yaml template
-      template:
-        src: ../templates/compute_nodes.yaml.j2
-        dest: /home/zuul/compute_nodes.yaml
-      run_once: true
-      delegate_to: controller
-
-    - name: Output the rendered file at /home/zuul/compute_nodes.yaml
-      shell: |
-        cat /home/zuul/compute_nodes.yaml
-      run_once: true
-      delegate_to: controller
+    - name: Create compute nodes file
+      block:
+        - name: Render compute_nodes.yaml template
+          template:
+            src: "../templates/{{compute_node_template_name}}"
+            dest: /home/zuul/compute_nodes.yaml
+          run_once: true
+          delegate_to: controller
+
+        - name: Output the rendered file at /home/zuul/compute_nodes.yaml
+          shell: |
+            cat /home/zuul/compute_nodes.yaml
+          run_once: true
+          delegate_to: controller
+      when: compute_node_template_name is defined

 - hosts: compute
   tasks:


@@ -121,22 +121,76 @@ class BasePinningTest(base.BaseWhiteboxComputeTest,
         numa_topology = whitebox_utils.normalize_json(numa_topology)
         return numa_topology

+    def _get_host_cpu_dedicated_set(self, host):
+        """Return cpu dedicated or shared set configured for the provided host.
+        """
+        cpu_set = \
+            whitebox_utils.get_host_details(host).get('cpu_dedicated_set', [])
+        return hardware.parse_cpu_spec(cpu_set)
+
+    def _get_host_cpu_shared_set(self, host):
+        """Return cpu dedicated or shared set configured for the provided host.
+        """
+        cpu_set = \
+            whitebox_utils.get_host_details(host).get('cpu_shared_set', [])
+        return hardware.parse_cpu_spec(cpu_set)
+
+    def _get_shared_set_size(self):
+        gathered_lists = [self._get_host_cpu_shared_set(host)
+                          for host in self.hosts_details.keys()]
+        return gathered_lists
+
+    def _get_dedicated_set_size(self):
+        gathered_lists = [self._get_host_cpu_dedicated_set(host)
+                          for host in self.hosts_details.keys()]
+        return gathered_lists
+

 class CPUPolicyTest(BasePinningTest):
     """Validate CPU policy support."""

+    minimum_shared_cpus = 2
+    minimum_dedicated_cpus = 2
+
     def setUp(self):
         super().setUp()
-        self.dedicated_vcpus = (
-            CONF.whitebox_hardware.dedicated_cpus_per_numa *
-            len(CONF.whitebox_hardware.cpu_topology)) // 2
-        self.shared_vcpus = (
-            CONF.whitebox_hardware.shared_cpus_per_numa *
-            len(CONF.whitebox_hardware.cpu_topology)) // 2
+        self.hosts_details = whitebox_utils.get_all_hosts_details()
+
+        # Get the configured shared CPUs of each compute host and confirm
+        # that every host has the minimum number of shared CPUs necessary
+        # to preform test
+        shared_cpus_per_host = self._get_shared_set_size()
+        if any(len(cpus) < self.minimum_shared_cpus for cpus in
+               shared_cpus_per_host):
+            raise self.skipException(
+                'A Host in the deployment does not have the minimum required '
+                '%s shared cpus necessary to execute the tests' %
+                (self.minimum_shared_cpus))
+        available_shared_vcpus = \
+            min(shared_cpus_per_host, key=lambda x: len(x))
+
+        # Get the configured dedicated CPUs of each compute host and confirm
+        # that every host has the minimum number of shared CPUs necessary
+        # to preform test
+        dedicated_cpus_per_host = self._get_dedicated_set_size()
+        if any(len(cpus) < self.minimum_dedicated_cpus for cpus in
+               dedicated_cpus_per_host):
+            raise self.skipException(
+                'A Host in the deployment does not have the minimum required '
+                '%s dedicated cpus necessary to execute the tests' %
+                (self.minimum_dedicated_cpus))
+        available_dedicated_vcpus = \
+            min(dedicated_cpus_per_host, key=lambda x: len(x))
+
+        # Calculate the number of cpus to use in the flavors such the total
+        # size allows for two guests are capable to be scheduled to the same
+        # host
+        self.dedicated_cpus_per_guest = len(available_dedicated_vcpus) // 2
+        self.shared_vcpus_per_guest = len(available_shared_vcpus) // 2

     def test_cpu_shared(self):
         """Ensure an instance with an explicit 'shared' policy work."""
-        flavor = self.create_flavor(vcpus=self.shared_vcpus,
+        flavor = self.create_flavor(vcpus=self.shared_vcpus_per_guest,
                                     extra_specs=self.shared_cpu_policy)

         self.create_test_server(flavor=flavor['id'], wait_until='ACTIVE')
@@ -147,7 +201,7 @@ class CPUPolicyTest(BasePinningTest):
         default. However, we check specifics of that later and only assert that
         things aren't overlapping here.
         """
-        flavor = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                     extra_specs=self.dedicated_cpu_policy)
         server_a = self.create_test_server(flavor=flavor['id'],
                                            wait_until='ACTIVE')
@@ -156,13 +210,16 @@ class CPUPolicyTest(BasePinningTest):
                                            wait_until='ACTIVE')
         cpu_pinnings_a = self.get_server_cpu_pinning(server_a['id'])
         cpu_pinnings_b = self.get_server_cpu_pinning(server_b['id'])
+        host = self.get_host_for_server(server_a['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)

-        self.assertEqual(
-            len(cpu_pinnings_a), self.dedicated_vcpus,
-            "Instance should be pinned but it is unpinned")
-        self.assertEqual(
-            len(cpu_pinnings_b), self.dedicated_vcpus,
-            "Instance should be pinned but it is unpinned")
+        self.assertTrue(
+            set(cpu_pinnings_a.values()).issubset(dedicated_vcpus),
+            "Instance A's pinning %s should be a subset of pinning range %s"
+            % (cpu_pinnings_a, dedicated_vcpus))
+        self.assertTrue(
+            set(cpu_pinnings_b.values()).issubset(dedicated_vcpus),
+            "Instance B's pinning %s should be a subset of pinning range %s"
+            % (cpu_pinnings_b, dedicated_vcpus))

         self.assertTrue(
             set(cpu_pinnings_a.values()).isdisjoint(
@@ -175,17 +232,20 @@ class CPUPolicyTest(BasePinningTest):
                       'Resize not available.')
     def test_resize_pinned_server_to_unpinned(self):
         """Ensure resizing an instance to unpinned actually drops pinning."""
-        flavor_a = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor_a = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                       extra_specs=self.dedicated_cpu_policy)
         server = self.create_test_server(flavor=flavor_a['id'],
                                          wait_until='ACTIVE')
         cpu_pinnings = self.get_server_cpu_pinning(server['id'])

-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
-            "Instance should be pinned but is unpinned")
+        host = self.get_host_for_server(server['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
+            "Instance pinning %s should be a subset of pinning range %s"
+            % (cpu_pinnings, dedicated_vcpus))

-        flavor_b = self.create_flavor(vcpus=self.shared_vcpus,
+        flavor_b = self.create_flavor(vcpus=self.shared_vcpus_per_guest,
                                       extra_specs=self.shared_cpu_policy)
         self.resize_server(server['id'], flavor_b['id'])
         cpu_pinnings = self.get_server_cpu_pinning(server['id'])
@@ -198,7 +258,7 @@ class CPUPolicyTest(BasePinningTest):
                       'Resize not available.')
     def test_resize_unpinned_server_to_pinned(self):
         """Ensure resizing an instance to pinned actually applies pinning."""
-        flavor_a = self.create_flavor(vcpus=self.shared_vcpus,
+        flavor_a = self.create_flavor(vcpus=self.shared_vcpus_per_guest,
                                       extra_specs=self.shared_cpu_policy)
         server = self.create_test_server(flavor=flavor_a['id'],
                                          wait_until='ACTIVE')
@@ -208,26 +268,32 @@ class CPUPolicyTest(BasePinningTest):
             len(cpu_pinnings), 0,
             "Instance should be unpinned but is pinned")

-        flavor_b = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor_b = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                       extra_specs=self.dedicated_cpu_policy)
         self.resize_server(server['id'], flavor_b['id'])
-        cpu_pinnings = self.get_server_cpu_pinning(server['id'])

-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
-            "Resized instance should be pinned but is still unpinned")
+        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
+        host = self.get_host_for_server(server['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
+            "After resize instance %s pinning %s should be a subset of "
+            "pinning range %s" % (server['id'], cpu_pinnings, dedicated_vcpus))

     def test_reboot_pinned_server(self):
         """Ensure pinning information is persisted after a reboot."""
-        flavor = self.create_flavor(vcpus=self.dedicated_vcpus,
+        flavor = self.create_flavor(vcpus=self.dedicated_cpus_per_guest,
                                     extra_specs=self.dedicated_cpu_policy)
         server = self.create_test_server(flavor=flavor['id'],
                                          wait_until='ACTIVE')
-        cpu_pinnings = self.get_server_cpu_pinning(server['id'])

-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
-            "CPU pinning was not applied to new instance.")
+        cpu_pinnings = self.get_server_cpu_pinning(server['id'])
+        host = self.get_host_for_server(server['id'])
+        dedicated_vcpus = self._get_host_cpu_dedicated_set(host)
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
+            "After resize instance %s pinning %s should be a subset of "
+            "pinning range %s" % (server['id'], cpu_pinnings, dedicated_vcpus))

         self.reboot_server(server['id'], 'HARD')
         cpu_pinnings = self.get_server_cpu_pinning(server['id'])
@@ -235,8 +301,8 @@ class CPUPolicyTest(BasePinningTest):

         # we don't actually assert that the same pinning information is used
         # because that's not expected. We just care that _some_ pinning is in
         # effect
-        self.assertEqual(
-            len(cpu_pinnings), self.dedicated_vcpus,
+        self.assertTrue(
+            set(cpu_pinnings.values()).issubset(dedicated_vcpus),
             "Rebooted instance has lost its pinning information")

@@ -428,9 +494,7 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin):
         # Determine the compute host the guest was scheduled to and gather
         # the cpu shared set from the host
         host = self.get_host_for_server(server['id'])
-        host_sm = clients.NovaServiceManager(host, 'nova-compute',
-                                             self.os_admin.services_client)
-        cpu_shared_set = host_sm.get_cpu_shared_set()
+        cpu_shared_set = self._get_host_cpu_shared_set(host)

         # Gather the emulator threads from the server
         emulator_threads = \
@@ -506,8 +570,10 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin):

         # Create a flavor using the isolate threads_policy and then launch
         # an instance with the flavor
-        flavor = self.create_flavor(threads_policy='isolate',
-                                    vcpus=(self.dedicated_cpus_per_numa - 1))
+        flavor = self.create_flavor(
+            threads_policy='isolate',
+            vcpus=(self.dedicated_cpus_per_numa - 1)
+        )

         server = self.create_test_server(flavor=flavor['id'],
                                          wait_until='ACTIVE')
@@ -520,9 +586,7 @@ class EmulatorThreadTest(BasePinningTest, numa_helper.NUMAHelperMixin):
         # Determine the compute host the guest was scheduled to and gather
         # the cpu dedicated set from the host
         host = self.get_host_for_server(server['id'])
-        host_sm = clients.NovaServiceManager(host, 'nova-compute',
-                                             self.os_admin.services_client)
-        cpu_dedicated_set = host_sm.get_cpu_dedicated_set()
+        cpu_dedicated_set = self._get_host_cpu_dedicated_set(host)

         # Confirm the pinned cpus from the guest are part of the dedicated
         # range of the compute host it is scheduled to


@@ -81,3 +81,12 @@ def get_host_details(host):
         with open(nodes_location, "r") as f:
             _nodes = yaml.safe_load(f)
     return _nodes.get(host)
+
+
+def get_all_hosts_details():
+    global _nodes
+    if _nodes is None:
+        nodes_location = CONF.whitebox.nodes_yaml
+        with open(nodes_location, "r") as f:
+            _nodes = yaml.safe_load(f)
+    return _nodes
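
As a usage note rather than part of the diff, a minimal sketch of how the new
helper pairs with the per-host cpu_dedicated_set values, mirroring the
flavor-sizing logic added to CPUPolicyTest.setUp() above; the import paths are
assumptions about the plugin layout, not taken from this change:

# Illustrative sketch only; import module paths are assumed.
from whitebox_tempest_plugin import hardware
from whitebox_tempest_plugin import utils as whitebox_utils


def dedicated_cpus_per_guest():
    """Half of the smallest cpu_dedicated_set found across all hosts."""
    hosts = whitebox_utils.get_all_hosts_details()
    per_host = [hardware.parse_cpu_spec(str(details['cpu_dedicated_set']))
                for details in hosts.values()]
    # Sizing flavors this way lets two pinned guests land on the same host.
    return len(min(per_host, key=len)) // 2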