From 809afdbc5bada6acbe0e16fcd650b0fed8d4824e Mon Sep 17 00:00:00 2001 From: Daniel Caires Date: Fri, 26 Sep 2025 07:07:05 -0300 Subject: [PATCH] Update libvirt cgroup controllers initialization The libvirt cgroup initialization in the caracal version uses a hard-coded list of controllers that is set in the libvirt bash file. This patch updates the .sh to its latest version [1], which compares a list of controllers set in the values file with the controllers available on the host, and uses the resulting list to initialize the controllers for the libvirt process. This patch also removes the hugepage limit workaround and write test that existed in the bash file, as they were removed from the upstream repo as well [2]. SHAs of the upstream commits that introduced the changes in this patch: [1] - https://opendev.org/openstack/openstack-helm/commit/3903f54d0c1701f86f92da9023b67b7b453c4760 [2] - https://opendev.org/openstack/openstack-helm/commit/ea3c04a7d9e39d63402751353e00d21762d988e5 Signed-off-by: Daniel Caires --- libvirt/templates/bin/_libvirt.sh.tpl | 76 +++++---------------------- libvirt/values.yaml | 14 +++++ 2 files changed, 26 insertions(+), 64 deletions(-) diff --git a/libvirt/templates/bin/_libvirt.sh.tpl b/libvirt/templates/bin/_libvirt.sh.tpl index d16cdca3..af1b4f5e 100644 --- a/libvirt/templates/bin/_libvirt.sh.tpl +++ b/libvirt/templates/bin/_libvirt.sh.tpl @@ -24,13 +24,6 @@ if [ -f /tmp/vnc.crt ]; then mv /tmp/vnc-ca.crt /etc/pki/libvirt-vnc/ca-cert.pem fi -# TODO: We disable cgroup functionality for cgroup v2, we should fix this in the future -if $(stat -fc %T /sys/fs/cgroup/ | grep -q cgroup2fs); then - CGROUP_VERSION=v2 -else - CGROUP_VERSION=v1 -fi - if [ -n "$(cat /proc/*/comm 2>/dev/null | grep -w libvirtd)" ]; then set +x for proc in $(ls /proc/*/comm 2>/dev/null); do @@ -55,16 +48,14 @@ if [ "$(cat /etc/os-release | grep -w NAME= | grep -w CentOS)" ]; then fi fi -if [ $CGROUP_VERSION != "v2" ]; then - #Setup Cgroups to use when breaking out of Kubernetes defined groups - 
CGROUPS="" - for CGROUP in cpu rdma hugetlb; do - if [ -d /sys/fs/cgroup/${CGROUP} ]; then - CGROUPS+="${CGROUP}," - fi - done - cgcreate -g ${CGROUPS%,}:/osh-libvirt -fi +#Setup Cgroups to use when breaking out of Kubernetes defined groups +CGROUPS="" +for CGROUP in {{ .Values.conf.kubernetes.cgroup_controllers | include "helm-toolkit.utils.joinListWithSpace" }}; do + if [ -d /sys/fs/cgroup/${CGROUP} ] || grep -w $CGROUP /sys/fs/cgroup/cgroup.controllers; then + CGROUPS+="${CGROUP}," + fi +done +cgcreate -g ${CGROUPS%,}:/osh-libvirt # We assume that if hugepage count > 0, then hugepages should be exposed to libvirt/qemu hp_count="$(cat /proc/meminfo | grep HugePages_Total | tr -cd '[:digit:]')" @@ -86,50 +77,11 @@ if [ 0"$hp_count" -gt 0 ]; then echo "ERROR: Hugepages configured in kernel, but libvirtd container cannot access /dev/hugepages" exit 1 fi - - if [ $CGROUP_VERSION != "v2" ]; then - # Kubernetes 1.10.x introduced cgroup changes that caused the container's - # hugepage byte limit quota to zero out. This workaround sets that pod limit - # back to the total number of hugepage bytes available to the baremetal host. - if [ -d /sys/fs/cgroup/hugetlb ]; then - limits="$(ls /sys/fs/cgroup/hugetlb/{{ .Values.conf.kubernetes.cgroup }}/hugetlb.*.limit_in_bytes)" || \ - (echo "ERROR: Failed to locate any hugetable limits. Did you set the correct cgroup in your values used for this chart?" - exit 1) - for limit in $limits; do - target="/sys/fs/cgroup/hugetlb/$(dirname $(awk -F: '($2~/hugetlb/){print $3}' /proc/self/cgroup))/$(basename $limit)" - # Ensure the write target for the hugepage limit for the pod exists - if [ ! 
-f "$target" ]; then - echo "ERROR: Could not find write target for hugepage limit: $target" - fi - - # Write hugetable limit for pod - echo "$(cat $limit)" > "$target" - done - fi - - # Determine OS default hugepage size to use for the hugepage write test - default_hp_kb="$(cat /proc/meminfo | grep Hugepagesize | tr -cd '[:digit:]')" - - # Attempt to write to the hugepage mount to ensure it is operational, but only - # if we have at least 1 free page. - num_free_pages="$(cat /sys/kernel/mm/hugepages/hugepages-${default_hp_kb}kB/free_hugepages | tr -cd '[:digit:]')" - echo "INFO: '$num_free_pages' free hugepages of size ${default_hp_kb}kB" - if [ 0"$num_free_pages" -gt 0 ]; then - (fallocate -o0 -l "$default_hp_kb" /dev/hugepages/foo && rm /dev/hugepages/foo) || \ - (echo "ERROR: fallocate failed test at /dev/hugepages with size ${default_hp_kb}kB" - rm /dev/hugepages/foo - exit 1) - fi - fi fi if [ -n "${LIBVIRT_CEPH_CINDER_SECRET_UUID}" ] || [ -n "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" ] ; then - if [ $CGROUP_VERSION != "v2" ]; then - #NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied. - cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen & - else - systemd-run --scope --slice=system libvirtd --listen & - fi + + cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen & tmpsecret=$(mktemp --suffix .xml) if [ -n "${LIBVIRT_EXTERNAL_CEPH_CINDER_SECRET_UUID}" ] ; then @@ -205,9 +157,5 @@ EOF fi -if [ $CGROUP_VERSION != "v2" ]; then - #NOTE(portdirect): run libvirtd as a transient unit on the host with the osh-libvirt cgroups applied. 
- cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen -else - systemd-run --scope --slice=system libvirtd --listen -fi +# NOTE(vsaienko): changing CGROUP is required as restart of the pod will cause domains restarts +cgexec -g ${CGROUPS%,}:/osh-libvirt systemd-run --scope --slice=system libvirtd --listen diff --git a/libvirt/values.yaml b/libvirt/values.yaml index b3a4373b..7f41ae60 100644 --- a/libvirt/values.yaml +++ b/libvirt/values.yaml @@ -125,6 +125,20 @@ conf: group: "kvm" kubernetes: cgroup: "kubepods.slice" + # List of cgroup controller we want to use when breaking out of + # Kubernetes defined groups + cgroup_controllers: + - blkio + - cpu + - devices + - freezer + - hugetlb + - memory + - net_cls + - perf_event + - rdma + - misc + - pids vencrypt: # Issuer to use for the vencrypt certs. issuer: -- 2.34.1