Add kubernetes 1.22.5 pkg
Here are the changes needed for adding k8s v1.22.5 in StarlingX alongside with the changes needed for the build environment to find and build the package. The package builds successfully. Deployed an iso with k8s 1.22.5 on AIO-SX and AIO-DX. The deployment phase works and the pods are up and running after the upgrade completes. Story: 2009789 Task: 44305 Signed-off-by: Daniel Safta <daniel.safta@windriver.com> Change-Id: Ibb9be075fa0b1491b9ab1854ebb1fddf4df53461
This commit is contained in:
parent
df3b902e6b
commit
a046a7a650
@ -168,6 +168,9 @@ kubernetes-1.20.9-client
|
||||
kubernetes-1.21.8-node
|
||||
kubernetes-1.21.8-kubeadm
|
||||
kubernetes-1.21.8-client
|
||||
kubernetes-1.22.5-node
|
||||
kubernetes-1.22.5-kubeadm
|
||||
kubernetes-1.22.5-client
|
||||
containerd
|
||||
k8s-pod-recovery
|
||||
k8s-cni-cache-cleanup
|
||||
|
@ -58,6 +58,7 @@ kubernetes/kubernetes-1.18.1
|
||||
kubernetes/kubernetes-1.19.13
|
||||
kubernetes/kubernetes-1.20.9
|
||||
kubernetes/kubernetes-1.21.8
|
||||
kubernetes/kubernetes-1.22.5
|
||||
kubernetes/kubernetes-unversioned
|
||||
kubernetes/docker-distribution
|
||||
kubernetes/etcd
|
||||
|
@ -46,6 +46,7 @@ kubernetes-v1.18.1.tar.gz#kubernetes-1.18.1#https://github.com/kubernetes/kubern
|
||||
kubernetes-v1.19.13.tar.gz#kubernetes-1.19.13#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.19.13.tar.gz#http##
|
||||
kubernetes-v1.20.9.tar.gz#kubernetes-1.20.9#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.20.9.tar.gz#http##
|
||||
kubernetes-v1.21.8.tar.gz#kubernetes-1.21.8#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.21.8.tar.gz#http##
|
||||
kubernetes-v1.22.5.tar.gz#kubernetes-1.22.5#https://github.com/kubernetes/kubernetes/archive/refs/tags/v1.22.5.tar.gz#http##
|
||||
kvm-unit-tests.git-4ea7633.tar.bz2#kvm-unit-tests#https://git.kernel.org/pub/scm/virt/kvm/kvm-unit-tests.git/snapshot/kvm-unit-tests-20171020.tar.gz#http##
|
||||
ldapscripts-2.0.8.tgz#ldapscripts-2.0.8#https://sourceforge.net/projects/ldapscripts/files/ldapscripts/ldapscripts-2.0.8/ldapscripts-2.0.8.tgz/download#http##
|
||||
libbpf-0.5.0.tar.gz#libbpf-0.5.0#https://github.com/libbpf/libbpf/archive/v0.5.0.tar.gz#https##
|
||||
|
4
kubernetes/kubernetes-1.22.5/centos/Readme.rst
Normal file
4
kubernetes/kubernetes-1.22.5/centos/Readme.rst
Normal file
@ -0,0 +1,4 @@
|
||||
The spec file used here was from the kubernetes 1.10.0 src rpm.
|
||||
The orig file is included to help show modifications made to that
|
||||
spec file, to help understand which changes were needed and to
|
||||
assist with future upversioning.
|
5
kubernetes/kubernetes-1.22.5/centos/build_srpm.data
Normal file
5
kubernetes/kubernetes-1.22.5/centos/build_srpm.data
Normal file
@ -0,0 +1,5 @@
|
||||
VERSION=1.22.5
|
||||
TAR_NAME=kubernetes
|
||||
TAR="$TAR_NAME-v$VERSION.tar.gz"
|
||||
COPY_LIST="${CGCS_BASE}/downloads/$TAR $FILES_BASE/*"
|
||||
TIS_PATCH_VER=PKG_GITREVCOUNT
|
@ -0,0 +1,117 @@
|
||||
From b90e3858a8d319c7526dd8190ee05edce24ba072 Mon Sep 17 00:00:00 2001
|
||||
From: Jim Gauld <james.gauld@windriver.com>
|
||||
Date: Thu, 9 Sep 2021 13:16:26 -0400
|
||||
Subject: [PATCH 7/7] Revert "use subpath for coredns only for default
|
||||
repository"
|
||||
|
||||
This reverts commit 38a41e1557649a7cc763bf737779db9aa03ec75e.
|
||||
---
|
||||
cmd/kubeadm/app/constants/constants.go | 2 +-
|
||||
cmd/kubeadm/app/images/images.go | 5 ---
|
||||
cmd/kubeadm/app/images/images_test.go | 50 --------------------------
|
||||
3 files changed, 1 insertion(+), 56 deletions(-)
|
||||
|
||||
diff --git a/cmd/kubeadm/app/constants/constants.go b/cmd/kubeadm/app/constants/constants.go
|
||||
index b3559734..95338949 100644
|
||||
--- a/cmd/kubeadm/app/constants/constants.go
|
||||
+++ b/cmd/kubeadm/app/constants/constants.go
|
||||
@@ -322,7 +322,7 @@ const (
|
||||
CoreDNSDeploymentName = "coredns"
|
||||
|
||||
// CoreDNSImageName specifies the name of the image for CoreDNS add-on
|
||||
- CoreDNSImageName = "coredns"
|
||||
+ CoreDNSImageName = "coredns/coredns"
|
||||
|
||||
// CoreDNSVersion is the version of CoreDNS to be deployed if it is used
|
||||
CoreDNSVersion = "v1.8.4"
|
||||
diff --git a/cmd/kubeadm/app/images/images.go b/cmd/kubeadm/app/images/images.go
|
||||
index 7e97dbc9..187c7189 100644
|
||||
--- a/cmd/kubeadm/app/images/images.go
|
||||
+++ b/cmd/kubeadm/app/images/images.go
|
||||
@@ -20,7 +20,6 @@ import (
|
||||
"fmt"
|
||||
|
||||
kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm"
|
||||
- kubeadmapiv1beta2 "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta2"
|
||||
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||
kubeadmutil "k8s.io/kubernetes/cmd/kubeadm/app/util"
|
||||
|
||||
@@ -48,10 +47,6 @@ func GetDNSImage(cfg *kubeadmapi.ClusterConfiguration) string {
|
||||
if cfg.DNS.ImageRepository != "" {
|
||||
dnsImageRepository = cfg.DNS.ImageRepository
|
||||
}
|
||||
- // Handle the renaming of the official image from "k8s.gcr.io/coredns" to "k8s.gcr.io/coredns/coredns
|
||||
- if dnsImageRepository == kubeadmapiv1beta2.DefaultImageRepository {
|
||||
- dnsImageRepository = fmt.Sprintf("%s/coredns", dnsImageRepository)
|
||||
- }
|
||||
// DNS uses an imageTag that corresponds to the DNS version matching the Kubernetes version
|
||||
dnsImageTag := constants.CoreDNSVersion
|
||||
|
||||
diff --git a/cmd/kubeadm/app/images/images_test.go b/cmd/kubeadm/app/images/images_test.go
|
||||
index 2fd9d3f5..9f363939 100644
|
||||
--- a/cmd/kubeadm/app/images/images_test.go
|
||||
+++ b/cmd/kubeadm/app/images/images_test.go
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
"testing"
|
||||
|
||||
kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm"
|
||||
- kubeadmapiv1beta2 "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta2"
|
||||
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||
)
|
||||
|
||||
@@ -226,52 +225,3 @@ func TestGetAllImages(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
-
|
||||
-func TestGetDNSImage(t *testing.T) {
|
||||
- var tests = []struct {
|
||||
- expected string
|
||||
- cfg *kubeadmapi.ClusterConfiguration
|
||||
- }{
|
||||
- {
|
||||
- expected: "foo.io/coredns:v1.8.4",
|
||||
- cfg: &kubeadmapi.ClusterConfiguration{
|
||||
- ImageRepository: "foo.io",
|
||||
- DNS: kubeadmapi.DNS{
|
||||
- Type: kubeadmapi.CoreDNS,
|
||||
- },
|
||||
- },
|
||||
- },
|
||||
- {
|
||||
- expected: kubeadmapiv1beta2.DefaultImageRepository + "/coredns/coredns:v1.8.4",
|
||||
- cfg: &kubeadmapi.ClusterConfiguration{
|
||||
- ImageRepository: kubeadmapiv1beta2.DefaultImageRepository,
|
||||
- DNS: kubeadmapi.DNS{
|
||||
- Type: kubeadmapi.CoreDNS,
|
||||
- },
|
||||
- },
|
||||
- },
|
||||
- {
|
||||
- expected: "foo.io/coredns/coredns:v1.8.4",
|
||||
- cfg: &kubeadmapi.ClusterConfiguration{
|
||||
- ImageRepository: "foo.io",
|
||||
- DNS: kubeadmapi.DNS{
|
||||
- Type: kubeadmapi.CoreDNS,
|
||||
- ImageMeta: kubeadmapi.ImageMeta{
|
||||
- ImageRepository: "foo.io/coredns",
|
||||
- },
|
||||
- },
|
||||
- },
|
||||
- },
|
||||
- }
|
||||
-
|
||||
- for _, test := range tests {
|
||||
- actual := GetDNSImage(test.cfg)
|
||||
- if actual != test.expected {
|
||||
- t.Errorf(
|
||||
- "failed to GetDNSImage:\n\texpected: %s\n\t actual: %s",
|
||||
- test.expected,
|
||||
- actual,
|
||||
- )
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
--
|
||||
2.17.1
|
||||
|
@ -0,0 +1,79 @@
|
||||
From 087dcfa1a84ec38541fa9870937d76b80a707e2c Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Fri, 23 Oct 2020 17:46:10 -0600
|
||||
Subject: [PATCH 6/7] enable support for kubernetes to ignore isolcpus
|
||||
|
||||
The normal mechanisms for allocating isolated CPUs do not allow
|
||||
a mix of isolated and exclusive CPUs in the same container. In
|
||||
order to allow this in *very* limited cases where the pod spec
|
||||
is known in advance we will add the ability to disable the normal
|
||||
isolcpus behaviour.
|
||||
|
||||
If the file "/etc/kubernetes/ignore_isolcpus" exists, then kubelet
|
||||
will basically forget everything it knows about isolcpus and just
|
||||
treat them like regular CPUs.
|
||||
|
||||
The admin user can then rely on the fact that CPU allocation is
|
||||
deterministic to ensure that the isolcpus they configure end up being
|
||||
allocated to the correct pods.
|
||||
|
||||
Signed-off-by: Daniel Safta <daniel.safta@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 8 ++++++++
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 7 +++++++
|
||||
2 files changed, 15 insertions(+)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 2f5b06dc..d9ec63bb 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
+ "os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -55,6 +56,13 @@ const cpuManagerStateFileName = "cpu_manager_state"
|
||||
|
||||
// get the system-level isolated CPUs
|
||||
func getIsolcpus() cpuset.CPUSet {
|
||||
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
|
||||
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
|
||||
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
|
||||
+ klog.Infof("[cpumanager] turning off isolcpus awareness")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||
if err != nil {
|
||||
klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index 72a99496..ee389a85 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -18,6 +18,7 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
+ "os"
|
||||
"strconv"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
@@ -658,6 +659,12 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||
|
||||
// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||
func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
|
||||
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
|
||||
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
// NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||
// not create UID. We also need a way to properly stub devicemanager.
|
||||
if len(string(pod.UID)) == 0 {
|
||||
--
|
||||
2.17.1
|
||||
|
@ -0,0 +1,108 @@
|
||||
From de653bd0823b248d623a39c17a3872e85ce952b0 Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Fri, 3 Sep 2021 18:05:15 -0400
|
||||
Subject: [PATCH 5/7] kubeadm: create platform pods with zero CPU resources
|
||||
|
||||
We want to specify zero CPU resources when creating the manifests
|
||||
for the static platform pods, as a workaround for the lack of
|
||||
separate resource tracking for platform resources.
|
||||
|
||||
We also specify zero CPU resources for the coredns deployment.
|
||||
manifests.go appears to be the main file for this, not sure if the
|
||||
others are used but I changed them just in case.
|
||||
|
||||
Signed-off-by: Daniel Safta <daniel.safta@windriver.com>
|
||||
---
|
||||
cluster/addons/dns/coredns/coredns.yaml.base | 2 +-
|
||||
cluster/addons/dns/coredns/coredns.yaml.in | 2 +-
|
||||
cluster/addons/dns/coredns/coredns.yaml.sed | 2 +-
|
||||
cmd/kubeadm/app/phases/addons/dns/manifests.go | 2 +-
|
||||
cmd/kubeadm/app/phases/controlplane/manifests.go | 6 +++---
|
||||
5 files changed, 7 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.base b/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
index 4ee054f8ba5..d2b58f4af0e 100644
|
||||
--- a/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
+++ b/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
@@ -138,7 +138,7 @@ spec:
|
||||
limits:
|
||||
memory: __DNS__MEMORY__LIMIT__
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.in b/cluster/addons/dns/coredns/coredns.yaml.in
|
||||
index 1f791e447c9..ff03a801646 100644
|
||||
--- a/cluster/addons/dns/coredns/coredns.yaml.in
|
||||
+++ b/cluster/addons/dns/coredns/coredns.yaml.in
|
||||
@@ -138,7 +138,7 @@ spec:
|
||||
limits:
|
||||
memory: 'dns_memory_limit'
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.sed b/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||
index 4d64278aaa4..38fc9196b28 100644
|
||||
--- a/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||
+++ b/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||
@@ -138,7 +138,7 @@ spec:
|
||||
limits:
|
||||
memory: $DNS_MEMORY_LIMIT
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cmd/kubeadm/app/phases/addons/dns/manifests.go b/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||
index 3ac6856bfc6..0763b4c63db 100644
|
||||
--- a/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||
+++ b/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||
@@ -95,7 +95,7 @@ spec:
|
||||
limits:
|
||||
memory: 170Mi
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cmd/kubeadm/app/phases/controlplane/manifests.go b/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||
index 8181bea63a4..4c4b4448dd4 100644
|
||||
--- a/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||
+++ b/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||
@@ -60,7 +60,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS),
|
||||
ReadinessProbe: staticpodutil.ReadinessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/readyz", int(endpoint.BindPort), v1.URISchemeHTTPS),
|
||||
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||
- Resources: staticpodutil.ComponentResources("250m"),
|
||||
+ Resources: staticpodutil.ComponentResources("0"),
|
||||
Env: kubeadmutil.GetProxyEnvVars(),
|
||||
}, mounts.GetVolumes(kubeadmconstants.KubeAPIServer),
|
||||
map[string]string{kubeadmconstants.KubeAPIServerAdvertiseAddressEndpointAnnotationKey: endpoint.String()}),
|
||||
@@ -72,7 +72,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeControllerManager)),
|
||||
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS),
|
||||
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||
- Resources: staticpodutil.ComponentResources("200m"),
|
||||
+ Resources: staticpodutil.ComponentResources("0"),
|
||||
Env: kubeadmutil.GetProxyEnvVars(),
|
||||
}, mounts.GetVolumes(kubeadmconstants.KubeControllerManager), nil),
|
||||
kubeadmconstants.KubeScheduler: staticpodutil.ComponentPod(v1.Container{
|
||||
@@ -83,7 +83,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeScheduler)),
|
||||
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS),
|
||||
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||
- Resources: staticpodutil.ComponentResources("100m"),
|
||||
+ Resources: staticpodutil.ComponentResources("0"),
|
||||
Env: kubeadmutil.GetProxyEnvVars(),
|
||||
}, mounts.GetVolumes(kubeadmconstants.KubeScheduler), nil),
|
||||
}
|
||||
--
|
||||
2.17.1
|
||||
|
17
kubernetes/kubernetes-1.22.5/centos/files/kubeadm.conf
Normal file
17
kubernetes/kubernetes-1.22.5/centos/files/kubeadm.conf
Normal file
@ -0,0 +1,17 @@
|
||||
# Note: This dropin only works with kubeadm and kubelet v1.11+
|
||||
[Service]
|
||||
Environment="KUBELET_KUBECONFIG_ARGS=--bootstrap-kubeconfig=/etc/kubernetes/bootstrap-kubelet.conf --kubeconfig=/etc/kubernetes/kubelet.conf"
|
||||
Environment="KUBELET_CONFIG_ARGS=--config=/var/lib/kubelet/config.yaml"
|
||||
# This is a file that "kubeadm init" and "kubeadm join" generates at runtime, populating the KUBELET_KUBEADM_ARGS variable dynamically
|
||||
EnvironmentFile=-/var/lib/kubelet/kubeadm-flags.env
|
||||
# This is a file that the user can use for overrides of the kubelet args as a last resort. Preferably, the user should use
|
||||
# the .NodeRegistration.KubeletExtraArgs object in the configuration files instead. KUBELET_EXTRA_ARGS should be sourced from this file.
|
||||
EnvironmentFile=-/etc/sysconfig/kubelet
|
||||
ExecStart=
|
||||
ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS
|
||||
ExecStartPre=-/usr/bin/kubelet-cgroup-setup.sh
|
||||
ExecStartPost=/bin/bash -c 'echo $MAINPID > /var/run/kubelet.pid;'
|
||||
ExecStopPost=/bin/rm -f /var/run/kubelet.pid
|
||||
Restart=always
|
||||
StartLimitInterval=0
|
||||
RestartSec=10
|
@ -0,0 +1,132 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# This script does minimal cgroup setup for kubelet. This creates k8s-infra
|
||||
# cgroup for a minimal set of resource controllers, and configures cpuset
|
||||
# attributes to span all online cpus and nodes. This will do nothing if
|
||||
# the k8s-infra cgroup already exists (i.e., assume already configured).
|
||||
# NOTE: The creation of directories under /sys/fs/cgroup is volatile, and
|
||||
# does not persist reboots. The cpuset.mems and cpuset.cpus is later updated
|
||||
# by puppet kubernetes.pp manifest.
|
||||
#
|
||||
|
||||
# Define minimal path
|
||||
PATH=/bin:/usr/bin:/usr/local/bin
|
||||
|
||||
# Log info message to /var/log/daemon.log
|
||||
function LOG {
|
||||
logger -p daemon.info "$0($$): $@"
|
||||
}
|
||||
|
||||
# Log error message to /var/log/daemon.log
|
||||
function ERROR {
|
||||
logger -s -p daemon.error "$0($$): ERROR: $@"
|
||||
}
|
||||
|
||||
# Create minimal cgroup directories and configure cpuset attributes if required
|
||||
function create_cgroup {
|
||||
local cg_name=$1
|
||||
local cg_nodeset=$2
|
||||
local cg_cpuset=$3
|
||||
|
||||
local CGROUP=/sys/fs/cgroup
|
||||
local CONTROLLERS_AUTO_DELETED=("pids" "hugetlb")
|
||||
local CONTROLLERS_PRESERVED=("cpuset" "memory" "cpu,cpuacct" "systemd")
|
||||
local cnt=''
|
||||
local CGDIR=''
|
||||
local RC=0
|
||||
|
||||
# Ensure that these cgroups are created every time as they are auto deleted
|
||||
for cnt in ${CONTROLLERS_AUTO_DELETED[@]}; do
|
||||
CGDIR=${CGROUP}/${cnt}/${cg_name}
|
||||
if [ -d ${CGDIR} ]; then
|
||||
LOG "Nothing to do, already configured: ${CGDIR}."
|
||||
continue
|
||||
fi
|
||||
LOG "Creating: ${CGDIR}"
|
||||
mkdir -p ${CGDIR}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Creating: ${CGDIR}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
done
|
||||
|
||||
# These cgroups are preserved so if any of these are encountered additional
|
||||
# cgroup setup is not required
|
||||
for cnt in ${CONTROLLERS_PRESERVED[@]}; do
|
||||
CGDIR=${CGROUP}/${cnt}/${cg_name}
|
||||
if [ -d ${CGDIR} ]; then
|
||||
LOG "Nothing to do, already configured: ${CGDIR}."
|
||||
exit ${RC}
|
||||
fi
|
||||
LOG "Creating: ${CGDIR}"
|
||||
mkdir -p ${CGDIR}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Creating: ${CGDIR}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
done
|
||||
|
||||
# Customize cpuset attributes
|
||||
LOG "Configuring cgroup: ${cg_name}, nodeset: ${cg_nodeset}, cpuset: ${cg_cpuset}"
|
||||
CGDIR=${CGROUP}/cpuset/${cg_name}
|
||||
local CGMEMS=${CGDIR}/cpuset.mems
|
||||
local CGCPUS=${CGDIR}/cpuset.cpus
|
||||
local CGTASKS=${CGDIR}/tasks
|
||||
|
||||
# Assign cgroup memory nodeset
|
||||
LOG "Assign nodeset ${cg_nodeset} to ${CGMEMS}"
|
||||
/bin/echo ${cg_nodeset} > ${CGMEMS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Unable to write to: ${CGMEMS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
# Assign cgroup cpus
|
||||
LOG "Assign cpuset ${cg_cpuset} to ${CGCPUS}"
|
||||
/bin/echo ${cg_cpuset} > ${CGCPUS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Assigning: ${cg_cpuset} to ${CGCPUS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
# Set file ownership
|
||||
chown root:root ${CGMEMS} ${CGCPUS} ${CGTASKS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Setting owner for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
# Set file mode permissions
|
||||
chmod 644 ${CGMEMS} ${CGCPUS} ${CGTASKS}
|
||||
RC=$?
|
||||
if [ ${RC} -ne 0 ]; then
|
||||
ERROR "Setting mode for: ${CGMEMS}, ${CGCPUS}, ${CGTASKS}, rc=${RC}"
|
||||
exit ${RC}
|
||||
fi
|
||||
|
||||
return ${RC}
|
||||
}
|
||||
|
||||
if [ $UID -ne 0 ]; then
|
||||
ERROR "Require sudo/root."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Configure default kubepods cpuset to span all online cpus and nodes.
|
||||
ONLINE_NODESET=$(/bin/cat /sys/devices/system/node/online)
|
||||
ONLINE_CPUSET=$(/bin/cat /sys/devices/system/cpu/online)
|
||||
|
||||
# Configure kubelet cgroup to match cgroupRoot.
|
||||
create_cgroup 'k8s-infra' ${ONLINE_NODESET} ${ONLINE_CPUSET}
|
||||
|
||||
exit $?
|
||||
|
@ -0,0 +1,297 @@
|
||||
From 3f69868f7bca99f6875dd4d197b3a974d1b558ed Mon Sep 17 00:00:00 2001
|
||||
From: Daniel Safta <daniel.safta@windriver.com>
|
||||
Date: Wed, 22 Sep 2021 10:09:06 -0400
|
||||
Subject: [PATCH 1/7] kubelet cpumanager disable CFS quota throttling for
|
||||
Guaranteed pods
|
||||
|
||||
This disables CFS CPU quota to avoid performance degradation due to
|
||||
Linux kernel CFS quota implementation. Note that 4.18 kernel attempts
|
||||
to solve the CFS throttling problem, but there are reports that it is
|
||||
not completely effective.
|
||||
|
||||
This disables CFS quota throttling for Guaranteed pods for both
|
||||
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
|
||||
Disabling has a dramatic latency improvement for HTTP response times.
|
||||
|
||||
This patch is refactored in 1.21.3 due to new internal_container_lifecycle
|
||||
framework. We leverage the same mechanism to set Linux resources as:
|
||||
cpu manager: specify the container CPU set during the creation
|
||||
(commit 38dc7509f862f081828e7d9167107b8c6e98ea23).
|
||||
|
||||
Signed-off-by: Daniel Safta <daniel.safta@windriver.com>
|
||||
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 7 +++
|
||||
pkg/kubelet/cm/cpumanager/fake_cpu_manager.go | 12 +++--
|
||||
pkg/kubelet/cm/helpers_linux.go | 12 ++++-
|
||||
pkg/kubelet/cm/helpers_linux_test.go | 45 ++++++++++---------
|
||||
.../cm/internal_container_lifecycle_linux.go | 12 ++++-
|
||||
5 files changed, 61 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 4777c132..29194271 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -72,6 +72,9 @@ type Manager interface {
|
||||
// State returns a read-only interface to the internal CPU manager state.
|
||||
State() state.Reader
|
||||
|
||||
+ // GetCPUPolicy returns the assigned CPU manager policy
|
||||
+ GetCPUPolicy() string
|
||||
+
|
||||
// GetTopologyHints implements the topologymanager.HintProvider Interface
|
||||
// and is consulted to achieve NUMA aware resource alignment among this
|
||||
// and other resource controllers.
|
||||
@@ -310,6 +313,10 @@ func (m *manager) State() state.Reader {
|
||||
return m.state
|
||||
}
|
||||
|
||||
+func (m *manager) GetCPUPolicy() string {
|
||||
+ return m.policy.Name()
|
||||
+}
|
||||
+
|
||||
func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
|
||||
// The pod is during the admission phase. We need to save the pod to avoid it
|
||||
// being cleaned before the admission ended
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
|
||||
index 28578e64..018f13b6 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
|
||||
@@ -17,7 +17,7 @@ limitations under the License.
|
||||
package cpumanager
|
||||
|
||||
import (
|
||||
- "k8s.io/api/core/v1"
|
||||
+ v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/containermap"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
@@ -28,7 +28,8 @@ import (
|
||||
)
|
||||
|
||||
type fakeManager struct {
|
||||
- state state.State
|
||||
+ policy Policy
|
||||
+ state state.State
|
||||
}
|
||||
|
||||
func (m *fakeManager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
|
||||
@@ -70,6 +71,10 @@ func (m *fakeManager) State() state.Reader {
|
||||
return m.state
|
||||
}
|
||||
|
||||
+func (m *fakeManager) GetCPUPolicy() string {
|
||||
+ return m.policy.Name()
|
||||
+}
|
||||
+
|
||||
func (m *fakeManager) GetCPUs(podUID, containerName string) cpuset.CPUSet {
|
||||
klog.InfoS("GetCPUs", "podUID", podUID, "containerName", containerName)
|
||||
return cpuset.CPUSet{}
|
||||
@@ -83,6 +88,7 @@ func (m *fakeManager) GetAllocatableCPUs() cpuset.CPUSet {
|
||||
// NewFakeManager creates empty/fake cpu manager
|
||||
func NewFakeManager() Manager {
|
||||
return &fakeManager{
|
||||
- state: state.NewMemoryState(),
|
||||
+ policy: &nonePolicy{},
|
||||
+ state: state.NewMemoryState(),
|
||||
}
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
|
||||
index 33b14e31..a6e5326b 100644
|
||||
--- a/pkg/kubelet/cm/helpers_linux.go
|
||||
+++ b/pkg/kubelet/cm/helpers_linux.go
|
||||
@@ -25,7 +25,7 @@ import (
|
||||
|
||||
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
|
||||
- "k8s.io/api/core/v1"
|
||||
+ v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/kubernetes/pkg/api/v1/resource"
|
||||
@@ -167,6 +167,16 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64,
|
||||
// build the result
|
||||
result := &ResourceConfig{}
|
||||
if qosClass == v1.PodQOSGuaranteed {
|
||||
+ // Disable CFS CPU quota to avoid performance degradation due to
|
||||
+ // Linux kernel CFS throttle implementation.
|
||||
+ // NOTE: 4.18 kernel attempts to solve CFS throttling problem,
|
||||
+ // but there are reports that it is not completely effective.
|
||||
+ // This will configure cgroup CFS parameters at pod level:
|
||||
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_quota_us
|
||||
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_period_us
|
||||
+ cpuQuota = int64(-1)
|
||||
+ cpuPeriod = uint64(100000)
|
||||
+
|
||||
result.CpuShares = &cpuShares
|
||||
result.CpuQuota = &cpuQuota
|
||||
result.CpuPeriod = &cpuPeriod
|
||||
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
|
||||
index bdd01ea1..b907f575 100644
|
||||
--- a/pkg/kubelet/cm/helpers_linux_test.go
|
||||
+++ b/pkg/kubelet/cm/helpers_linux_test.go
|
||||
@@ -1,3 +1,4 @@
|
||||
+//go:build linux
|
||||
// +build linux
|
||||
|
||||
/*
|
||||
@@ -24,7 +25,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
- "k8s.io/api/core/v1"
|
||||
+ v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
||||
@@ -63,8 +64,9 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
burstablePartialShares := MilliCPUToShares(200)
|
||||
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||
guaranteedShares := MilliCPUToShares(100)
|
||||
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||
+ guaranteedQuotaPeriod := uint64(100000)
|
||||
+ guaranteedQuota := int64(-1)
|
||||
+ guaranteedTunedQuota := int64(-1)
|
||||
memoryQuantity = resource.MustParse("100Mi")
|
||||
cpuNoLimit := int64(-1)
|
||||
guaranteedMemory := memoryQuantity.Value()
|
||||
@@ -203,8 +205,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: true,
|
||||
- quotaPeriod: defaultQuotaPeriod,
|
||||
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-no-cpu-enforcement": {
|
||||
pod: &v1.Pod{
|
||||
@@ -217,8 +219,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: false,
|
||||
- quotaPeriod: defaultQuotaPeriod,
|
||||
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-with-tuned-quota": {
|
||||
pod: &v1.Pod{
|
||||
@@ -231,8 +233,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: true,
|
||||
- quotaPeriod: tunedQuotaPeriod,
|
||||
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-no-cpu-enforcement-with-tuned-quota": {
|
||||
pod: &v1.Pod{
|
||||
@@ -245,8 +247,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: false,
|
||||
- quotaPeriod: tunedQuotaPeriod,
|
||||
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
}
|
||||
|
||||
@@ -283,8 +285,9 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
burstablePartialShares := MilliCPUToShares(200)
|
||||
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||
guaranteedShares := MilliCPUToShares(100)
|
||||
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||
+ guaranteedQuotaPeriod := uint64(100000)
|
||||
+ guaranteedQuota := int64(-1)
|
||||
+ guaranteedTunedQuota := int64(-1)
|
||||
memoryQuantity = resource.MustParse("100Mi")
|
||||
cpuNoLimit := int64(-1)
|
||||
guaranteedMemory := memoryQuantity.Value()
|
||||
@@ -423,8 +426,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: true,
|
||||
- quotaPeriod: defaultQuotaPeriod,
|
||||
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-no-cpu-enforcement": {
|
||||
pod: &v1.Pod{
|
||||
@@ -437,8 +440,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: false,
|
||||
- quotaPeriod: defaultQuotaPeriod,
|
||||
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-with-tuned-quota": {
|
||||
pod: &v1.Pod{
|
||||
@@ -451,8 +454,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: true,
|
||||
- quotaPeriod: tunedQuotaPeriod,
|
||||
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-no-cpu-enforcement-with-tuned-quota": {
|
||||
pod: &v1.Pod{
|
||||
@@ -465,8 +468,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: false,
|
||||
- quotaPeriod: tunedQuotaPeriod,
|
||||
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
}
|
||||
|
||||
diff --git a/pkg/kubelet/cm/internal_container_lifecycle_linux.go b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
||||
index 9cf41620..2e15d9ba 100644
|
||||
--- a/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
||||
+++ b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
||||
@@ -1,3 +1,4 @@
|
||||
+//go:build linux
|
||||
// +build linux
|
||||
|
||||
/*
|
||||
@@ -22,8 +23,9 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
- "k8s.io/api/core/v1"
|
||||
+ v1 "k8s.io/api/core/v1"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
||||
+ v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
)
|
||||
|
||||
func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, container *v1.Container, containerConfig *runtimeapi.ContainerConfig) error {
|
||||
@@ -33,7 +35,13 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain
|
||||
containerConfig.Linux.Resources.CpusetCpus = allocatedCPUs.String()
|
||||
}
|
||||
}
|
||||
-
|
||||
+ // Disable cgroup CFS throttle at the container level.
|
||||
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_quota_us
|
||||
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_period_us
|
||||
+ if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed {
|
||||
+ containerConfig.Linux.Resources.CpuPeriod = int64(100000)
|
||||
+ containerConfig.Linux.Resources.CpuQuota = int64(-1)
|
||||
+ }
|
||||
if i.memoryManager != nil {
|
||||
numaNodes := i.memoryManager.GetMemoryNUMANodes(pod, container)
|
||||
if numaNodes.Len() > 0 {
|
||||
--
|
||||
2.17.1
|
||||
|
||||
|
@ -0,0 +1,160 @@
|
||||
From 66f4a61ce77624dcc0b520e01fccacec61dfea37 Mon Sep 17 00:00:00 2001
|
||||
From: Jim Gauld <james.gauld@windriver.com>
|
||||
Date: Tue, 7 Sep 2021 14:21:03 -0400
|
||||
Subject: [PATCH 3/7] kubelet cpumanager infrastructure pods use system
|
||||
reserved CPUs
|
||||
|
||||
This assigns system infrastructure pods to the "reserved" cpuset
|
||||
to isolate them from the shared pool of CPUs.
|
||||
|
||||
Infrastructure pods include any pods that belong to the kube-system,
|
||||
armada, cert-manager, vault, platform-deployment-manager, portieris,
|
||||
notification or flux-helm namespaces.
|
||||
|
||||
The implementation is a bit simplistic, it is assumed that the
|
||||
"reserved" cpuset is large enough to handle all infrastructure pods
|
||||
CPU allocations.
|
||||
|
||||
This also prevents infrastucture pods from using Guaranteed resources.
|
||||
|
||||
Signed-off-by: Daniel Safta <daniel.safta@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 52 ++++++++++++++++---
|
||||
.../cm/cpumanager/policy_static_test.go | 19 ++++++-
|
||||
2 files changed, 63 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index 89625fc6..492457b1 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -53,6 +53,11 @@ func (e SMTAlignmentError) Type() string {
|
||||
return ErrorSMTAlignment
|
||||
}
|
||||
|
||||
+// Define namespaces used by platform infrastructure pods
|
||||
+var infraNamespaces = [...]string{
|
||||
+ "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification", "flux-helm",
|
||||
+}
|
||||
+
|
||||
// staticPolicy is a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
@@ -141,12 +146,12 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
klog.InfoS("Reserved CPUs not available for exclusive assignment", "reservedSize", reserved.Size(), "reserved", reserved)
|
||||
|
||||
return &staticPolicy{
|
||||
- topology: topology,
|
||||
- reserved: reserved,
|
||||
+ topology: topology,
|
||||
+ reserved: reserved,
|
||||
excludeReserved: excludeReserved,
|
||||
- affinity: affinity,
|
||||
- cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
- options: opts,
|
||||
+ affinity: affinity,
|
||||
+ cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
+ options: opts,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -191,8 +196,8 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
// - user tampered with file
|
||||
if !p.excludeReserved {
|
||||
if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
- p.reserved.String(), tmpDefaultCPUset.String())
|
||||
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
+ p.reserved.String(), tmpDefaultCPUset.String())
|
||||
}
|
||||
}
|
||||
// 2. Check if state for static policy is consistent
|
||||
@@ -262,6 +267,25 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
|
||||
}
|
||||
|
||||
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) error {
|
||||
+ // Process infra pods before guaranteed pods
|
||||
+ if isKubeInfra(pod) {
|
||||
+ // Container belongs in reserved pool.
|
||||
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
|
||||
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping (namespace: %s, pod UID: %s, pod: %s, container: %s)", pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
+ cpuset := p.reserved
|
||||
+ if cpuset.IsEmpty() {
|
||||
+ // If this happens then someone messed up.
|
||||
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
|
||||
+ }
|
||||
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
if numCPUs := p.guaranteedCPUs(pod, container); numCPUs != 0 {
|
||||
klog.InfoS("Static policy: Allocate", "pod", klog.KObj(pod), "containerName", container.Name)
|
||||
// container belongs in an exclusively allocated pool
|
||||
@@ -366,6 +390,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
|
||||
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
|
||||
return 0
|
||||
}
|
||||
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
|
||||
+ if isKubeInfra(pod) {
|
||||
+ return 0
|
||||
+ }
|
||||
// Safe downcast to do for all systems with < 2.1 billion CPUs.
|
||||
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
|
||||
// https://golang.org/ref/spec#Numeric_types
|
||||
@@ -568,3 +596,13 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
|
||||
|
||||
return hints
|
||||
}
|
||||
+
|
||||
+// check if a given pod is in a platform infrastructure namespace
|
||||
+func isKubeInfra(pod *v1.Pod) bool {
|
||||
+ for _, namespace := range infraNamespaces {
|
||||
+ if namespace == pod.Namespace {
|
||||
+ return true
|
||||
+ }
|
||||
+ }
|
||||
+ return false
|
||||
+}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index 80bd04a1..34c5a23c 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -830,7 +830,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
-
|
||||
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
|
||||
+ infraPod.Namespace = "kube-system"
|
||||
testCases := []staticPolicyTestWithResvList{
|
||||
{
|
||||
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||
@@ -872,6 +873,22 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
expCPUAlloc: true,
|
||||
expCSet: cpuset.NewCPUSet(4, 5),
|
||||
},
|
||||
+ {
|
||||
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
|
||||
+ topo: topoSingleSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ stAssignments: state.ContainerCPUAssignments{
|
||||
+ "fakePod": map[string]cpuset.CPUSet{
|
||||
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
+ },
|
||||
+ },
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
+ pod: infraPod,
|
||||
+ expErr: nil,
|
||||
+ expCPUAlloc: true,
|
||||
+ expCSet: cpuset.NewCPUSet(0, 1),
|
||||
+ },
|
||||
}
|
||||
|
||||
testExcl := true
|
||||
--
|
||||
2.17.1
|
@ -0,0 +1,564 @@
|
||||
From f3db147d0a40a6f336e9fa6d737e36366f9adf87 Mon Sep 17 00:00:00 2001
|
||||
From: Daniel Safta <daniel.safta@windriver.com>
|
||||
Date: Thu, 13 Jan 2022 11:08:48 +0200
|
||||
Subject: [PATCH 4/7] kubelet cpumanager introduce concept of isolated CPUs
|
||||
|
||||
This introduces the concept of "isolated CPUs", which are CPUs that
|
||||
have been isolated at the kernel level via the "isolcpus" kernel boot
|
||||
parameter.
|
||||
|
||||
When starting the kubelet process, two separate sets of reserved CPUs
|
||||
may be specified. With this change CPUs reserved via
|
||||
'--system-reserved=cpu' will be used for infrastructure pods while the
|
||||
isolated CPUs should be reserved via '--kube-reserved=cpu' to cause
|
||||
kubelet to skip over them for "normal" CPU resource tracking. The
|
||||
kubelet code will double-check that the specified isolated CPUs match
|
||||
what the kernel exposes in "/sys/devices/system/cpu/isolated".
|
||||
|
||||
A plugin (outside the scope of this commit) will expose the isolated
|
||||
CPUs to kubelet via the device plugin API.
|
||||
|
||||
If a pod specifies some number of "isolcpus" resources, the device
|
||||
manager will allocate them. In this code we check whether such
|
||||
resources have been allocated, and if so we set the container cpuset to
|
||||
the isolated CPUs. This does mean that it really only makes sense to
|
||||
specify "isolcpus" resources for best-effort or burstable pods, not for
|
||||
guaranteed ones since that would throw off the accounting code. In
|
||||
order to ensure the accounting still works as designed, if "isolcpus"
|
||||
are specified for guaranteed pods, the affinity will be set to the
|
||||
non-isolated CPUs.
|
||||
|
||||
This patch was refactored in 1.21.3 due to upstream API change
|
||||
node: podresources: make GetDevices() consistent
|
||||
(commit ad68f9588c72d6477b5a290c548a9031063ac659).
|
||||
|
||||
The routine podIsolCPUs() was refactored in 1.21.3 since the API
|
||||
p.deviceManager.GetDevices() is returning multiple devices with
|
||||
a device per cpu. The resultant cpuset needs to be the aggregate.
|
||||
|
||||
The routine NewStaticPolicy was refactored in 1.22.5, adding a new argument
|
||||
in its signature: cpuPolicyOptions map[string]string. This change is implies
|
||||
shifting the new arguments(deviceManager, excludeReserved) with one position
|
||||
to the right.
|
||||
|
||||
|
||||
Signed-off-by: Daniel Safta <daniel.safta@windriver.com>
|
||||
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||
Co-authored-by: Chris Friesen <chris.friesen@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/container_manager_linux.go | 2 +
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 35 +++++++-
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 14 +++-
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 82 +++++++++++++++++--
|
||||
.../cm/cpumanager/policy_static_test.go | 50 ++++++++---
|
||||
5 files changed, 164 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go
|
||||
index d7aa6270..de68ad98 100644
|
||||
--- a/pkg/kubelet/cm/container_manager_linux.go
|
||||
+++ b/pkg/kubelet/cm/container_manager_linux.go
|
||||
@@ -1,3 +1,4 @@
|
||||
+//go:build linux
|
||||
// +build linux
|
||||
|
||||
/*
|
||||
@@ -339,6 +340,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
|
||||
cm.GetNodeAllocatableReservation(),
|
||||
nodeConfig.KubeletRootDir,
|
||||
cm.topologyManager,
|
||||
+ cm.deviceManager,
|
||||
)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to initialize cpu manager")
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index fba0bfd1..1d0ff0e4 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -18,7 +18,9 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
+ "io/ioutil"
|
||||
"math"
|
||||
+ "strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -32,6 +34,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
@@ -50,6 +53,25 @@ type policyName string
|
||||
// cpuManagerStateFileName is the file name where cpu manager stores its state
|
||||
const cpuManagerStateFileName = "cpu_manager_state"
|
||||
|
||||
+// get the system-level isolated CPUs
|
||||
+func getIsolcpus() cpuset.CPUSet {
|
||||
+ dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
+ // The isolated cpus string ends in a newline
|
||||
+ cpustring := strings.TrimSuffix(string(dat), "\n")
|
||||
+ cset, err := cpuset.Parse(cpustring)
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] unable to parse sysfs isolcpus string to cpuset")
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+
|
||||
+ return cset
|
||||
+}
|
||||
+
|
||||
// Manager interface provides methods for Kubelet to manage pod cpus.
|
||||
type Manager interface {
|
||||
// Start is called during Kubelet initialization.
|
||||
@@ -149,7 +171,8 @@ func (s *sourcesReadyStub) AddSource(source string) {}
|
||||
func (s *sourcesReadyStub) AllReady() bool { return true }
|
||||
|
||||
// NewManager creates new cpu manager based on provided policy
|
||||
-func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
|
||||
+func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store, deviceManager devicemanager.Manager) (Manager, error) {
|
||||
+
|
||||
var topo *topology.CPUTopology
|
||||
var policy Policy
|
||||
var err error
|
||||
@@ -190,7 +213,15 @@ func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconc
|
||||
// NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||
// This variable is primarily to make testing easier.
|
||||
excludeReserved := true
|
||||
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions, excludeReserved)
|
||||
+
|
||||
+ // isolCPUs is the set of kernel-isolated CPUs. They should be a subset of specificCPUs or
|
||||
+ // of the CPUs that NewStaticPolicy() will pick if numReservedCPUs is set. It's only in the
|
||||
+ // argument list here for ease of testing, it's really internal to the policy.
|
||||
+ isolCPUs := getIsolcpus()
|
||||
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, isolCPUs, affinity, cpuPolicyOptions, deviceManager, excludeReserved)
|
||||
+ if err != nil {
|
||||
+ return nil, fmt.Errorf("new static policy error: %v", err)
|
||||
+ }
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %w", err)
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index bbfc70b8..39532b1c 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -37,6 +37,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
)
|
||||
|
||||
@@ -215,6 +216,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
@@ -230,8 +232,10 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
},
|
||||
0,
|
||||
cpuset.NewCPUSet(),
|
||||
+ cpuset.NewCPUSet(),
|
||||
topologymanager.NewFakeManager(),
|
||||
nil,
|
||||
+ testDM,
|
||||
testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
@@ -482,8 +486,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
}
|
||||
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||
|
||||
mockState := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -638,7 +643,9 @@ func TestCPUManagerGenerate(t *testing.T) {
|
||||
}
|
||||
defer os.RemoveAll(sDir)
|
||||
|
||||
- mgr, err := NewManager(testCase.cpuPolicyName, nil, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
|
||||
+ testDM, err := devicemanager.NewManagerStub()
|
||||
+ mgr, err := NewManager(testCase.cpuPolicyName, nil, 5*time.Second, machineInfo, cpuset.NewCPUSet(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM)
|
||||
+
|
||||
if testCase.expectedError != nil {
|
||||
if !strings.Contains(err.Error(), testCase.expectedError.Error()) {
|
||||
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), testCase.expectedError.Error())
|
||||
@@ -995,6 +1002,7 @@ func TestReconcileState(t *testing.T) {
|
||||
// the following tests are with --reserved-cpus configured
|
||||
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -1009,8 +1017,10 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
},
|
||||
1,
|
||||
cpuset.NewCPUSet(0),
|
||||
+ cpuset.NewCPUSet(),
|
||||
topologymanager.NewFakeManager(),
|
||||
nil,
|
||||
+ testDM,
|
||||
testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index 953f41e9..ccac9f08 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -18,6 +18,7 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
+ "strconv"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
@@ -25,6 +26,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
)
|
||||
@@ -101,6 +103,10 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reserved cpuset.CPUSet
|
||||
+ // subset of reserved CPUs with isolcpus attribute
|
||||
+ isolcpus cpuset.CPUSet
|
||||
+ // parent containerManager, used to get device list
|
||||
+ deviceManager devicemanager.Manager
|
||||
// If true, default CPUSet should exclude reserved CPUs
|
||||
excludeReserved bool
|
||||
// topology manager reference to get container Topology affinity
|
||||
@@ -117,7 +123,8 @@ var _ Policy = &staticPolicy{}
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, excludeReserved bool) (Policy, error) {
|
||||
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, isolCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, deviceManager devicemanager.Manager, excludeReserved bool) (Policy, error) {
|
||||
+
|
||||
opts, err := NewStaticPolicyOptions(cpuPolicyOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -144,10 +151,17 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
}
|
||||
|
||||
klog.InfoS("Reserved CPUs not available for exclusive assignment", "reservedSize", reserved.Size(), "reserved", reserved)
|
||||
+ if !isolCPUs.IsSubsetOf(reserved) {
|
||||
+ klog.Errorf("[cpumanager] isolCPUs %v is not a subset of reserved %v", isolCPUs, reserved)
|
||||
+ reserved = reserved.Union(isolCPUs)
|
||||
+ klog.Warningf("[cpumanager] mismatch isolCPUs %v, force reserved %v", isolCPUs, reserved)
|
||||
+ }
|
||||
|
||||
return &staticPolicy{
|
||||
topology: topology,
|
||||
reserved: reserved,
|
||||
+ isolcpus: isolCPUs,
|
||||
+ deviceManager: deviceManager,
|
||||
excludeReserved: excludeReserved,
|
||||
affinity: affinity,
|
||||
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
@@ -185,8 +199,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
} else {
|
||||
s.SetDefaultCPUSet(allCPUs)
|
||||
}
|
||||
- klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||
- allCPUs, p.reserved, s.GetDefaultCPUSet())
|
||||
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, isolcpus:%v, default:%v\n",
|
||||
+ allCPUs, p.reserved, p.isolcpus, s.GetDefaultCPUSet())
|
||||
+
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -277,10 +292,11 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
return nil
|
||||
}
|
||||
|
||||
- cpuset := p.reserved
|
||||
+ cpuset := p.reserved.Clone().Difference(p.isolcpus)
|
||||
if cpuset.IsEmpty() {
|
||||
// If this happens then someone messed up.
|
||||
- return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved)
|
||||
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v, isolcpus:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reserved, p.isolcpus)
|
||||
+
|
||||
}
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
klog.Infof("[cpumanager] static policy: reserved: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
||||
@@ -324,8 +340,34 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
}
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
p.updateCPUsToReuse(pod, container, cpuset)
|
||||
+ klog.Infof("[cpumanager] guaranteed: AddContainer "+
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); numCPUS=%d, cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, numCPUs, cpuset)
|
||||
+ return nil
|
||||
+ }
|
||||
|
||||
+ if isolcpus := p.podIsolCPUs(pod, container); isolcpus.Size() > 0 {
|
||||
+ // container has requested isolated CPUs
|
||||
+ if set, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
+ if set.Equals(isolcpus) {
|
||||
+ klog.Infof("[cpumanager] isolcpus container already present in state, skipping (namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ return nil
|
||||
+ } else {
|
||||
+ klog.Infof("[cpumanager] isolcpus container state has cpus %v, should be %v (namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ isolcpus, set, pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ }
|
||||
+ }
|
||||
+ // Note that we do not do anything about init containers here.
|
||||
+ // It looks like devices are allocated per-pod based on effective requests/limits
|
||||
+ // and extra devices from initContainers are not freed up when the regular containers start.
|
||||
+ // TODO: confirm this is still true for 1.20
|
||||
+ s.SetCPUSet(string(pod.UID), container.Name, isolcpus)
|
||||
+ klog.Infof("[cpumanager] isolcpus: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, isolcpus)
|
||||
+ return nil
|
||||
}
|
||||
+
|
||||
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||
return nil
|
||||
}
|
||||
@@ -607,3 +649,33 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
+
|
||||
+// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||
+func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||
+ // NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||
+ // not create UID. We also need a way to properly stub devicemanager.
|
||||
+ if len(string(pod.UID)) == 0 {
|
||||
+ return cpuset.NewCPUSet()
|
||||
+ }
|
||||
+ resContDevices := p.deviceManager.GetDevices(string(pod.UID), container.Name)
|
||||
+ cpuSet := cpuset.NewCPUSet()
|
||||
+ for resourceName, resourceDevs := range resContDevices {
|
||||
+ // this resource name needs to match the isolcpus device plugin
|
||||
+ if resourceName == "windriver.com/isolcpus" {
|
||||
+ for devID, _ := range resourceDevs {
|
||||
+ cpuStrList := []string{devID}
|
||||
+ if len(cpuStrList) > 0 {
|
||||
+ // loop over the list of strings, convert each one to int, add to cpuset
|
||||
+ for _, cpuStr := range cpuStrList {
|
||||
+ cpu, err := strconv.Atoi(cpuStr)
|
||||
+ if err != nil {
|
||||
+ panic(err)
|
||||
+ }
|
||||
+ cpuSet = cpuSet.Union(cpuset.NewCPUSet(cpu))
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ return cpuSet
|
||||
+}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index 34c5a23c..a0eb451b 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -25,6 +25,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
)
|
||||
@@ -65,8 +66,9 @@ func (spt staticPolicyTest) PseudoClone() staticPolicyTest {
|
||||
}
|
||||
|
||||
func TestStaticPolicyName(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||
|
||||
policyName := policy.Name()
|
||||
if policyName != "static" {
|
||||
@@ -76,6 +78,7 @@ func TestStaticPolicyName(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyStart(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "non-corrupted state",
|
||||
@@ -151,7 +154,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testDM, testCase.excludeReserved)
|
||||
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
@@ -199,7 +202,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
largeTopoCPUSet := largeTopoBuilder.Result()
|
||||
largeTopoSock0CPUSet := largeTopoSock0Builder.Result()
|
||||
largeTopoSock1CPUSet := largeTopoSock1Builder.Result()
|
||||
-
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
// these are the cases which must behave the same regardless the policy options.
|
||||
// So we will permutate the options to ensure this holds true.
|
||||
optionsInsensitiveTestCases := []staticPolicyTest{
|
||||
@@ -529,8 +532,9 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.options, testExcl)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.options, testDM, testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -573,6 +577,7 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) {
|
||||
|
||||
func TestStaticPolicyRemove(t *testing.T) {
|
||||
excludeReserved := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "SingleSocketHT, DeAllocOneContainer",
|
||||
@@ -631,7 +636,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -654,6 +659,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
|
||||
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
excludeReserved := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
@@ -722,7 +728,8 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
||||
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved)
|
||||
+
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: tc.stAssignments,
|
||||
@@ -755,6 +762,7 @@ type staticPolicyTestWithResvList struct {
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
reserved cpuset.CPUSet
|
||||
+ isolcpus cpuset.CPUSet
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
stDefaultCPUSet cpuset.CPUSet
|
||||
pod *v1.Pod
|
||||
@@ -765,6 +773,8 @@ type staticPolicyTestWithResvList struct {
|
||||
}
|
||||
|
||||
func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
+ testExcl := false
|
||||
testCases := []staticPolicyTestWithResvList{
|
||||
{
|
||||
description: "empty cpuset",
|
||||
@@ -794,11 +804,9 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
||||
},
|
||||
}
|
||||
- testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||
-
|
||||
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expNewErr, err)
|
||||
@@ -838,6 +846,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 1,
|
||||
reserved: cpuset.NewCPUSet(0),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||
@@ -850,6 +859,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||
@@ -862,6 +872,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
@@ -878,6 +889,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
@@ -889,11 +901,29 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
expCPUAlloc: true,
|
||||
expCSet: cpuset.NewCPUSet(0, 1),
|
||||
},
|
||||
+ {
|
||||
+ description: "InfraPod, SingleSocketHT, Isolcpus, ExpectAllocReserved",
|
||||
+ topo: topoSingleSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ reserved: cpuset.NewCPUSet(0, 1),
|
||||
+ isolcpus: cpuset.NewCPUSet(1),
|
||||
+ stAssignments: state.ContainerCPUAssignments{
|
||||
+ "fakePod": map[string]cpuset.CPUSet{
|
||||
+ "fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
+ },
|
||||
+ },
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
+ pod: infraPod,
|
||||
+ expErr: nil,
|
||||
+ expCPUAlloc: true,
|
||||
+ expCSet: cpuset.NewCPUSet(0),
|
||||
+ },
|
||||
}
|
||||
|
||||
testExcl := true
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, testCase.isolcpus, topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
--
|
||||
2.17.1
|
||||
|
@ -0,0 +1,319 @@
|
||||
From 196f2843fe4e6746ca2ba6f97211d9a54733781c Mon Sep 17 00:00:00 2001
|
||||
From: Daniel Safta <daniel.safta@windriver.com>
|
||||
Date: Wed, 12 Jan 2022 15:23:37 +0200
|
||||
Subject: [PATCH 2/7] kubelet cpumanager keep normal containers off reserved
|
||||
CPUs
|
||||
|
||||
When starting the kubelet process, two separate sets of reserved CPUs
|
||||
may be specified. With this change CPUs reserved via '--system-reserved=cpu'
|
||||
or '--kube-reserved=cpu' will be ignored by kubernetes itself. A small
|
||||
tweak to the default CPU affinity ensures that "normal" Kubernetes
|
||||
pods won't run on the reserved CPUs.
|
||||
|
||||
Signed-off-by: Daniel Safta <daniel.safta@windriver.com>
|
||||
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 6 ++-
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 11 ++++--
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 30 ++++++++++++---
|
||||
.../cm/cpumanager/policy_static_test.go | 38 ++++++++++++++-----
|
||||
4 files changed, 65 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 29194271..fba0bfd1 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -187,7 +187,11 @@ func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconc
|
||||
// exclusively allocated.
|
||||
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
|
||||
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
|
||||
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions)
|
||||
+ // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||
+ // This variable is primarily to make testing easier.
|
||||
+ excludeReserved := true
|
||||
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions, excludeReserved)
|
||||
+
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %w", err)
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index db0a3560..bbfc70b8 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -215,6 +215,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -230,7 +231,8 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
0,
|
||||
cpuset.NewCPUSet(),
|
||||
topologymanager.NewFakeManager(),
|
||||
- nil)
|
||||
+ nil,
|
||||
+ testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
@@ -479,8 +481,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
+ testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||
|
||||
mockState := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -991,6 +994,7 @@ func TestReconcileState(t *testing.T) {
|
||||
// above test cases are without kubelet --reserved-cpus cmd option
|
||||
// the following tests are with --reserved-cpus configured
|
||||
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -1006,7 +1010,8 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
1,
|
||||
cpuset.NewCPUSet(0),
|
||||
topologymanager.NewFakeManager(),
|
||||
- nil)
|
||||
+ nil,
|
||||
+ testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index f5d275d8..3859ae72 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -96,6 +96,8 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reserved cpuset.CPUSet
|
||||
+ // If true, default CPUSet should exclude reserved CPUs
|
||||
+ excludeReserved bool
|
||||
// topology manager reference to get container Topology affinity
|
||||
affinity topologymanager.Store
|
||||
// set of CPUs to reuse across allocations in a pod
|
||||
@@ -110,7 +112,7 @@ var _ Policy = &staticPolicy{}
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string) (Policy, error) {
|
||||
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, excludeReserved bool) (Policy, error) {
|
||||
opts, err := NewStaticPolicyOptions(cpuPolicyOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -141,6 +143,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
return &staticPolicy{
|
||||
topology: topology,
|
||||
reserved: reserved,
|
||||
+ excludeReserved: excludeReserved,
|
||||
affinity: affinity,
|
||||
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
options: opts,
|
||||
@@ -170,7 +173,15 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
// state is empty initialize
|
||||
allCPUs := p.topology.CPUDetails.CPUs()
|
||||
- s.SetDefaultCPUSet(allCPUs)
|
||||
+ if p.excludeReserved {
|
||||
+ // Exclude reserved CPUs from the default CPUSet to keep containers off them
|
||||
+ // unless explicitly affined.
|
||||
+ s.SetDefaultCPUSet(allCPUs.Difference(p.reserved))
|
||||
+ } else {
|
||||
+ s.SetDefaultCPUSet(allCPUs)
|
||||
+ }
|
||||
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||
+ allCPUs, p.reserved, s.GetDefaultCPUSet())
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -178,11 +189,12 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
// 1. Check if the reserved cpuset is not part of default cpuset because:
|
||||
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
|
||||
// - user tampered with file
|
||||
- if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
- p.reserved.String(), tmpDefaultCPUset.String())
|
||||
+ if !p.excludeReserved {
|
||||
+ if !p.reserved.Intersection(tmpDefaultCPUset).Equals(p.reserved) {
|
||||
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
+ p.reserved.String(), tmpDefaultCPUset.String())
|
||||
+ }
|
||||
}
|
||||
-
|
||||
// 2. Check if state for static policy is consistent
|
||||
for pod := range tmpAssignments {
|
||||
for container, cset := range tmpAssignments[pod] {
|
||||
@@ -209,6 +221,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
}
|
||||
totalKnownCPUs = totalKnownCPUs.UnionAll(tmpCPUSets)
|
||||
+ if p.excludeReserved {
|
||||
+ totalKnownCPUs = totalKnownCPUs.Union(p.reserved)
|
||||
+ }
|
||||
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
|
||||
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
|
||||
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
|
||||
@@ -294,6 +309,9 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
|
||||
klog.InfoS("Static policy: RemoveContainer", "podUID", podUID, "containerName", containerName)
|
||||
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
|
||||
s.Delete(podUID, containerName)
|
||||
+ if p.excludeReserved {
|
||||
+ toRelease = toRelease.Difference(p.reserved)
|
||||
+ }
|
||||
// Mutate the shared pool, adding released cpus.
|
||||
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index d2b641fe..80bd04a1 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -33,6 +33,7 @@ type staticPolicyTest struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
+ excludeReserved bool
|
||||
podUID string
|
||||
options map[string]string
|
||||
containerName string
|
||||
@@ -64,7 +65,8 @@ func (spt staticPolicyTest) PseudoClone() staticPolicyTest {
|
||||
}
|
||||
|
||||
func TestStaticPolicyName(t *testing.T) {
|
||||
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil)
|
||||
+ testExcl := false
|
||||
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||
|
||||
policyName := policy.Name()
|
||||
if policyName != "static" {
|
||||
@@ -94,6 +96,15 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
stDefaultCPUSet: cpuset.NewCPUSet(),
|
||||
expCSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||
},
|
||||
+ {
|
||||
+ description: "empty cpuset exclude reserved",
|
||||
+ topo: topoDualSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ excludeReserved: true,
|
||||
+ stAssignments: state.ContainerCPUAssignments{},
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(),
|
||||
+ expCSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
|
||||
+ },
|
||||
{
|
||||
description: "reserved cores 0 & 6 are not present in available cpuset",
|
||||
topo: topoDualSocketHT,
|
||||
@@ -140,7 +151,8 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil)
|
||||
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||
+
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -211,7 +223,7 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
"fakeContainer100": cpuset.NewCPUSet(2, 3, 6, 7),
|
||||
},
|
||||
},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 4, 5),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(4, 5),
|
||||
pod: makePod("fakePod", "fakeContainer3", "2000m", "2000m"),
|
||||
expErr: nil,
|
||||
expCPUAlloc: true,
|
||||
@@ -517,7 +529,8 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
}
|
||||
|
||||
func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.options)
|
||||
+ testExcl := false
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), testCase.options, testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -559,6 +572,7 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyRemove(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "SingleSocketHT, DeAllocOneContainer",
|
||||
@@ -617,7 +631,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -639,6 +653,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
testCases := []struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
@@ -707,7 +722,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil)
|
||||
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.NewCPUSet(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: tc.stAssignments,
|
||||
@@ -779,9 +794,11 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
||||
},
|
||||
}
|
||||
+ testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
|
||||
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||
+
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expNewErr, err)
|
||||
@@ -821,7 +838,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
numReservedCPUs: 1,
|
||||
reserved: cpuset.NewCPUSet(0),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(1, 2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||
expErr: fmt.Errorf("not enough cpus available to satisfy request"),
|
||||
expCPUAlloc: false,
|
||||
@@ -833,7 +850,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.NewCPUSet(0, 1),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
- stDefaultCPUSet: cpuset.NewCPUSet(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
+ stDefaultCPUSet: cpuset.NewCPUSet(2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||
expErr: nil,
|
||||
expCPUAlloc: true,
|
||||
@@ -857,8 +874,9 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
+ testExcl := true
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
--
|
||||
2.17.1
|
||||
|
2248
kubernetes/kubernetes-1.22.5/centos/kubernetes.spec
Normal file
2248
kubernetes/kubernetes-1.22.5/centos/kubernetes.spec
Normal file
File diff suppressed because it is too large
Load Diff
2282
kubernetes/kubernetes-1.22.5/centos/kubernetes.spec.orig
Normal file
2282
kubernetes/kubernetes-1.22.5/centos/kubernetes.spec.orig
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user