Merge "Add kubernetes 1.27.5 patches"
This commit is contained in:
commit
5ad7850d3c
@ -0,0 +1,126 @@
|
||||
From 745ca38039944f70ccb98cf9e75e3f439377399e Mon Sep 17 00:00:00 2001
|
||||
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
Date: Wed, 30 Aug 2023 04:20:24 -0400
|
||||
Subject: [PATCH] Revert "use subpath for coredns only for default repository"
|
||||
|
||||
This reverts commit 38a41e1557649a7cc763bf737779db9aa03ec75e.
|
||||
|
||||
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||
Signed-off-by: Gleb Aronsky <gleb.aronsky@windriver.com>
|
||||
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
||||
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
---
|
||||
cmd/kubeadm/app/constants/constants.go | 2 +-
|
||||
cmd/kubeadm/app/images/images.go | 5 ---
|
||||
cmd/kubeadm/app/images/images_test.go | 55 --------------------------
|
||||
3 files changed, 1 insertion(+), 61 deletions(-)
|
||||
|
||||
diff --git a/cmd/kubeadm/app/constants/constants.go b/cmd/kubeadm/app/constants/constants.go
|
||||
index 1146b67dd59..e89ae1d62df 100644
|
||||
--- a/cmd/kubeadm/app/constants/constants.go
|
||||
+++ b/cmd/kubeadm/app/constants/constants.go
|
||||
@@ -344,7 +344,7 @@ const (
|
||||
CoreDNSDeploymentName = "coredns"
|
||||
|
||||
// CoreDNSImageName specifies the name of the image for CoreDNS add-on
|
||||
- CoreDNSImageName = "coredns"
|
||||
+ CoreDNSImageName = "coredns/coredns"
|
||||
|
||||
// CoreDNSVersion is the version of CoreDNS to be deployed if it is used
|
||||
CoreDNSVersion = "v1.10.1"
|
||||
diff --git a/cmd/kubeadm/app/images/images.go b/cmd/kubeadm/app/images/images.go
|
||||
index a9a9528b669..5099b260530 100644
|
||||
--- a/cmd/kubeadm/app/images/images.go
|
||||
+++ b/cmd/kubeadm/app/images/images.go
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm"
|
||||
- kubeadmapiv1beta3 "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3"
|
||||
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||
kubeadmutil "k8s.io/kubernetes/cmd/kubeadm/app/util"
|
||||
)
|
||||
@@ -48,10 +47,6 @@ func GetDNSImage(cfg *kubeadmapi.ClusterConfiguration) string {
|
||||
if cfg.DNS.ImageRepository != "" {
|
||||
dnsImageRepository = cfg.DNS.ImageRepository
|
||||
}
|
||||
- // Handle the renaming of the official image from "registry.k8s.io/coredns" to "registry.k8s.io/coredns/coredns
|
||||
- if dnsImageRepository == kubeadmapiv1beta3.DefaultImageRepository {
|
||||
- dnsImageRepository = fmt.Sprintf("%s/coredns", dnsImageRepository)
|
||||
- }
|
||||
// DNS uses an imageTag that corresponds to the DNS version matching the Kubernetes version
|
||||
dnsImageTag := constants.CoreDNSVersion
|
||||
|
||||
diff --git a/cmd/kubeadm/app/images/images_test.go b/cmd/kubeadm/app/images/images_test.go
|
||||
index 1aa08e30fa8..f19880cdc2e 100644
|
||||
--- a/cmd/kubeadm/app/images/images_test.go
|
||||
+++ b/cmd/kubeadm/app/images/images_test.go
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
"testing"
|
||||
|
||||
kubeadmapi "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm"
|
||||
- kubeadmapiv1beta3 "k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3"
|
||||
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
|
||||
)
|
||||
|
||||
@@ -235,57 +234,3 @@ func TestGetAllImages(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
-func TestGetDNSImage(t *testing.T) {
|
||||
- var tests = []struct {
|
||||
- expected string
|
||||
- cfg *kubeadmapi.ClusterConfiguration
|
||||
- }{
|
||||
- {
|
||||
- expected: "foo.io/coredns:v1.10.1",
|
||||
- cfg: &kubeadmapi.ClusterConfiguration{
|
||||
- ImageRepository: "foo.io",
|
||||
- DNS: kubeadmapi.DNS{},
|
||||
- },
|
||||
- },
|
||||
- {
|
||||
- expected: kubeadmapiv1beta3.DefaultImageRepository + "/coredns/coredns:v1.10.1",
|
||||
- cfg: &kubeadmapi.ClusterConfiguration{
|
||||
- ImageRepository: kubeadmapiv1beta3.DefaultImageRepository,
|
||||
- DNS: kubeadmapi.DNS{},
|
||||
- },
|
||||
- },
|
||||
- {
|
||||
- expected: "foo.io/coredns/coredns:v1.10.1",
|
||||
- cfg: &kubeadmapi.ClusterConfiguration{
|
||||
- ImageRepository: "foo.io",
|
||||
- DNS: kubeadmapi.DNS{
|
||||
- ImageMeta: kubeadmapi.ImageMeta{
|
||||
- ImageRepository: "foo.io/coredns",
|
||||
- },
|
||||
- },
|
||||
- },
|
||||
- },
|
||||
- {
|
||||
- expected: "foo.io/coredns/coredns:v1.11.0",
|
||||
- cfg: &kubeadmapi.ClusterConfiguration{
|
||||
- ImageRepository: "foo.io/coredns",
|
||||
- DNS: kubeadmapi.DNS{
|
||||
- ImageMeta: kubeadmapi.ImageMeta{
|
||||
- ImageTag: "v1.11.0",
|
||||
- },
|
||||
- },
|
||||
- },
|
||||
- },
|
||||
- }
|
||||
-
|
||||
- for _, test := range tests {
|
||||
- actual := GetDNSImage(test.cfg)
|
||||
- if actual != test.expected {
|
||||
- t.Errorf(
|
||||
- "failed to GetDNSImage:\n\texpected: %s\n\t actual: %s",
|
||||
- test.expected,
|
||||
- actual,
|
||||
- )
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
--
|
||||
2.25.1
|
||||
|
@ -0,0 +1,81 @@
|
||||
From 5c789dcd87cd6db69e53399a581d61c8fb308f7d Mon Sep 17 00:00:00 2001
|
||||
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
Date: Mon, 4 Sep 2023 08:25:03 -0400
|
||||
Subject: [PATCH] enable support for kubernetes to ignore isolcpus
|
||||
|
||||
The normal mechanisms for allocating isolated CPUs do not allow
|
||||
a mix of isolated and exclusive CPUs in the same container. In
|
||||
order to allow this in *very* limited cases where the pod spec
|
||||
is known in advance we will add the ability to disable the normal
|
||||
isolcpus behaviour.
|
||||
|
||||
If the file "/etc/kubernetes/ignore_isolcpus" exists, then kubelet
|
||||
will basically forget everything it knows about isolcpus and just
|
||||
treat them like regular CPUs.
|
||||
|
||||
The admin user can then rely on the fact that CPU allocation is
|
||||
deterministic to ensure that the isolcpus they configure end up being
|
||||
allocated to the correct pods.
|
||||
|
||||
Signed-off-by: Daniel Safta <daniel.safta@windriver.com>
|
||||
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
||||
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 8 ++++++++
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 7 +++++++
|
||||
2 files changed, 15 insertions(+)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 6e9d3938aef..0f48d521d6f 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -21,6 +21,7 @@ import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"math"
|
||||
+ "os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -56,6 +57,13 @@ const cpuManagerStateFileName = "cpu_manager_state"
|
||||
|
||||
// get the system-level isolated CPUs
|
||||
func getIsolcpus() cpuset.CPUSet {
|
||||
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
|
||||
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
|
||||
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
|
||||
+ klog.Infof("[cpumanager] turning off isolcpus awareness")
|
||||
+ return cpuset.New()
|
||||
+ }
|
||||
+
|
||||
dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||
if err != nil {
|
||||
klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index ab4164e8736..54bcfdb71a1 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -18,6 +18,7 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
+ "os"
|
||||
"strconv"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
@@ -752,6 +753,12 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||
|
||||
// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||
func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||
+ // This is a gross hack to basically turn off awareness of isolcpus to enable
|
||||
+ // isolated cpus to be allocated to pods the same way as non-isolated CPUs.
|
||||
+ if _, err := os.Stat("/etc/kubernetes/ignore_isolcpus"); err == nil {
|
||||
+ return cpuset.New()
|
||||
+ }
|
||||
+
|
||||
// NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||
// not create UID. We also need a way to properly stub devicemanager.
|
||||
if len(string(pod.UID)) == 0 {
|
||||
--
|
||||
2.25.1
|
||||
|
@ -0,0 +1,109 @@
|
||||
From 029d26fe15a99b9dffa124efae1c4614be48fd5b Mon Sep 17 00:00:00 2001
|
||||
From: Chris Friesen <chris.friesen@windriver.com>
|
||||
Date: Fri, 3 Sep 2021 18:05:15 -0400
|
||||
Subject: [PATCH] kubeadm: create platform pods with zero CPU resources
|
||||
|
||||
We want to specify zero CPU resources when creating the manifests
|
||||
for the static platform pods, as a workaround for the lack of
|
||||
separate resource tracking for platform resources.
|
||||
|
||||
We also specify zero CPU resources for the coredns deployment.
|
||||
manifests.go appears to be the main file for this; it is unclear whether the
|
||||
others are used but I changed them just in case.
|
||||
|
||||
Signed-off-by: Daniel Safta <daniel.safta@windriver.com>
|
||||
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
---
|
||||
cluster/addons/dns/coredns/coredns.yaml.base | 2 +-
|
||||
cluster/addons/dns/coredns/coredns.yaml.in | 2 +-
|
||||
cluster/addons/dns/coredns/coredns.yaml.sed | 2 +-
|
||||
cmd/kubeadm/app/phases/addons/dns/manifests.go | 2 +-
|
||||
cmd/kubeadm/app/phases/controlplane/manifests.go | 6 +++---
|
||||
5 files changed, 7 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.base b/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
index 8b6b2ab999c..4de5590dd81 100644
|
||||
--- a/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
+++ b/cluster/addons/dns/coredns/coredns.yaml.base
|
||||
@@ -145,7 +145,7 @@ spec:
|
||||
limits:
|
||||
memory: __DNS__MEMORY__LIMIT__
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.in b/cluster/addons/dns/coredns/coredns.yaml.in
|
||||
index f35fe8cfe8d..44f1820b0a9 100644
|
||||
--- a/cluster/addons/dns/coredns/coredns.yaml.in
|
||||
+++ b/cluster/addons/dns/coredns/coredns.yaml.in
|
||||
@@ -145,7 +145,7 @@ spec:
|
||||
limits:
|
||||
memory: 'dns_memory_limit'
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cluster/addons/dns/coredns/coredns.yaml.sed b/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||
index 5ee04f2880f..13d4c7f745b 100644
|
||||
--- a/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||
+++ b/cluster/addons/dns/coredns/coredns.yaml.sed
|
||||
@@ -145,7 +145,7 @@ spec:
|
||||
limits:
|
||||
memory: $DNS_MEMORY_LIMIT
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cmd/kubeadm/app/phases/addons/dns/manifests.go b/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||
index 0e3c6c98c29..97c5ff96d43 100644
|
||||
--- a/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||
+++ b/cmd/kubeadm/app/phases/addons/dns/manifests.go
|
||||
@@ -104,7 +104,7 @@ spec:
|
||||
limits:
|
||||
memory: 170Mi
|
||||
requests:
|
||||
- cpu: 100m
|
||||
+ cpu: 0
|
||||
memory: 70Mi
|
||||
args: [ "-conf", "/etc/coredns/Corefile" ]
|
||||
volumeMounts:
|
||||
diff --git a/cmd/kubeadm/app/phases/controlplane/manifests.go b/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||
index 73f4fa56270..da52342a6f6 100644
|
||||
--- a/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||
+++ b/cmd/kubeadm/app/phases/controlplane/manifests.go
|
||||
@@ -63,7 +63,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS),
|
||||
ReadinessProbe: staticpodutil.ReadinessProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/readyz", int(endpoint.BindPort), v1.URISchemeHTTPS),
|
||||
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetAPIServerProbeAddress(endpoint), "/livez", int(endpoint.BindPort), v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||
- Resources: staticpodutil.ComponentResources("250m"),
|
||||
+ Resources: staticpodutil.ComponentResources("0"),
|
||||
Env: kubeadmutil.GetProxyEnvVars(),
|
||||
}, mounts.GetVolumes(kubeadmconstants.KubeAPIServer),
|
||||
map[string]string{kubeadmconstants.KubeAPIServerAdvertiseAddressEndpointAnnotationKey: endpoint.String()}),
|
||||
@@ -75,7 +75,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeControllerManager)),
|
||||
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS),
|
||||
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetControllerManagerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeControllerManagerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||
- Resources: staticpodutil.ComponentResources("200m"),
|
||||
+ Resources: staticpodutil.ComponentResources("0"),
|
||||
Env: kubeadmutil.GetProxyEnvVars(),
|
||||
}, mounts.GetVolumes(kubeadmconstants.KubeControllerManager), nil),
|
||||
kubeadmconstants.KubeScheduler: staticpodutil.ComponentPod(v1.Container{
|
||||
@@ -86,7 +86,7 @@ func GetStaticPodSpecs(cfg *kubeadmapi.ClusterConfiguration, endpoint *kubeadmap
|
||||
VolumeMounts: staticpodutil.VolumeMountMapToSlice(mounts.GetVolumeMounts(kubeadmconstants.KubeScheduler)),
|
||||
LivenessProbe: staticpodutil.LivenessProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS),
|
||||
StartupProbe: staticpodutil.StartupProbe(staticpodutil.GetSchedulerProbeAddress(cfg), "/healthz", kubeadmconstants.KubeSchedulerPort, v1.URISchemeHTTPS, cfg.APIServer.TimeoutForControlPlane),
|
||||
- Resources: staticpodutil.ComponentResources("100m"),
|
||||
+ Resources: staticpodutil.ComponentResources("0"),
|
||||
Env: kubeadmutil.GetProxyEnvVars(),
|
||||
}, mounts.GetVolumes(kubeadmconstants.KubeScheduler), nil),
|
||||
}
|
||||
--
|
||||
2.25.1
|
||||
|
@ -0,0 +1,30 @@
|
||||
From 9ffea908e29ee477b75fe03baa881e83e8e7f429 Mon Sep 17 00:00:00 2001
|
||||
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
Date: Mon, 4 Sep 2023 08:05:29 -0400
|
||||
Subject: [PATCH] kubelet CFS quota throttling for non integer cpulimit
|
||||
|
||||
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/internal_container_lifecycle_linux.go | 6 +++++-
|
||||
1 file changed, 5 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/internal_container_lifecycle_linux.go b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
||||
index a99d01f8884..e5e25cd56de 100644
|
||||
--- a/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
||||
+++ b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
||||
@@ -39,7 +39,11 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain
|
||||
// Disable cgroup CFS throttle at the container level.
|
||||
// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_quota_us
|
||||
// /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_period_us
|
||||
- if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed {
|
||||
+ // We can only set CpuQuota to -1 if we're allocating the entire CPU.
|
||||
+ // For fractional CPUs the CpuQuota is needed to enforce the limit.
|
||||
+ cpuQuantity := container.Resources.Requests[v1.ResourceCPU]
|
||||
+ fractionalCpuQuantity := cpuQuantity.MilliValue()%1000
|
||||
+ if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed && fractionalCpuQuantity == 0 {
|
||||
containerConfig.Linux.Resources.CpuPeriod = int64(100000)
|
||||
containerConfig.Linux.Resources.CpuQuota = int64(-1)
|
||||
}
|
||||
--
|
||||
2.25.1
|
||||
|
@ -0,0 +1,256 @@
|
||||
From a5b09eb84feb744c4e3bc6eb1b8936ecd5f42874 Mon Sep 17 00:00:00 2001
|
||||
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
Date: Wed, 30 Aug 2023 06:01:30 -0400
|
||||
Subject: [PATCH] kubelet cpumanager disable CFS quota throttling
|
||||
|
||||
This disables CFS CPU quota to avoid performance degradation due to
|
||||
Linux kernel CFS quota implementation. Note that the 4.18 kernel attempts
|
||||
to solve the CFS throttling problem, but there are reports that it is
|
||||
not completely effective.
|
||||
|
||||
This disables CFS quota throttling for Guaranteed pods for both
|
||||
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
|
||||
Disabling has a dramatic latency improvement for HTTP response times.
|
||||
|
||||
This patch is refactored in 1.22.5 due to new internal_container_lifecycle
|
||||
framework. We leverage the same mechanism to set Linux resources as:
|
||||
cpu manager: specify the container CPU set during the creation
|
||||
|
||||
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
|
||||
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 7 +++
|
||||
pkg/kubelet/cm/cpumanager/fake_cpu_manager.go | 10 ++++-
|
||||
pkg/kubelet/cm/helpers_linux.go | 10 +++++
|
||||
pkg/kubelet/cm/helpers_linux_test.go | 43 ++++++++++---------
|
||||
.../cm/internal_container_lifecycle_linux.go | 9 ++++
|
||||
5 files changed, 57 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 443eecd2d36..9e2dce60501 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -73,6 +73,9 @@ type Manager interface {
|
||||
// State returns a read-only interface to the internal CPU manager state.
|
||||
State() state.Reader
|
||||
|
||||
+ // GetCPUPolicy returns the assigned CPU manager policy
|
||||
+ GetCPUPolicy() string
|
||||
+
|
||||
// GetTopologyHints implements the topologymanager.HintProvider Interface
|
||||
// and is consulted to achieve NUMA aware resource alignment among this
|
||||
// and other resource controllers.
|
||||
@@ -315,6 +318,10 @@ func (m *manager) State() state.Reader {
|
||||
return m.state
|
||||
}
|
||||
|
||||
+func (m *manager) GetCPUPolicy() string {
|
||||
+ return m.policy.Name()
|
||||
+}
|
||||
+
|
||||
func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
|
||||
// The pod is during the admission phase. We need to save the pod to avoid it
|
||||
// being cleaned before the admission ended
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
|
||||
index 93369705135..2e277da9c84 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
|
||||
@@ -28,7 +28,8 @@ import (
|
||||
)
|
||||
|
||||
type fakeManager struct {
|
||||
- state state.State
|
||||
+ policy Policy
|
||||
+ state state.State
|
||||
}
|
||||
|
||||
func (m *fakeManager) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, containerRuntime runtimeService, initialContainers containermap.ContainerMap) error {
|
||||
@@ -70,6 +71,10 @@ func (m *fakeManager) State() state.Reader {
|
||||
return m.state
|
||||
}
|
||||
|
||||
+func (m *fakeManager) GetCPUPolicy() string {
|
||||
+ return m.policy.Name()
|
||||
+}
|
||||
+
|
||||
func (m *fakeManager) GetExclusiveCPUs(podUID, containerName string) cpuset.CPUSet {
|
||||
klog.InfoS("GetExclusiveCPUs", "podUID", podUID, "containerName", containerName)
|
||||
return cpuset.CPUSet{}
|
||||
@@ -88,6 +93,7 @@ func (m *fakeManager) GetCPUAffinity(podUID, containerName string) cpuset.CPUSet
|
||||
// NewFakeManager creates empty/fake cpu manager
|
||||
func NewFakeManager() Manager {
|
||||
return &fakeManager{
|
||||
- state: state.NewMemoryState(),
|
||||
+ policy: &nonePolicy{},
|
||||
+ state: state.NewMemoryState(),
|
||||
}
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
|
||||
index 18b0df17bfc..76db06a679f 100644
|
||||
--- a/pkg/kubelet/cm/helpers_linux.go
|
||||
+++ b/pkg/kubelet/cm/helpers_linux.go
|
||||
@@ -170,6 +170,16 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64,
|
||||
// build the result
|
||||
result := &ResourceConfig{}
|
||||
if qosClass == v1.PodQOSGuaranteed {
|
||||
+ // Disable CFS CPU quota to avoid performance degradation due to
|
||||
+ // Linux kernel CFS throttle implementation.
|
||||
+ // NOTE: 4.18 kernel attempts to solve CFS throttling problem,
|
||||
+ // but there are reports that it is not completely effective.
|
||||
+ // This will configure cgroup CFS parameters at pod level:
|
||||
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_quota_us
|
||||
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_period_us
|
||||
+ cpuQuota = int64(-1)
|
||||
+ cpuPeriod = uint64(100000)
|
||||
+
|
||||
result.CPUShares = &cpuShares
|
||||
result.CPUQuota = &cpuQuota
|
||||
result.CPUPeriod = &cpuPeriod
|
||||
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
|
||||
index fba41fd49be..60609394659 100644
|
||||
--- a/pkg/kubelet/cm/helpers_linux_test.go
|
||||
+++ b/pkg/kubelet/cm/helpers_linux_test.go
|
||||
@@ -64,8 +64,9 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
burstablePartialShares := MilliCPUToShares(200)
|
||||
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||
guaranteedShares := MilliCPUToShares(100)
|
||||
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||
+ guaranteedQuotaPeriod := uint64(100000)
|
||||
+ guaranteedQuota := int64(-1)
|
||||
+ guaranteedTunedQuota := int64(-1)
|
||||
memoryQuantity = resource.MustParse("100Mi")
|
||||
cpuNoLimit := int64(-1)
|
||||
guaranteedMemory := memoryQuantity.Value()
|
||||
@@ -204,8 +205,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: true,
|
||||
- quotaPeriod: defaultQuotaPeriod,
|
||||
- expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-no-cpu-enforcement": {
|
||||
pod: &v1.Pod{
|
||||
@@ -218,8 +219,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: false,
|
||||
- quotaPeriod: defaultQuotaPeriod,
|
||||
- expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-with-tuned-quota": {
|
||||
pod: &v1.Pod{
|
||||
@@ -232,8 +233,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: true,
|
||||
- quotaPeriod: tunedQuotaPeriod,
|
||||
- expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-no-cpu-enforcement-with-tuned-quota": {
|
||||
pod: &v1.Pod{
|
||||
@@ -246,8 +247,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: false,
|
||||
- quotaPeriod: tunedQuotaPeriod,
|
||||
- expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"burstable-partial-limits-with-init-containers": {
|
||||
pod: &v1.Pod{
|
||||
@@ -309,8 +310,10 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
burstablePartialShares := MilliCPUToShares(200)
|
||||
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
||||
guaranteedShares := MilliCPUToShares(100)
|
||||
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
||||
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
||||
+ guaranteedQuotaPeriod := uint64(100000)
|
||||
+ guaranteedQuota := int64(-1)
|
||||
+ guaranteedTunedQuota := int64(-1)
|
||||
+
|
||||
memoryQuantity = resource.MustParse("100Mi")
|
||||
cpuNoLimit := int64(-1)
|
||||
guaranteedMemory := memoryQuantity.Value()
|
||||
@@ -449,8 +452,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: true,
|
||||
- quotaPeriod: defaultQuotaPeriod,
|
||||
- expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-no-cpu-enforcement": {
|
||||
pod: &v1.Pod{
|
||||
@@ -463,8 +466,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: false,
|
||||
- quotaPeriod: defaultQuotaPeriod,
|
||||
- expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-with-tuned-quota": {
|
||||
pod: &v1.Pod{
|
||||
@@ -477,8 +480,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: true,
|
||||
- quotaPeriod: tunedQuotaPeriod,
|
||||
- expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &guaranteedTunedQuota, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
"guaranteed-no-cpu-enforcement-with-tuned-quota": {
|
||||
pod: &v1.Pod{
|
||||
@@ -491,8 +494,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
enforceCPULimits: false,
|
||||
- quotaPeriod: tunedQuotaPeriod,
|
||||
- expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
+ quotaPeriod: guaranteedQuotaPeriod,
|
||||
+ expected: &ResourceConfig{CPUShares: &guaranteedShares, CPUQuota: &cpuNoLimit, CPUPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
||||
},
|
||||
}
|
||||
|
||||
diff --git a/pkg/kubelet/cm/internal_container_lifecycle_linux.go b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
||||
index cb7c0cfa543..a99d01f8884 100644
|
||||
--- a/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
||||
+++ b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
||||
@@ -25,6 +25,7 @@ import (
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
|
||||
+ v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
||||
)
|
||||
|
||||
func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, container *v1.Container, containerConfig *runtimeapi.ContainerConfig) error {
|
||||
@@ -35,6 +36,14 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain
|
||||
}
|
||||
}
|
||||
|
||||
+ // Disable cgroup CFS throttle at the container level.
|
||||
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_quota_us
|
||||
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_period_us
|
||||
+ if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed {
|
||||
+ containerConfig.Linux.Resources.CpuPeriod = int64(100000)
|
||||
+ containerConfig.Linux.Resources.CpuQuota = int64(-1)
|
||||
+ }
|
||||
+
|
||||
if i.memoryManager != nil {
|
||||
numaNodes := i.memoryManager.GetMemoryNUMANodes(pod, container)
|
||||
if numaNodes.Len() > 0 {
|
||||
--
|
||||
2.25.1
|
||||
|
@ -0,0 +1,167 @@
|
||||
From bb0a722834dc5cc1caf545652828869a42b50ea2 Mon Sep 17 00:00:00 2001
|
||||
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
Date: Tue, 5 Sep 2023 06:27:39 -0400
|
||||
Subject: [PATCH] kubelet cpumanager infra pods use system reserved CPUs
|
||||
|
||||
This assigns system infrastructure pods to the "reserved" cpuset
|
||||
to isolate them from the shared pool of CPUs.
|
||||
|
||||
Infrastructure pods include any pods that belong to the kube-system,
|
||||
armada, cert-manager, vault, platform-deployment-manager, portieris,
|
||||
notification, flux-helm or metrics-server namespaces.
|
||||
|
||||
The implementation is a bit simplistic: it is assumed that the
|
||||
"reserved" cpuset is large enough to handle all infrastructure pods
|
||||
CPU allocations.
|
||||
|
||||
This also prevents infrastructure pods from using Guaranteed resources.
|
||||
|
||||
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||
Signed-off-by: Gleb Aronsky <gleb.aronsky@windriver.com>
|
||||
Signed-off-by: Thiago Miranda <ThiagoOliveira.Miranda@windriver.com>
|
||||
Signed-off-by: Kaustubh Dhokte <kaustubh.dhokte@windriver.com>
|
||||
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
||||
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
|
||||
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 50 ++++++++++++++++---
|
||||
.../cm/cpumanager/policy_static_test.go | 19 ++++++-
|
||||
2 files changed, 62 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index d25b2482537..1fdb49b52ad 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -62,6 +62,11 @@ func (e SMTAlignmentError) Type() string {
|
||||
return ErrorSMTAlignment
|
||||
}
|
||||
|
||||
+// Define namespaces used by platform infrastructure pods
|
||||
+var infraNamespaces = [...]string{
|
||||
+ "kube-system", "armada", "cert-manager", "platform-deployment-manager", "portieris", "vault", "notification", "flux-helm", "metrics-server",
|
||||
+}
|
||||
+
|
||||
// staticPolicy is a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
@@ -140,11 +145,11 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
klog.InfoS("Static policy created with configuration", "options", opts)
|
||||
|
||||
policy := &staticPolicy{
|
||||
- topology: topology,
|
||||
- affinity: affinity,
|
||||
+ topology: topology,
|
||||
+ affinity: affinity,
|
||||
excludeReserved: excludeReserved,
|
||||
- cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
- options: opts,
|
||||
+ cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
+ options: opts,
|
||||
}
|
||||
|
||||
allCPUs := topology.CPUDetails.CPUs()
|
||||
@@ -222,8 +227,8 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
// - user tampered with file
|
||||
if !p.excludeReserved {
|
||||
if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
||||
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
- p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
+ p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||
}
|
||||
}
|
||||
// 2. Check if state for static policy is consistent
|
||||
@@ -302,6 +307,25 @@ func (p *staticPolicy) updateCPUsToReuse(pod *v1.Pod, container *v1.Container, c
|
||||
}
|
||||
|
||||
func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container) (rerr error) {
|
||||
+ // Process infra pods before guaranteed pods
|
||||
+ if isKubeInfra(pod) {
|
||||
+ // Container belongs in reserved pool.
|
||||
+ // We don't want to fall through to the p.guaranteedCPUs() clause below so return either nil or error.
|
||||
+ if _, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
+ klog.Infof("[cpumanager] static policy: reserved container already present in state, skipping (namespace: %s, pod UID: %s, pod: %s, container: %s)", pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
+ cpuset := p.reservedCPUs
|
||||
+ if cpuset.IsEmpty() {
|
||||
+ // If this happens then someone messed up.
|
||||
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reservedCPUs)
|
||||
+ }
|
||||
+ s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
+ klog.Infof("[cpumanager] static policy: reserved: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
numCPUs := p.guaranteedCPUs(pod, container)
|
||||
if numCPUs == 0 {
|
||||
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||
@@ -453,6 +477,10 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
|
||||
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
|
||||
return 0
|
||||
}
|
||||
+ // Infrastructure pods use reserved CPUs even if they're in the Guaranteed QoS class
|
||||
+ if isKubeInfra(pod) {
|
||||
+ return 0
|
||||
+ }
|
||||
// Safe downcast to do for all systems with < 2.1 billion CPUs.
|
||||
// Per the language spec, `int` is guaranteed to be at least 32 bits wide.
|
||||
// https://golang.org/ref/spec#Numeric_types
|
||||
@@ -671,6 +699,16 @@ func (p *staticPolicy) generateCPUTopologyHints(availableCPUs cpuset.CPUSet, reu
|
||||
return hints
|
||||
}
|
||||
|
||||
+// check if a given pod is in a platform infrastructure namespace
|
||||
+func isKubeInfra(pod *v1.Pod) bool {
|
||||
+ for _, namespace := range infraNamespaces {
|
||||
+ if namespace == pod.Namespace {
|
||||
+ return true
|
||||
+ }
|
||||
+ }
|
||||
+ return false
|
||||
+}
|
||||
+
|
||||
// isHintSocketAligned function return true if numa nodes in hint are socket aligned.
|
||||
func (p *staticPolicy) isHintSocketAligned(hint topologymanager.TopologyHint, minAffinitySize int) bool {
|
||||
numaNodesBitMask := hint.NUMANodeAffinity.GetBits()
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index c4675394a93..63f31486d19 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -988,7 +988,8 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
-
|
||||
+ infraPod := makePod("fakePod", "fakeContainer2", "200m", "200m")
|
||||
+ infraPod.Namespace = "kube-system"
|
||||
testCases := []staticPolicyTestWithResvList{
|
||||
{
|
||||
description: "GuPodSingleCore, SingleSocketHT, ExpectError",
|
||||
@@ -1030,6 +1031,22 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
expCPUAlloc: true,
|
||||
expCSet: cpuset.New(4, 5),
|
||||
},
|
||||
+ {
|
||||
+ description: "InfraPod, SingleSocketHT, ExpectAllocReserved",
|
||||
+ topo: topoSingleSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ reserved: cpuset.New(0, 1),
|
||||
+ stAssignments: state.ContainerCPUAssignments{
|
||||
+ "fakePod": map[string]cpuset.CPUSet{
|
||||
+ "fakeContainer100": cpuset.New(2, 3, 6, 7),
|
||||
+ },
|
||||
+ },
|
||||
+ stDefaultCPUSet: cpuset.New(4, 5),
|
||||
+ pod: infraPod,
|
||||
+ expErr: nil,
|
||||
+ expCPUAlloc: true,
|
||||
+ expCSet: cpuset.New(0, 1),
|
||||
+ },
|
||||
}
|
||||
|
||||
testExcl := true
|
||||
--
|
||||
2.25.1
|
||||
|
@ -0,0 +1,743 @@
|
||||
From b51d6c0ba6dfd9a34c7f6832d17840820f9985eb Mon Sep 17 00:00:00 2001
|
||||
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
Date: Fri, 8 Sep 2023 10:46:07 -0400
|
||||
Subject: [PATCH] kubelet cpumanager introduce concept of isolated CPUs
|
||||
|
||||
This introduces the concept of "isolated CPUs", which are CPUs that
|
||||
have been isolated at the kernel level via the "isolcpus" kernel boot
|
||||
parameter.
|
||||
|
||||
When starting the kubelet process, two separate sets of reserved CPUs
|
||||
may be specified. With this change CPUs reserved via
|
||||
'--system-reserved=cpu' will be used for infrastructure pods while the
|
||||
isolated CPUs should be reserved via '--kube-reserved=cpu' to cause
|
||||
kubelet to skip over them for "normal" CPU resource tracking. The
|
||||
kubelet code will double-check that the specified isolated CPUs match
|
||||
what the kernel exposes in "/sys/devices/system/cpu/isolated".
|
||||
|
||||
A plugin (outside the scope of this commit) will expose the isolated
|
||||
CPUs to kubelet via the device plugin API.
|
||||
|
||||
If a pod specifies some number of "isolcpus" resources, the device
|
||||
manager will allocate them. In this code we check whether such
|
||||
resources have been allocated, and if so we set the container cpuset to
|
||||
the isolated CPUs. This does mean that it really only makes sense to
|
||||
specify "isolcpus" resources for best-effort or burstable pods, not for
|
||||
guaranteed ones since that would throw off the accounting code. In
|
||||
order to ensure the accounting still works as designed, if "isolcpus"
|
||||
are specified for guaranteed pods, the affinity will be set to the
|
||||
non-isolated CPUs.
|
||||
|
||||
This patch was refactored in 1.21.3 due to upstream API change
|
||||
node: podresources: make GetDevices() consistent
|
||||
(commit ad68f9588c72d6477b5a290c548a9031063ac659).
|
||||
|
||||
The routine podIsolCPUs() was refactored in 1.21.3 since the API
|
||||
p.deviceManager.GetDevices() is returning multiple devices with
|
||||
a device per cpu. The resultant cpuset needs to be the aggregate.
|
||||
|
||||
The routine NewStaticPolicy was refactored in 1.22.5, adding a new argument
|
||||
in its signature: cpuPolicyOptions map[string]string. This change implies
|
||||
shifting the new arguments (deviceManager, excludeReserved) by one position
|
||||
to the right.
|
||||
|
||||
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||
Co-authored-by: Chris Friesen <chris.friesen@windriver.com>
|
||||
Signed-off-by: Gleb Aronsky <gleb.aronsky@windriver.com>
|
||||
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
||||
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
|
||||
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/container_manager_linux.go | 1 +
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 35 ++++++-
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 23 ++++-
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 83 ++++++++++++++--
|
||||
.../cm/cpumanager/policy_static_test.go | 53 ++++++++--
|
||||
pkg/kubelet/cm/devicemanager/manager_stub.go | 99 +++++++++++++++++++
|
||||
6 files changed, 273 insertions(+), 21 deletions(-)
|
||||
create mode 100644 pkg/kubelet/cm/devicemanager/manager_stub.go
|
||||
|
||||
diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go
|
||||
index 02cb34ddcdc..ae8daf2465f 100644
|
||||
--- a/pkg/kubelet/cm/container_manager_linux.go
|
||||
+++ b/pkg/kubelet/cm/container_manager_linux.go
|
||||
@@ -325,6 +325,7 @@ func NewContainerManager(mountUtil mount.Interface, cadvisorInterface cadvisor.I
|
||||
cm.GetNodeAllocatableReservation(),
|
||||
nodeConfig.KubeletRootDir,
|
||||
cm.topologyManager,
|
||||
+ cm.deviceManager,
|
||||
)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to initialize cpu manager")
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index e2c89efeb2e..6e9d3938aef 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -19,7 +19,9 @@ package cpumanager
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
+ "io/ioutil"
|
||||
"math"
|
||||
+ "strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -33,6 +35,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/config"
|
||||
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
|
||||
@@ -51,6 +54,25 @@ type policyName string
|
||||
// cpuManagerStateFileName is the file name where cpu manager stores its state
|
||||
const cpuManagerStateFileName = "cpu_manager_state"
|
||||
|
||||
+// get the system-level isolated CPUs
|
||||
+func getIsolcpus() cpuset.CPUSet {
|
||||
+ dat, err := ioutil.ReadFile("/sys/devices/system/cpu/isolated")
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] unable to read sysfs isolcpus subdir")
|
||||
+ return cpuset.New()
|
||||
+ }
|
||||
+
|
||||
+ // The isolated cpus string ends in a newline
|
||||
+ cpustring := strings.TrimSuffix(string(dat), "\n")
|
||||
+ cset, err := cpuset.Parse(cpustring)
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("[cpumanager] unable to parse sysfs isolcpus string to cpuset")
|
||||
+ return cpuset.New()
|
||||
+ }
|
||||
+
|
||||
+ return cset
|
||||
+}
|
||||
+
|
||||
// Manager interface provides methods for Kubelet to manage pod cpus.
|
||||
type Manager interface {
|
||||
// Start is called during Kubelet initialization.
|
||||
@@ -154,7 +176,8 @@ func (s *sourcesReadyStub) AddSource(source string) {}
|
||||
func (s *sourcesReadyStub) AllReady() bool { return true }
|
||||
|
||||
// NewManager creates new cpu manager based on provided policy
|
||||
-func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store) (Manager, error) {
|
||||
+func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconcilePeriod time.Duration, machineInfo *cadvisorapi.MachineInfo, specificCPUs cpuset.CPUSet, nodeAllocatableReservation v1.ResourceList, stateFileDirectory string, affinity topologymanager.Store, deviceManager devicemanager.Manager) (Manager, error) {
|
||||
+
|
||||
var topo *topology.CPUTopology
|
||||
var policy Policy
|
||||
var err error
|
||||
@@ -195,7 +218,15 @@ func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconc
|
||||
// NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||
// This variable is primarily to make testing easier.
|
||||
excludeReserved := true
|
||||
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions, excludeReserved)
|
||||
+
|
||||
+ // isolCPUs is the set of kernel-isolated CPUs. They should be a subset of specificCPUs or
|
||||
+ // of the CPUs that NewStaticPolicy() will pick if numReservedCPUs is set. It's only in the
|
||||
+ // argument list here for ease of testing, it's really internal to the policy.
|
||||
+ isolCPUs := getIsolcpus()
|
||||
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, isolCPUs, affinity, cpuPolicyOptions, deviceManager, excludeReserved)
|
||||
+ if err != nil {
|
||||
+ return nil, fmt.Errorf("new static policy error: %v", err)
|
||||
+ }
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %w", err)
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index bb69b0ac084..44a88429a12 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -37,6 +37,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
)
|
||||
|
||||
@@ -215,6 +216,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
@@ -230,8 +232,10 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
},
|
||||
0,
|
||||
cpuset.New(),
|
||||
+ cpuset.New(),
|
||||
topologymanager.NewFakeManager(),
|
||||
nil,
|
||||
+ testDM,
|
||||
testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
@@ -482,8 +486,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
}
|
||||
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||
|
||||
mockState := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -638,7 +643,9 @@ func TestCPUManagerGenerate(t *testing.T) {
|
||||
}
|
||||
defer os.RemoveAll(sDir)
|
||||
|
||||
- mgr, err := NewManager(testCase.cpuPolicyName, nil, 5*time.Second, machineInfo, cpuset.New(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
|
||||
+ testDM, err := devicemanager.NewManagerStub()
|
||||
+ mgr, err := NewManager(testCase.cpuPolicyName, nil, 5*time.Second, machineInfo, cpuset.New(), testCase.nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM)
|
||||
+
|
||||
if testCase.expectedError != nil {
|
||||
if !strings.Contains(err.Error(), testCase.expectedError.Error()) {
|
||||
t.Errorf("Unexpected error message. Have: %s wants %s", err.Error(), testCase.expectedError.Error())
|
||||
@@ -709,6 +716,7 @@ func TestCPUManagerRemove(t *testing.T) {
|
||||
|
||||
func TestReconcileState(t *testing.T) {
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 8,
|
||||
@@ -727,8 +735,10 @@ func TestReconcileState(t *testing.T) {
|
||||
},
|
||||
0,
|
||||
cpuset.New(),
|
||||
+ cpuset.New(),
|
||||
topologymanager.NewFakeManager(),
|
||||
nil,
|
||||
+ testDM,
|
||||
testExcl)
|
||||
|
||||
testCases := []struct {
|
||||
@@ -1234,6 +1244,7 @@ func TestReconcileState(t *testing.T) {
|
||||
// the following tests are with --reserved-cpus configured
|
||||
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
testExcl := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -1248,8 +1259,10 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
},
|
||||
1,
|
||||
cpuset.New(0),
|
||||
+ cpuset.New(),
|
||||
topologymanager.NewFakeManager(),
|
||||
nil,
|
||||
+ testDM,
|
||||
testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
@@ -1362,7 +1375,8 @@ func TestCPUManagerHandlePolicyOptions(t *testing.T) {
|
||||
}
|
||||
defer os.RemoveAll(sDir)
|
||||
|
||||
- _, err = NewManager(testCase.cpuPolicyName, testCase.cpuPolicyOptions, 5*time.Second, machineInfo, cpuset.New(), nodeAllocatableReservation, sDir, topologymanager.NewFakeManager())
|
||||
+ testDM, err := devicemanager.NewManagerStub()
|
||||
+ _, err = NewManager(testCase.cpuPolicyName, testCase.cpuPolicyOptions, 5*time.Second, machineInfo, cpuset.New(), nodeAllocatableReservation, sDir, topologymanager.NewFakeManager(), testDM)
|
||||
if err == nil {
|
||||
t.Errorf("Expected error, but NewManager succeeded")
|
||||
}
|
||||
@@ -1376,6 +1390,7 @@ func TestCPUManagerHandlePolicyOptions(t *testing.T) {
|
||||
|
||||
func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
||||
testExcl := false
|
||||
+ testDm, _ := devicemanager.NewManagerStub()
|
||||
nonePolicy, _ := NewNonePolicy(nil)
|
||||
staticPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
@@ -1391,8 +1406,10 @@ func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
||||
},
|
||||
1,
|
||||
cpuset.New(0),
|
||||
+ cpuset.New(),
|
||||
topologymanager.NewFakeManager(),
|
||||
nil,
|
||||
+ testDm,
|
||||
testExcl)
|
||||
|
||||
testCases := []struct {
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index 1fdb49b52ad..49f63dd9efd 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -18,6 +18,7 @@ package cpumanager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
+ "strconv"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
@@ -28,6 +29,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
"k8s.io/kubernetes/pkg/kubelet/metrics"
|
||||
@@ -110,6 +112,10 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reservedCPUs cpuset.CPUSet
|
||||
+ // subset of reserved CPUs with isolcpus attribute
|
||||
+ isolcpus cpuset.CPUSet
|
||||
+ // parent containerManager, used to get device list
|
||||
+ deviceManager devicemanager.Manager
|
||||
// If true, default CPUSet should exclude reserved CPUs
|
||||
excludeReserved bool
|
||||
// Superset of reservedCPUs. It includes not just the reservedCPUs themselves,
|
||||
@@ -132,7 +138,8 @@ var _ Policy = &staticPolicy{}
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, excludeReserved bool) (Policy, error) {
|
||||
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, isolCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, deviceManager devicemanager.Manager, excludeReserved bool) (Policy, error) {
|
||||
+
|
||||
opts, err := NewStaticPolicyOptions(cpuPolicyOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -147,6 +154,8 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
policy := &staticPolicy{
|
||||
topology: topology,
|
||||
affinity: affinity,
|
||||
+ isolcpus: isolCPUs,
|
||||
+ deviceManager: deviceManager,
|
||||
excludeReserved: excludeReserved,
|
||||
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
options: opts,
|
||||
@@ -183,6 +192,12 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
policy.reservedCPUs = reserved
|
||||
policy.reservedPhysicalCPUs = reservedPhysicalCPUs
|
||||
|
||||
+ if !isolCPUs.IsSubsetOf(reserved) {
|
||||
+ klog.Errorf("[cpumanager] isolCPUs %v is not a subset of reserved %v", isolCPUs, reserved)
|
||||
+ reserved = reserved.Union(isolCPUs)
|
||||
+ klog.Warningf("[cpumanager] mismatch isolCPUs %v, force reserved %v", isolCPUs, reserved)
|
||||
+ }
|
||||
+
|
||||
return policy, nil
|
||||
}
|
||||
|
||||
@@ -216,8 +231,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
} else {
|
||||
s.SetDefaultCPUSet(allCPUs)
|
||||
}
|
||||
- klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||
- allCPUs, p.reservedCPUs, s.GetDefaultCPUSet())
|
||||
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, isolcpus:%v, default:%v\n",
|
||||
+ allCPUs, p.reservedCPUs, p.isolcpus, s.GetDefaultCPUSet())
|
||||
+
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -316,16 +332,39 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
return nil
|
||||
}
|
||||
|
||||
- cpuset := p.reservedCPUs
|
||||
+ cpuset := p.reservedCPUs.Clone().Difference(p.isolcpus)
|
||||
if cpuset.IsEmpty() {
|
||||
// If this happens then someone messed up.
|
||||
- return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reservedCPUs)
|
||||
+ return fmt.Errorf("[cpumanager] static policy: reserved container unable to allocate cpus (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v, reserved:%v, isolcpus:%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset, p.reservedCPUs, p.isolcpus)
|
||||
+
|
||||
}
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
klog.Infof("[cpumanager] static policy: reserved: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v", pod.Namespace, string(pod.UID), pod.Name, container.Name, cpuset)
|
||||
return nil
|
||||
}
|
||||
|
||||
+ if isolcpus := p.podIsolCPUs(pod, container); isolcpus.Size() > 0 {
|
||||
+ // container has requested isolated CPUs
|
||||
+ if set, ok := s.GetCPUSet(string(pod.UID), container.Name); ok {
|
||||
+ if set.Equals(isolcpus) {
|
||||
+ klog.Infof("[cpumanager] isolcpus container already present in state, skipping (namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ return nil
|
||||
+ } else {
|
||||
+ klog.Infof("[cpumanager] isolcpus container state has cpus %v, should be %v (namespace: %s, pod UID: %s, pod: %s, container: %s)",
|
||||
+ isolcpus, set, pod.Namespace, string(pod.UID), pod.Name, container.Name)
|
||||
+ }
|
||||
+ }
|
||||
+ // Note that we do not do anything about init containers here.
|
||||
+ // It looks like devices are allocated per-pod based on effective requests/limits
|
||||
+ // and extra devices from initContainers are not freed up when the regular containers start.
|
||||
+ // TODO: confirm this is still true for 1.20
|
||||
+ s.SetCPUSet(string(pod.UID), container.Name, isolcpus)
|
||||
+ klog.Infof("[cpumanager] isolcpus: AddContainer (namespace: %s, pod UID: %s, pod: %s, container: %s); cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, isolcpus)
|
||||
+ return nil
|
||||
+ }
|
||||
+
|
||||
numCPUs := p.guaranteedCPUs(pod, container)
|
||||
if numCPUs == 0 {
|
||||
// container belongs in the shared pool (nothing to do; use default cpuset)
|
||||
@@ -391,7 +430,9 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
|
||||
}
|
||||
s.SetCPUSet(string(pod.UID), container.Name, cpuset)
|
||||
p.updateCPUsToReuse(pod, container, cpuset)
|
||||
-
|
||||
+ klog.Infof("[cpumanager] guaranteed: AddContainer "+
|
||||
+ "(namespace: %s, pod UID: %s, pod: %s, container: %s); numCPUS=%d, cpuset=%v",
|
||||
+ pod.Namespace, string(pod.UID), pod.Name, container.Name, numCPUs, cpuset)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -709,6 +750,36 @@ func isKubeInfra(pod *v1.Pod) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
+// get the isolated CPUs (if any) from the devices associated with a specific container
|
||||
+func (p *staticPolicy) podIsolCPUs(pod *v1.Pod, container *v1.Container) cpuset.CPUSet {
|
||||
+ // NOTE: This is required for TestStaticPolicyAdd() since makePod() does
|
||||
+ // not create UID. We also need a way to properly stub devicemanager.
|
||||
+ if len(string(pod.UID)) == 0 {
|
||||
+ return cpuset.New()
|
||||
+ }
|
||||
+ resContDevices := p.deviceManager.GetDevices(string(pod.UID), container.Name)
|
||||
+ cpuSet := cpuset.New()
|
||||
+ for resourceName, resourceDevs := range resContDevices {
|
||||
+ // this resource name needs to match the isolcpus device plugin
|
||||
+ if resourceName == "windriver.com/isolcpus" {
|
||||
+ for devID, _ := range resourceDevs {
|
||||
+ cpuStrList := []string{devID}
|
||||
+ if len(cpuStrList) > 0 {
|
||||
+ // loop over the list of strings, convert each one to int, add to cpuset
|
||||
+ for _, cpuStr := range cpuStrList {
|
||||
+ cpu, err := strconv.Atoi(cpuStr)
|
||||
+ if err != nil {
|
||||
+ panic(err)
|
||||
+ }
|
||||
+ cpuSet = cpuSet.Union(cpuset.New(cpu))
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ return cpuSet
|
||||
+}
|
||||
+
|
||||
// isHintSocketAligned function return true if numa nodes in hint are socket aligned.
|
||||
func (p *staticPolicy) isHintSocketAligned(hint topologymanager.TopologyHint, minAffinitySize int) bool {
|
||||
numaNodesBitMask := hint.NUMANodeAffinity.GetBits()
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index 63f31486d19..c25ee484a94 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -28,6 +28,7 @@ import (
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
|
||||
)
|
||||
@@ -70,8 +71,9 @@ func (spt staticPolicyTest) PseudoClone() staticPolicyTest {
|
||||
}
|
||||
|
||||
func TestStaticPolicyName(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testExcl := false
|
||||
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||
|
||||
policyName := policy.Name()
|
||||
if policyName != "static" {
|
||||
@@ -81,6 +83,7 @@ func TestStaticPolicyName(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyStart(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "non-corrupted state",
|
||||
@@ -156,7 +159,7 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testCase.excludeReserved)
|
||||
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
@@ -204,7 +207,6 @@ func TestStaticPolicyAdd(t *testing.T) {
|
||||
largeTopoCPUSet := cpuset.New(largeTopoCPUids...)
|
||||
largeTopoSock0CPUSet := cpuset.New(largeTopoSock0CPUids...)
|
||||
largeTopoSock1CPUSet := cpuset.New(largeTopoSock1CPUids...)
|
||||
-
|
||||
// these are the cases which must behave the same regardless the policy options.
|
||||
// So we will permutate the options to ensure this holds true.
|
||||
|
||||
@@ -627,7 +629,9 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) {
|
||||
cpus = testCase.reservedCPUs.Clone()
|
||||
}
|
||||
testExcl := false
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, tm, testCase.options, testExcl)
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, cpus, tm, testCase.options, testDM, testExcl)
|
||||
+
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -674,6 +678,8 @@ func runStaticPolicyTestCaseWithFeatureGate(t *testing.T, testCase staticPolicyT
|
||||
}
|
||||
|
||||
func TestStaticPolicyReuseCPUs(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []struct {
|
||||
staticPolicyTest
|
||||
expCSetAfterAlloc cpuset.CPUSet
|
||||
@@ -698,7 +704,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -731,6 +737,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
|
||||
|
||||
func TestStaticPolicyRemove(t *testing.T) {
|
||||
excludeReserved := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "SingleSocketHT, DeAllocOneContainer",
|
||||
@@ -789,7 +796,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -812,6 +819,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
|
||||
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
excludeReserved := false
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
testCases := []struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
@@ -880,7 +888,8 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
||||
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, excludeReserved)
|
||||
+
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: tc.stAssignments,
|
||||
@@ -913,6 +922,7 @@ type staticPolicyTestWithResvList struct {
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
reserved cpuset.CPUSet
|
||||
+ isolcpus cpuset.CPUSet
|
||||
stAssignments state.ContainerCPUAssignments
|
||||
stDefaultCPUSet cpuset.CPUSet
|
||||
pod *v1.Pod
|
||||
@@ -923,6 +933,8 @@ type staticPolicyTestWithResvList struct {
|
||||
}
|
||||
|
||||
func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
+ testExcl := false
|
||||
testCases := []staticPolicyTestWithResvList{
|
||||
{
|
||||
description: "empty cpuset",
|
||||
@@ -952,10 +964,9 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
||||
},
|
||||
}
|
||||
- testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, cpuset.New(), topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
@@ -996,6 +1007,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 1,
|
||||
reserved: cpuset.New(0),
|
||||
+ isolcpus: cpuset.New(),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||
@@ -1008,6 +1020,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.New(0, 1),
|
||||
+ isolcpus: cpuset.New(),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||
@@ -1020,6 +1033,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.New(0, 1),
|
||||
+ isolcpus: cpuset.New(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"fakeContainer100": cpuset.New(2, 3, 6, 7),
|
||||
@@ -1036,6 +1050,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
topo: topoSingleSocketHT,
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.New(0, 1),
|
||||
+ isolcpus: cpuset.New(),
|
||||
stAssignments: state.ContainerCPUAssignments{
|
||||
"fakePod": map[string]cpuset.CPUSet{
|
||||
"fakeContainer100": cpuset.New(2, 3, 6, 7),
|
||||
@@ -1047,11 +1062,29 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
expCPUAlloc: true,
|
||||
expCSet: cpuset.New(0, 1),
|
||||
},
|
||||
+ {
|
||||
+ description: "InfraPod, SingleSocketHT, Isolcpus, ExpectAllocReserved",
|
||||
+ topo: topoSingleSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ reserved: cpuset.New(0, 1),
|
||||
+ isolcpus: cpuset.New(1),
|
||||
+ stAssignments: state.ContainerCPUAssignments{
|
||||
+ "fakePod": map[string]cpuset.CPUSet{
|
||||
+ "fakeContainer100": cpuset.New(2, 3, 6, 7),
|
||||
+ },
|
||||
+ },
|
||||
+ stDefaultCPUSet: cpuset.New(4, 5),
|
||||
+ pod: infraPod,
|
||||
+ expErr: nil,
|
||||
+ expCPUAlloc: true,
|
||||
+ expCSet: cpuset.New(0),
|
||||
+ },
|
||||
}
|
||||
|
||||
testExcl := true
|
||||
+ testDM, _ := devicemanager.NewManagerStub()
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, testCase.isolcpus, topologymanager.NewFakeManager(), nil, testDM, testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
diff --git a/pkg/kubelet/cm/devicemanager/manager_stub.go b/pkg/kubelet/cm/devicemanager/manager_stub.go
|
||||
new file mode 100644
|
||||
index 00000000000..e6874f88d8a
|
||||
--- /dev/null
|
||||
+++ b/pkg/kubelet/cm/devicemanager/manager_stub.go
|
||||
@@ -0,0 +1,99 @@
|
||||
+/*
|
||||
+Copyright 2017 The Kubernetes Authors.
|
||||
+
|
||||
+Licensed under the Apache License, Version 2.0 (the "License");
|
||||
+you may not use this file except in compliance with the License.
|
||||
+You may obtain a copy of the License at
|
||||
+
|
||||
+ http://www.apache.org/licenses/LICENSE-2.0
|
||||
+
|
||||
+Unless required by applicable law or agreed to in writing, software
|
||||
+distributed under the License is distributed on an "AS IS" BASIS,
|
||||
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
+See the License for the specific language governing permissions and
|
||||
+limitations under the License.
|
||||
+*/
|
||||
+
|
||||
+package devicemanager
|
||||
+
|
||||
+import (
|
||||
+ v1 "k8s.io/api/core/v1"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/config"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/lifecycle"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/pluginmanager/cache"
|
||||
+ schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
|
||||
+)
|
||||
+
|
||||
+// ManagerStub provides a simple stub implementation for the Device Manager.
|
||||
+type ManagerStub struct{}
|
||||
+
|
||||
+// NewManagerStub creates a ManagerStub.
|
||||
+func NewManagerStub() (*ManagerStub, error) {
|
||||
+ return &ManagerStub{}, nil
|
||||
+}
|
||||
+
|
||||
+// Start simply returns nil.
|
||||
+func (h *ManagerStub) Start(activePods ActivePodsFunc, sourcesReady config.SourcesReady) error {
|
||||
+ return nil
|
||||
+}
|
||||
+
|
||||
+// Stop simply returns nil.
|
||||
+func (h *ManagerStub) Stop() error {
|
||||
+ return nil
|
||||
+}
|
||||
+
|
||||
+// Allocate simply returns nil.
|
||||
+func (h *ManagerStub) Allocate(pod *v1.Pod, container *v1.Container) error {
|
||||
+ return nil
|
||||
+}
|
||||
+
|
||||
+// UpdatePluginResources simply returns nil.
|
||||
+func (h *ManagerStub) UpdatePluginResources(node *schedulerframework.NodeInfo, attrs *lifecycle.PodAdmitAttributes) error {
|
||||
+ return nil
|
||||
+}
|
||||
+
|
||||
+// GetDeviceRunContainerOptions simply returns nil.
|
||||
+func (h *ManagerStub) GetDeviceRunContainerOptions(pod *v1.Pod, container *v1.Container) (*DeviceRunContainerOptions, error) {
|
||||
+ return nil, nil
|
||||
+}
|
||||
+
|
||||
+// GetCapacity simply returns nil capacity and empty removed resource list.
|
||||
+func (h *ManagerStub) GetCapacity() (v1.ResourceList, v1.ResourceList, []string) {
|
||||
+ return nil, nil, []string{}
|
||||
+}
|
||||
+
|
||||
+// GetWatcherHandler returns plugin watcher interface
|
||||
+func (h *ManagerStub) GetWatcherHandler() cache.PluginHandler {
|
||||
+ return nil
|
||||
+}
|
||||
+
|
||||
+// GetTopologyHints returns an empty TopologyHint map
|
||||
+func (h *ManagerStub) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
|
||||
+ return map[string][]topologymanager.TopologyHint{}
|
||||
+}
|
||||
+
|
||||
+// GetPodTopologyHints returns an empty TopologyHint map
|
||||
+func (h *ManagerStub) GetPodTopologyHints(pod *v1.Pod) map[string][]topologymanager.TopologyHint {
|
||||
+ return map[string][]topologymanager.TopologyHint{}
|
||||
+}
|
||||
+
|
||||
+// GetDevices returns nil
|
||||
+func (h *ManagerStub) GetDevices(_, _ string) ResourceDeviceInstances {
|
||||
+ return nil
|
||||
+}
|
||||
+
|
||||
+// GetAllocatableDevices returns nil
|
||||
+func (h *ManagerStub) GetAllocatableDevices() ResourceDeviceInstances {
|
||||
+ return nil
|
||||
+}
|
||||
+
|
||||
+// ShouldResetExtendedResourceCapacity returns false
|
||||
+func (h *ManagerStub) ShouldResetExtendedResourceCapacity() bool {
|
||||
+ return false
|
||||
+}
|
||||
+
|
||||
+// UpdateAllocatedDevices returns nothing
|
||||
+func (h *ManagerStub) UpdateAllocatedDevices() {
|
||||
+ return
|
||||
+}
|
||||
--
|
||||
2.25.1
|
||||
|
@ -0,0 +1,357 @@
|
||||
From 1d1addb2c0a6bac1513a83431998f17aec76cd20 Mon Sep 17 00:00:00 2001
|
||||
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
Date: Fri, 1 Sep 2023 07:15:25 -0400
|
||||
Subject: [PATCH] kubelet cpumanager keep normal containers off reserved CPUs
|
||||
|
||||
When starting the kubelet process, two separate sets of reserved CPUs
|
||||
may be specified. With this change CPUs reserved via
|
||||
'--system-reserved=cpu'
|
||||
or '--kube-reserved=cpu' will be ignored by Kubernetes itself. A small
|
||||
tweak to the default CPU affinity ensures that "normal" Kubernetes
|
||||
pods won't run on the reserved CPUs.
|
||||
|
||||
Co-authored-by: Jim Gauld <james.gauld@windriver.com>
|
||||
Signed-off-by: Sachin Gopala Krishna <saching.krishna@windriver.com>
|
||||
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
||||
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager.go | 6 ++-
|
||||
pkg/kubelet/cm/cpumanager/cpu_manager_test.go | 19 +++++++---
|
||||
pkg/kubelet/cm/cpumanager/policy_static.go | 30 ++++++++++++---
|
||||
.../cm/cpumanager/policy_static_test.go | 38 ++++++++++++++-----
|
||||
4 files changed, 71 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
index 9e2dce60501..e2c89efeb2e 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
||||
@@ -192,7 +192,11 @@ func NewManager(cpuPolicyName string, cpuPolicyOptions map[string]string, reconc
|
||||
// exclusively allocated.
|
||||
reservedCPUsFloat := float64(reservedCPUs.MilliValue()) / 1000
|
||||
numReservedCPUs := int(math.Ceil(reservedCPUsFloat))
|
||||
- policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions)
|
||||
+ // NOTE: Set excludeReserved unconditionally to exclude reserved CPUs from default cpuset.
|
||||
+ // This variable is primarily to make testing easier.
|
||||
+ excludeReserved := true
|
||||
+ policy, err = NewStaticPolicy(topo, numReservedCPUs, specificCPUs, affinity, cpuPolicyOptions, excludeReserved)
|
||||
+
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("new static policy error: %w", err)
|
||||
}
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
index 250f1eb014a..bb69b0ac084 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager_test.go
|
||||
@@ -215,6 +215,7 @@ func makeMultiContainerPod(initCPUs, appCPUs []struct{ request, limit string })
|
||||
}
|
||||
|
||||
func TestCPUManagerAdd(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -230,7 +231,8 @@ func TestCPUManagerAdd(t *testing.T) {
|
||||
0,
|
||||
cpuset.New(),
|
||||
topologymanager.NewFakeManager(),
|
||||
- nil)
|
||||
+ nil,
|
||||
+ testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
@@ -479,8 +481,9 @@ func TestCPUManagerAddWithInitContainers(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
+ testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||
|
||||
mockState := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -705,6 +708,7 @@ func TestCPUManagerRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestReconcileState(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 8,
|
||||
@@ -724,7 +728,8 @@ func TestReconcileState(t *testing.T) {
|
||||
0,
|
||||
cpuset.New(),
|
||||
topologymanager.NewFakeManager(),
|
||||
- nil)
|
||||
+ nil,
|
||||
+ testExcl)
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
@@ -1228,6 +1233,7 @@ func TestReconcileState(t *testing.T) {
|
||||
// above test cases are without kubelet --reserved-cpus cmd option
|
||||
// the following tests are with --reserved-cpus configured
|
||||
func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
+ testExcl := false
|
||||
testPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
NumCPUs: 4,
|
||||
@@ -1243,7 +1249,8 @@ func TestCPUManagerAddWithResvList(t *testing.T) {
|
||||
1,
|
||||
cpuset.New(0),
|
||||
topologymanager.NewFakeManager(),
|
||||
- nil)
|
||||
+ nil,
|
||||
+ testExcl)
|
||||
testCases := []struct {
|
||||
description string
|
||||
updateErr error
|
||||
@@ -1368,6 +1375,7 @@ func TestCPUManagerHandlePolicyOptions(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
||||
+ testExcl := false
|
||||
nonePolicy, _ := NewNonePolicy(nil)
|
||||
staticPolicy, _ := NewStaticPolicy(
|
||||
&topology.CPUTopology{
|
||||
@@ -1384,7 +1392,8 @@ func TestCPUManagerGetAllocatableCPUs(t *testing.T) {
|
||||
1,
|
||||
cpuset.New(0),
|
||||
topologymanager.NewFakeManager(),
|
||||
- nil)
|
||||
+ nil,
|
||||
+ testExcl)
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static.go b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
index 7a82de03da8..d25b2482537 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static.go
|
||||
@@ -105,6 +105,8 @@ type staticPolicy struct {
|
||||
topology *topology.CPUTopology
|
||||
// set of CPUs that is not available for exclusive assignment
|
||||
reservedCPUs cpuset.CPUSet
|
||||
+ // If true, default CPUSet should exclude reserved CPUs
|
||||
+ excludeReserved bool
|
||||
// Superset of reservedCPUs. It includes not just the reservedCPUs themselves,
|
||||
// but also any siblings of those reservedCPUs on the same physical die.
|
||||
// NOTE: If the reserved set includes full physical CPUs from the beginning
|
||||
@@ -125,7 +127,7 @@ var _ Policy = &staticPolicy{}
|
||||
// NewStaticPolicy returns a CPU manager policy that does not change CPU
|
||||
// assignments for exclusively pinned guaranteed containers after the main
|
||||
// container process starts.
|
||||
-func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string) (Policy, error) {
|
||||
+func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reservedCPUs cpuset.CPUSet, affinity topologymanager.Store, cpuPolicyOptions map[string]string, excludeReserved bool) (Policy, error) {
|
||||
opts, err := NewStaticPolicyOptions(cpuPolicyOptions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -140,6 +142,7 @@ func NewStaticPolicy(topology *topology.CPUTopology, numReservedCPUs int, reserv
|
||||
policy := &staticPolicy{
|
||||
topology: topology,
|
||||
affinity: affinity,
|
||||
+ excludeReserved: excludeReserved,
|
||||
cpusToReuse: make(map[string]cpuset.CPUSet),
|
||||
options: opts,
|
||||
}
|
||||
@@ -201,7 +204,15 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
// state is empty initialize
|
||||
allCPUs := p.topology.CPUDetails.CPUs()
|
||||
- s.SetDefaultCPUSet(allCPUs)
|
||||
+ if p.excludeReserved {
|
||||
+ // Exclude reserved CPUs from the default CPUSet to keep containers off them
|
||||
+ // unless explicitly affined.
|
||||
+ s.SetDefaultCPUSet(allCPUs.Difference(p.reservedCPUs))
|
||||
+ } else {
|
||||
+ s.SetDefaultCPUSet(allCPUs)
|
||||
+ }
|
||||
+ klog.Infof("[cpumanager] static policy: CPUSet: allCPUs:%v, reserved:%v, default:%v\n",
|
||||
+ allCPUs, p.reservedCPUs, s.GetDefaultCPUSet())
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -209,11 +220,12 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
// 1. Check if the reserved cpuset is not part of default cpuset because:
|
||||
// - kube/system reserved have changed (increased) - may lead to some containers not being able to start
|
||||
// - user tampered with file
|
||||
- if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
||||
- return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
- p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||
+ if !p.excludeReserved {
|
||||
+ if !p.reservedCPUs.Intersection(tmpDefaultCPUset).Equals(p.reservedCPUs) {
|
||||
+ return fmt.Errorf("not all reserved cpus: \"%s\" are present in defaultCpuSet: \"%s\"",
|
||||
+ p.reservedCPUs.String(), tmpDefaultCPUset.String())
|
||||
+ }
|
||||
}
|
||||
-
|
||||
// 2. Check if state for static policy is consistent
|
||||
for pod := range tmpAssignments {
|
||||
for container, cset := range tmpAssignments[pod] {
|
||||
@@ -240,6 +252,9 @@ func (p *staticPolicy) validateState(s state.State) error {
|
||||
}
|
||||
}
|
||||
totalKnownCPUs = totalKnownCPUs.Union(tmpCPUSets...)
|
||||
+ if p.excludeReserved {
|
||||
+ totalKnownCPUs = totalKnownCPUs.Union(p.reservedCPUs)
|
||||
+ }
|
||||
if !totalKnownCPUs.Equals(p.topology.CPUDetails.CPUs()) {
|
||||
return fmt.Errorf("current set of available CPUs \"%s\" doesn't match with CPUs in state \"%s\"",
|
||||
p.topology.CPUDetails.CPUs().String(), totalKnownCPUs.String())
|
||||
@@ -374,6 +389,9 @@ func (p *staticPolicy) RemoveContainer(s state.State, podUID string, containerNa
|
||||
cpusInUse := getAssignedCPUsOfSiblings(s, podUID, containerName)
|
||||
if toRelease, ok := s.GetCPUSet(podUID, containerName); ok {
|
||||
s.Delete(podUID, containerName)
|
||||
+ if p.excludeReserved {
|
||||
+ toRelease = toRelease.Difference(p.reservedCPUs)
|
||||
+ }
|
||||
// Mutate the shared pool, adding released cpus.
|
||||
toRelease = toRelease.Difference(cpusInUse)
|
||||
s.SetDefaultCPUSet(s.GetDefaultCPUSet().Union(toRelease))
|
||||
diff --git a/pkg/kubelet/cm/cpumanager/policy_static_test.go b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
index 2c88dee0ba5..c4675394a93 100644
|
||||
--- a/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
+++ b/pkg/kubelet/cm/cpumanager/policy_static_test.go
|
||||
@@ -36,6 +36,7 @@ type staticPolicyTest struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
numReservedCPUs int
|
||||
+ excludeReserved bool
|
||||
reservedCPUs *cpuset.CPUSet
|
||||
podUID string
|
||||
options map[string]string
|
||||
@@ -69,7 +70,8 @@ func (spt staticPolicyTest) PseudoClone() staticPolicyTest {
|
||||
}
|
||||
|
||||
func TestStaticPolicyName(t *testing.T) {
|
||||
- policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||
+ testExcl := false
|
||||
+ policy, _ := NewStaticPolicy(topoSingleSocketHT, 1, cpuset.New(), topologymanager.NewFakeManager(), nil, testExcl)
|
||||
|
||||
policyName := policy.Name()
|
||||
if policyName != "static" {
|
||||
@@ -99,6 +101,15 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
stDefaultCPUSet: cpuset.New(),
|
||||
expCSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11),
|
||||
},
|
||||
+ {
|
||||
+ description: "empty cpuset exclude reserved",
|
||||
+ topo: topoDualSocketHT,
|
||||
+ numReservedCPUs: 2,
|
||||
+ excludeReserved: true,
|
||||
+ stAssignments: state.ContainerCPUAssignments{},
|
||||
+ stDefaultCPUSet: cpuset.New(),
|
||||
+ expCSet: cpuset.New(1, 2, 3, 4, 5, 7, 8, 9, 10, 11),
|
||||
+ },
|
||||
{
|
||||
description: "reserved cores 0 & 6 are not present in available cpuset",
|
||||
topo: topoDualSocketHT,
|
||||
@@ -145,7 +156,8 @@ func TestStaticPolicyStart(t *testing.T) {
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||
+ p, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||
+
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -614,7 +626,8 @@ func runStaticPolicyTestCase(t *testing.T, testCase staticPolicyTest) {
|
||||
if testCase.reservedCPUs != nil {
|
||||
cpus = testCase.reservedCPUs.Clone()
|
||||
}
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, tm, testCase.options)
|
||||
+ testExcl := false
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpus, tm, testCase.options, testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -685,7 +698,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, testCase.excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -717,6 +730,7 @@ func TestStaticPolicyReuseCPUs(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestStaticPolicyRemove(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
testCases := []staticPolicyTest{
|
||||
{
|
||||
description: "SingleSocketHT, DeAllocOneContainer",
|
||||
@@ -775,7 +789,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
@@ -797,6 +811,7 @@ func TestStaticPolicyRemove(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
+ excludeReserved := false
|
||||
testCases := []struct {
|
||||
description string
|
||||
topo *topology.CPUTopology
|
||||
@@ -865,7 +880,7 @@ func TestTopologyAwareAllocateCPUs(t *testing.T) {
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
- p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), topologymanager.NewFakeManager(), nil)
|
||||
+ p, _ := NewStaticPolicy(tc.topo, 0, cpuset.New(), topologymanager.NewFakeManager(), nil, excludeReserved)
|
||||
policy := p.(*staticPolicy)
|
||||
st := &mockState{
|
||||
assignments: tc.stAssignments,
|
||||
@@ -937,9 +952,11 @@ func TestStaticPolicyStartWithResvList(t *testing.T) {
|
||||
expNewErr: fmt.Errorf("[cpumanager] unable to reserve the required amount of CPUs (size of 0-1 did not equal 1)"),
|
||||
},
|
||||
}
|
||||
+ testExcl := false
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
- p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
|
||||
+ p, err := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||
+
|
||||
if !reflect.DeepEqual(err, testCase.expNewErr) {
|
||||
t.Errorf("StaticPolicy Start() error (%v). expected error: %v but got: %v",
|
||||
testCase.description, testCase.expNewErr, err)
|
||||
@@ -979,7 +996,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
numReservedCPUs: 1,
|
||||
reserved: cpuset.New(0),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
- stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
+ stDefaultCPUSet: cpuset.New(1, 2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "8000m", "8000m"),
|
||||
expErr: fmt.Errorf("not enough cpus available to satisfy request"),
|
||||
expCPUAlloc: false,
|
||||
@@ -991,7 +1008,7 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
numReservedCPUs: 2,
|
||||
reserved: cpuset.New(0, 1),
|
||||
stAssignments: state.ContainerCPUAssignments{},
|
||||
- stDefaultCPUSet: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7),
|
||||
+ stDefaultCPUSet: cpuset.New(2, 3, 4, 5, 6, 7),
|
||||
pod: makePod("fakePod", "fakeContainer2", "1000m", "1000m"),
|
||||
expErr: nil,
|
||||
expCPUAlloc: true,
|
||||
@@ -1015,8 +1032,9 @@ func TestStaticPolicyAddWithResvList(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
+ testExcl := true
|
||||
for _, testCase := range testCases {
|
||||
- policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil)
|
||||
+ policy, _ := NewStaticPolicy(testCase.topo, testCase.numReservedCPUs, testCase.reserved, topologymanager.NewFakeManager(), nil, testExcl)
|
||||
|
||||
st := &mockState{
|
||||
assignments: testCase.stAssignments,
|
||||
--
|
||||
2.25.1
|
||||
|
@ -0,0 +1,50 @@
|
||||
From d4aa04d78e4a2692e93c1fa638dd624720a8504a Mon Sep 17 00:00:00 2001
|
||||
From: Jim Gauld <James.Gauld@windriver.com>
|
||||
Date: Fri, 11 Feb 2022 11:06:35 -0500
|
||||
Subject: [PATCH 04/10] kubelet: sort isolcpus allocation when SMT enabled
|
||||
|
||||
The existing device manager code returns CPUs as devices in unsorted
|
||||
order. This numerically sorts isolcpus allocations when SMT/HT is
|
||||
enabled on the host. This logs SMT pairs, singletons, and algorithm
|
||||
order details to make the algorithm understandable.
|
||||
|
||||
Signed-off-by: Jim Gauld <James.Gauld@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/devicemanager/manager.go | 13 ++++++++++++-
|
||||
1 file changed, 12 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go
|
||||
index 191861d9e4a..4c897f0e032 100644
|
||||
--- a/pkg/kubelet/cm/devicemanager/manager.go
|
||||
+++ b/pkg/kubelet/cm/devicemanager/manager.go
|
||||
@@ -545,7 +545,16 @@ func order_devices_by_sibling(devices sets.String, needed int) ([]string, error)
|
||||
return cpu_lst[0]
|
||||
}
|
||||
}
|
||||
+ //Make post-analysis of selection algorithm obvious by numerical sorting
|
||||
+ //the available isolated cpu_id.
|
||||
+ cpu_ids := make([]int, 0, int(devices.Len()))
|
||||
for cpu_id := range devices {
|
||||
+ cpu_id_, _ := strconv.Atoi(cpu_id)
|
||||
+ cpu_ids = append(cpu_ids, cpu_id_)
|
||||
+ }
|
||||
+ sort.Ints(cpu_ids)
|
||||
+ for _, _cpu_id := range cpu_ids {
|
||||
+ cpu_id := strconv.Itoa(_cpu_id)
|
||||
// If we've already found cpu_id as a sibling, skip it.
|
||||
if _, ok := _iterated_cpu[cpu_id]; ok {
|
||||
continue
|
||||
@@ -587,7 +596,9 @@ func order_devices_by_sibling(devices sets.String, needed int) ([]string, error)
|
||||
}
|
||||
}
|
||||
}
|
||||
- //klog.Infof("needed=%d ordered_cpu_list=%v", needed, dev_lst)
|
||||
+ //This algorithm will get some attention. Show minimal details.
|
||||
+ klog.Infof("order_devices_by_sibling: needed=%d, smtpairs=%v, singletons=%v, order=%v",
|
||||
+ needed, sibling_lst, single_lst, dev_lst)
|
||||
return dev_lst, nil
|
||||
}
|
||||
func smt_enabled() bool {
|
||||
--
|
||||
2.25.1
|
||||
|
@ -0,0 +1,152 @@
|
||||
From f6c4493bb4a0683b27c91ed53d3a25c9ef7a65cb Mon Sep 17 00:00:00 2001
|
||||
From: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
Date: Mon, 4 Sep 2023 02:55:18 -0400
|
||||
Subject: [PATCH] kubernetes: make isolcpus allocation SMT-aware
|
||||
|
||||
Enhance isolcpus support in Kubernetes to allocate isolated SMT
|
||||
siblings to the same container when SMT/HT is enabled on the host.
|
||||
|
||||
As it stands, the device manager code in Kubernetes is not SMT-aware
|
||||
(since normally it doesn't deal with CPUs). However, StarlingX
|
||||
exposes isolated CPUs as devices and if possible we want to allocate
|
||||
all SMT siblings from a CPU core to the same container in order to
|
||||
minimize cross- container interference due to resource contention
|
||||
within the CPU core.
|
||||
|
||||
The solution is basically to take the list of isolated CPUs and
|
||||
re-order it so that the SMT siblings are next to each other. That
|
||||
way the existing resource selection code will allocate the siblings
|
||||
together. As an optimization, if it is known that an odd number
|
||||
of isolated CPUs are desired, a singleton SMT sibling will be
|
||||
inserted into the list to avoid breaking up sibling pairs.
|
||||
|
||||
Signed-off-by: Tao Wang <tao.wang@windriver.com>
|
||||
Signed-off-by: Ramesh Kumar Sivanandam <rameshkumar.sivanandam@windriver.com>
|
||||
Signed-off-by: Boovan Rajendran <boovan.rajendran@windriver.com>
|
||||
---
|
||||
pkg/kubelet/cm/devicemanager/manager.go | 84 ++++++++++++++++++++++++-
|
||||
1 file changed, 83 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/pkg/kubelet/cm/devicemanager/manager.go b/pkg/kubelet/cm/devicemanager/manager.go
|
||||
index 7499de4460f..b2d529de88f 100644
|
||||
--- a/pkg/kubelet/cm/devicemanager/manager.go
|
||||
+++ b/pkg/kubelet/cm/devicemanager/manager.go
|
||||
@@ -19,10 +19,13 @@ package devicemanager
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
+ "io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
+ "strconv"
|
||||
+ "strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -36,6 +39,7 @@ import (
|
||||
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
|
||||
+ "k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/checkpoint"
|
||||
plugin "k8s.io/kubernetes/pkg/kubelet/cm/devicemanager/plugin/v1beta1"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
@@ -526,6 +530,75 @@ func (m *ManagerImpl) UpdateAllocatedDevices() {
|
||||
m.allocatedDevices = m.podDevices.devices()
|
||||
}
|
||||
|
||||
+//Given a list of isolated CPUs in 'devices', and the number of desired CPUs in 'needed',
|
||||
+//return an ordered list of isolated CPUs such that the first 'needed' CPUs in the list
|
||||
+//contain as many hyperthread sibling pairs as possible.
|
||||
+func order_devices_by_sibling(devices sets.String, needed int) ([]string, error) {
|
||||
+ var dev_lst []string
|
||||
+ var single_lst []string
|
||||
+ sibling_lst := make([]string, 0, int(devices.Len()))
|
||||
+ _iterated_cpu := make(map[string]string)
|
||||
+ get_sibling := func(cpu string, cpu_lst []string) string {
|
||||
+ if cpu_lst[0] == cpu {
|
||||
+ return cpu_lst[1]
|
||||
+ } else {
|
||||
+ return cpu_lst[0]
|
||||
+ }
|
||||
+ }
|
||||
+ for cpu_id := range devices {
|
||||
+ // If we've already found cpu_id as a sibling, skip it.
|
||||
+ if _, ok := _iterated_cpu[cpu_id]; ok {
|
||||
+ continue
|
||||
+ }
|
||||
+ devPath := fmt.Sprintf("/sys/devices/system/cpu/cpu%s/topology/thread_siblings_list", cpu_id)
|
||||
+ dat, err := ioutil.ReadFile(devPath)
|
||||
+ if err != nil {
|
||||
+ return dev_lst, fmt.Errorf("Can't read cpu[%s] thread_siblings_list", cpu_id)
|
||||
+ }
|
||||
+ cpustring := strings.TrimSuffix(string(dat), "\n")
|
||||
+ cpu_pair_set, err := cpuset.Parse(cpustring)
|
||||
+ if err != nil {
|
||||
+ return dev_lst, fmt.Errorf("Unable to parse thread_siblings_list[%s] string to cpuset", cpustring)
|
||||
+ }
|
||||
+ var cpu_pair_lst []string
|
||||
+ for _, v := range cpu_pair_set.List() {
|
||||
+ cpu_pair_lst = append(cpu_pair_lst, strconv.Itoa(v))
|
||||
+ }
|
||||
+ sibling_cpu_id := get_sibling(cpu_id, cpu_pair_lst)
|
||||
+ if _, ok := devices[sibling_cpu_id]; ok {
|
||||
+ sibling_lst = append(sibling_lst, cpu_id, sibling_cpu_id)
|
||||
+ _iterated_cpu[sibling_cpu_id] = ""
|
||||
+ } else {
|
||||
+ single_lst = append(single_lst, cpu_id)
|
||||
+ }
|
||||
+ _iterated_cpu[cpu_id] = ""
|
||||
+ }
|
||||
+ if needed%2 == 0 {
|
||||
+ dev_lst = append(sibling_lst, single_lst...)
|
||||
+ } else {
|
||||
+ if len(single_lst) > 1 {
|
||||
+ _tmp_list := append(sibling_lst, single_lst[1:]...)
|
||||
+ dev_lst = append(single_lst[0:1], _tmp_list...)
|
||||
+ } else {
|
||||
+ if len(single_lst) == 0 {
|
||||
+ dev_lst = sibling_lst
|
||||
+ } else {
|
||||
+ dev_lst = append(single_lst, sibling_lst...)
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ //klog.Infof("needed=%d ordered_cpu_list=%v", needed, dev_lst)
|
||||
+ return dev_lst, nil
|
||||
+}
|
||||
+func smt_enabled() bool {
|
||||
+ dat, _ := ioutil.ReadFile("/sys/devices/system/cpu/smt/active")
|
||||
+ state := strings.TrimSuffix(string(dat), "\n")
|
||||
+ if state == "0" {
|
||||
+ return false
|
||||
+ }
|
||||
+ return true
|
||||
+}
|
||||
+
|
||||
// Returns list of device Ids we need to allocate with Allocate rpc call.
|
||||
// Returns empty list in case we don't need to issue the Allocate rpc call.
|
||||
func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, required int, reusableDevices sets.String) (sets.String, error) {
|
||||
@@ -575,7 +648,16 @@ func (m *ManagerImpl) devicesToAllocate(podUID, contName, resource string, requi
|
||||
// Create a closure to help with device allocation
|
||||
// Returns 'true' once no more devices need to be allocated.
|
||||
allocateRemainingFrom := func(devices sets.String) bool {
|
||||
- for device := range devices.Difference(allocated) {
|
||||
+ availableDevices := devices.Difference(allocated).List()
|
||||
+ // If we're dealing with isolcpus and SMT is enabled, reorder to group SMT siblings together.
|
||||
+ if resource == "windriver.com/isolcpus" && len(devices) > 0 && smt_enabled() {
|
||||
+ var err error
|
||||
+ availableDevices, err = order_devices_by_sibling(devices.Difference(allocated), needed)
|
||||
+ if err != nil {
|
||||
+ klog.Errorf("error in order_devices_by_sibling: %v", err)
|
||||
+ }
|
||||
+ }
|
||||
+ for _, device := range availableDevices {
|
||||
m.allocatedDevices[resource].Insert(device)
|
||||
allocated.Insert(device)
|
||||
needed--
|
||||
--
|
||||
2.25.1
|
||||
|
@ -0,0 +1,10 @@
|
||||
kubeadm-create-platform-pods-with-zero-CPU-resources.patch
|
||||
Revert-use-subpath-for-coredns-only-for-default-repo.patch
|
||||
kubernetes-make-isolcpus-allocation-SMT-aware.patch
|
||||
kubelet-sort-isolcpus-allocation-when-SMT-enabled.patch
|
||||
kubelet-cpumanager-disable-CFS-quota-throttling.patch
|
||||
kubelet-cpumanager-keep-normal-containers-off-reserv.patch
|
||||
kubelet-cpumanager-infra-pods-use-system-reserved-CP.patch
|
||||
kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch
|
||||
enable-support-for-kubernetes-to-ignore-isolcpus.patch
|
||||
kubelet-CFS-quota-throttling-for-non-integer-cpulimit.patch
|
Loading…
Reference in New Issue
Block a user