integ/kubernetes/kubernetes-1.21.3/centos/files/kubelet-cpumanager-disable-CFS-quota-throttling-for-.patch
Jim Gauld 872dd513fc Add staged kubernetes version 1.21.3
Multiple versions of kubernetes are required to support upgrade.

This adds staged version of kubernetes 1.21.3, built with a
specific version of golang.

All subpackage versions are included in the iso image without
collisions.

The following patches are ported to specific kubernetes version:
kubelet-cpumanager-disable-CFS-quota-throttling-for-.patch
kubelet-cpumanager-keep-normal-containers-off-reserv.patch
kubelet-cpumanager-infrastructure-pods-use-system-re.patch
kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch
kubeadm-create-platform-pods-with-zero-CPU-resources.patch
enable-support-for-kubernetes-to-ignore-isolcpus.patch

The following changes were made for 1.21.3:
- following upstream commit was reverted:
  Revert-use-subpath-for-coredns-only-for-default-repo.patch

- kubelet-cpumanager-disable-CFS-quota-throttling-for-.patch
  was refactored due to new internal_container_lifecycle framework
  We leverage the same mechanism to set Linux resources as:
  cpu manager: specify the container CPU set during the creation
  (commit 38dc7509f862f081828e7d9167107b8c6e98ea23).

- kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch
  was refactored due to upstream API change:
  node: podresources: make GetDevices() consistent
  (commit ad68f9588c72d6477b5a290c548a9031063ac659).

  The routine podIsolCPUs() was refactored in 1.21.3 since the
  API p.deviceManager.GetDevices() is returning multiple devices
  with a device per cpu. The resultant cpuset needs to be the
  aggregate.

Story: 2008972
Task: 43056

Signed-off-by: Jim Gauld <james.gauld@windriver.com>
Change-Id: I5ba7ff2e6aebb744af265698c0f90256ac5e70f4
2021-09-22 16:31:39 -04:00

257 lines
11 KiB
Diff

From 3f69868f7bca99f6875dd4d197b3a974d1b558ed Mon Sep 17 00:00:00 2001
From: Jim Gauld <james.gauld@windriver.com>
Date: Wed, 22 Sep 2021 10:09:06 -0400
Subject: [PATCH 1/7] kubelet cpumanager disable CFS quota throttling for
Guaranteed pods
This disables CFS CPU quota to avoid performance degradation due to
Linux kernel CFS quota implementation. Note that 4.18 kernel attempts
to solve the CFS throttling problem, but there are reports that it is
not completely effective.
This disables CFS quota throttling for Guaranteed pods for both
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
Disabling has a dramatic latency improvement for HTTP response times.
This patch is refactored in 1.21.3 due to new internal_container_lifecycle
framework. We leverage the same mechanism to set Linux resources as:
cpu manager: specify the container CPU set during the creation
(commit 38dc7509f862f081828e7d9167107b8c6e98ea23).
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
---
pkg/kubelet/cm/cpumanager/cpu_manager.go | 7 ++++
pkg/kubelet/cm/cpumanager/fake_cpu_manager.go | 6 +++
pkg/kubelet/cm/helpers_linux.go | 10 +++++
pkg/kubelet/cm/helpers_linux_test.go | 42 ++++++++++---------
.../cm/internal_container_lifecycle_linux.go | 9 ++++
5 files changed, 54 insertions(+), 20 deletions(-)
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
index 5a6e5082f15..f7b9c8d07bf 100644
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
@@ -72,6 +72,9 @@ type Manager interface {
// State returns a read-only interface to the internal CPU manager state.
State() state.Reader
+ // GetCPUPolicy returns the assigned CPU manager policy
+ GetCPUPolicy() string
+
// GetTopologyHints implements the topologymanager.HintProvider Interface
// and is consulted to achieve NUMA aware resource alignment among this
// and other resource controllers.
@@ -291,6 +294,10 @@ func (m *manager) State() state.Reader {
return m.state
}
+func (m *manager) GetCPUPolicy() string {
+ return m.policy.Name()
+}
+
func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
// Garbage collect any stranded resources before providing TopologyHints
m.removeStaleState()
diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
index 2c38b52b374..1cb0ea10923 100644
--- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
+++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
@@ -28,6 +28,7 @@ import (
)
type fakeManager struct {
+ policy Policy
state state.State
}
@@ -69,6 +70,10 @@ func (m *fakeManager) State() state.Reader {
return m.state
}
+func (m *fakeManager) GetCPUPolicy() string {
+ return m.policy.Name()
+}
+
func (m *fakeManager) GetCPUs(podUID, containerName string) cpuset.CPUSet {
klog.InfoS("GetCPUs", "podUID", podUID, "containerName", containerName)
return cpuset.CPUSet{}
@@ -82,6 +87,7 @@ func (m *fakeManager) GetAllocatableCPUs() cpuset.CPUSet {
// NewFakeManager creates empty/fake cpu manager
func NewFakeManager() Manager {
return &fakeManager{
+ policy: &nonePolicy{},
state: state.NewMemoryState(),
}
}
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
index aa5c37639dc..302284ef408 100644
--- a/pkg/kubelet/cm/helpers_linux.go
+++ b/pkg/kubelet/cm/helpers_linux.go
@@ -169,6 +169,16 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64)
// build the result
result := &ResourceConfig{}
if qosClass == v1.PodQOSGuaranteed {
+ // Disable CFS CPU quota to avoid performance degradation due to
+ // Linux kernel CFS throttle implementation.
+ // NOTE: 4.18 kernel attempts to solve CFS throttling problem,
+ // but there are reports that it is not completely effective.
+ // This will configure cgroup CFS parameters at pod level:
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_quota_us
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_period_us
+ cpuQuota = int64(-1)
+ cpuPeriod = uint64(100000)
+
result.CpuShares = &cpuShares
result.CpuQuota = &cpuQuota
result.CpuPeriod = &cpuPeriod
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
index 56d765fbc22..8c7309937dd 100644
--- a/pkg/kubelet/cm/helpers_linux_test.go
+++ b/pkg/kubelet/cm/helpers_linux_test.go
@@ -63,8 +63,9 @@ func TestResourceConfigForPod(t *testing.T) {
burstablePartialShares := MilliCPUToShares(200)
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
guaranteedShares := MilliCPUToShares(100)
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
+ guaranteedQuotaPeriod := uint64(100000)
+ guaranteedQuota := int64(-1)
+ guaranteedTunedQuota := int64(-1)
memoryQuantity = resource.MustParse("100Mi")
cpuNoLimit := int64(-1)
guaranteedMemory := memoryQuantity.Value()
@@ -203,8 +204,8 @@ func TestResourceConfigForPod(t *testing.T) {
},
},
enforceCPULimits: true,
- quotaPeriod: defaultQuotaPeriod,
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+ quotaPeriod: guaranteedQuotaPeriod,
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
},
"guaranteed-no-cpu-enforcement": {
pod: &v1.Pod{
@@ -217,8 +218,8 @@ func TestResourceConfigForPod(t *testing.T) {
},
},
enforceCPULimits: false,
- quotaPeriod: defaultQuotaPeriod,
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+ quotaPeriod: guaranteedQuotaPeriod,
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
},
"guaranteed-with-tuned-quota": {
pod: &v1.Pod{
@@ -231,8 +232,8 @@ func TestResourceConfigForPod(t *testing.T) {
},
},
enforceCPULimits: true,
- quotaPeriod: tunedQuotaPeriod,
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+ quotaPeriod: guaranteedQuotaPeriod,
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
},
"guaranteed-no-cpu-enforcement-with-tuned-quota": {
pod: &v1.Pod{
@@ -245,8 +246,8 @@ func TestResourceConfigForPod(t *testing.T) {
},
},
enforceCPULimits: false,
- quotaPeriod: tunedQuotaPeriod,
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+ quotaPeriod: guaranteedQuotaPeriod,
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
},
}
@@ -283,8 +284,9 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
burstablePartialShares := MilliCPUToShares(200)
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
guaranteedShares := MilliCPUToShares(100)
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
+ guaranteedQuotaPeriod := uint64(100000)
+ guaranteedQuota := int64(-1)
+ guaranteedTunedQuota := int64(-1)
memoryQuantity = resource.MustParse("100Mi")
cpuNoLimit := int64(-1)
guaranteedMemory := memoryQuantity.Value()
@@ -423,8 +425,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
},
},
enforceCPULimits: true,
- quotaPeriod: defaultQuotaPeriod,
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+ quotaPeriod: guaranteedQuotaPeriod,
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
},
"guaranteed-no-cpu-enforcement": {
pod: &v1.Pod{
@@ -437,8 +439,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
},
},
enforceCPULimits: false,
- quotaPeriod: defaultQuotaPeriod,
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
+ quotaPeriod: guaranteedQuotaPeriod,
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
},
"guaranteed-with-tuned-quota": {
pod: &v1.Pod{
@@ -451,8 +453,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
},
},
enforceCPULimits: true,
- quotaPeriod: tunedQuotaPeriod,
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+ quotaPeriod: guaranteedQuotaPeriod,
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
},
"guaranteed-no-cpu-enforcement-with-tuned-quota": {
pod: &v1.Pod{
@@ -465,8 +467,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
},
},
enforceCPULimits: false,
- quotaPeriod: tunedQuotaPeriod,
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
+ quotaPeriod: guaranteedQuotaPeriod,
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
},
}
diff --git a/pkg/kubelet/cm/internal_container_lifecycle_linux.go b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
index 9cf41620b8c..fa15dbe1671 100644
--- a/pkg/kubelet/cm/internal_container_lifecycle_linux.go
+++ b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
@@ -19,10 +19,12 @@ limitations under the License.
package cm
import (
+ //"fmt"
"strconv"
"strings"
"k8s.io/api/core/v1"
+ v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
@@ -32,6 +34,13 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain
if !allocatedCPUs.IsEmpty() {
containerConfig.Linux.Resources.CpusetCpus = allocatedCPUs.String()
}
+ // Disable cgroup CFS throttle at the container level.
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_quota_us
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_period_us
+ if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed {
+ containerConfig.Linux.Resources.CpuPeriod = int64(100000)
+ containerConfig.Linux.Resources.CpuQuota = int64(-1)
+ }
}
if i.memoryManager != nil {
--
2.17.1