872dd513fc
Multiple versions of kubernetes are required to support upgrade. This adds a staged version of kubernetes 1.21.3, built with a specific version of golang. All subpackage versions are included in the iso image without collisions. The following patches are ported to this specific kubernetes version: kubelet-cpumanager-disable-CFS-quota-throttling-for-.patch kubelet-cpumanager-keep-normal-containers-off-reserv.patch kubelet-cpumanager-infrastructure-pods-use-system-re.patch kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch kubeadm-create-platform-pods-with-zero-CPU-resources.patch enable-support-for-kubernetes-to-ignore-isolcpus.patch The following changes were made for 1.21.3: - the following upstream commit was reverted: Revert-use-subpath-for-coredns-only-for-default-repo.patch - kubelet-cpumanager-disable-CFS-quota-throttling-for-.patch was refactored due to the new internal_container_lifecycle framework. We leverage the same mechanism to set Linux resources as: cpu manager: specify the container CPU set during the creation (commit 38dc7509f862f081828e7d9167107b8c6e98ea23). - kubelet-cpumanager-introduce-concept-of-isolated-CPU.patch was refactored due to an upstream API change: node: podresources: make GetDevices() consistent (commit ad68f9588c72d6477b5a290c548a9031063ac659). The routine podIsolCPUs() was refactored in 1.21.3 since the API p.deviceManager.GetDevices() now returns multiple devices, with one device per cpu. The resultant cpuset needs to be the aggregate. Story: 2008972 Task: 43056 Signed-off-by: Jim Gauld <james.gauld@windriver.com> Change-Id: I5ba7ff2e6aebb744af265698c0f90256ac5e70f4
257 lines
11 KiB
Diff
257 lines
11 KiB
Diff
From 3f69868f7bca99f6875dd4d197b3a974d1b558ed Mon Sep 17 00:00:00 2001
|
|
From: Jim Gauld <james.gauld@windriver.com>
|
|
Date: Wed, 22 Sep 2021 10:09:06 -0400
|
|
Subject: [PATCH 1/7] kubelet cpumanager disable CFS quota throttling for
|
|
Guaranteed pods
|
|
|
|
This disables CFS CPU quota to avoid performance degradation due to
|
|
Linux kernel CFS quota implementation. Note that the 4.18 kernel attempts
|
|
to solve the CFS throttling problem, but there are reports that it is
|
|
not completely effective.
|
|
|
|
This disables CFS quota throttling for Guaranteed pods for both
|
|
parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us.
|
|
Disabling it dramatically improves latency for HTTP response times.
|
|
|
|
This patch was refactored in 1.21.3 due to the new internal_container_lifecycle
|
|
framework. We leverage the same mechanism to set Linux resources as:
|
|
cpu manager: specify the container CPU set during the creation
|
|
(commit 38dc7509f862f081828e7d9167107b8c6e98ea23).
|
|
|
|
Signed-off-by: Jim Gauld <james.gauld@windriver.com>
|
|
---
|
|
pkg/kubelet/cm/cpumanager/cpu_manager.go | 7 ++++
|
|
pkg/kubelet/cm/cpumanager/fake_cpu_manager.go | 6 +++
|
|
pkg/kubelet/cm/helpers_linux.go | 10 +++++
|
|
pkg/kubelet/cm/helpers_linux_test.go | 42 ++++++++++---------
|
|
.../cm/internal_container_lifecycle_linux.go | 9 ++++
|
|
5 files changed, 54 insertions(+), 20 deletions(-)
|
|
|
|
diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
|
index 5a6e5082f15..f7b9c8d07bf 100644
|
|
--- a/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
|
+++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go
|
|
@@ -72,6 +72,9 @@ type Manager interface {
|
|
// State returns a read-only interface to the internal CPU manager state.
|
|
State() state.Reader
|
|
|
|
+ // GetCPUPolicy returns the assigned CPU manager policy
|
|
+ GetCPUPolicy() string
|
|
+
|
|
// GetTopologyHints implements the topologymanager.HintProvider Interface
|
|
// and is consulted to achieve NUMA aware resource alignment among this
|
|
// and other resource controllers.
|
|
@@ -291,6 +294,10 @@ func (m *manager) State() state.Reader {
|
|
return m.state
|
|
}
|
|
|
|
+func (m *manager) GetCPUPolicy() string {
|
|
+ return m.policy.Name()
|
|
+}
|
|
+
|
|
func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint {
|
|
// Garbage collect any stranded resources before providing TopologyHints
|
|
m.removeStaleState()
|
|
diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
|
|
index 2c38b52b374..1cb0ea10923 100644
|
|
--- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
|
|
+++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go
|
|
@@ -28,6 +28,7 @@ import (
|
|
)
|
|
|
|
type fakeManager struct {
|
|
+ policy Policy
|
|
state state.State
|
|
}
|
|
|
|
@@ -69,6 +70,10 @@ func (m *fakeManager) State() state.Reader {
|
|
return m.state
|
|
}
|
|
|
|
+func (m *fakeManager) GetCPUPolicy() string {
|
|
+ return m.policy.Name()
|
|
+}
|
|
+
|
|
func (m *fakeManager) GetCPUs(podUID, containerName string) cpuset.CPUSet {
|
|
klog.InfoS("GetCPUs", "podUID", podUID, "containerName", containerName)
|
|
return cpuset.CPUSet{}
|
|
@@ -82,6 +87,7 @@ func (m *fakeManager) GetAllocatableCPUs() cpuset.CPUSet {
|
|
// NewFakeManager creates empty/fake cpu manager
|
|
func NewFakeManager() Manager {
|
|
return &fakeManager{
|
|
+ policy: &nonePolicy{},
|
|
state: state.NewMemoryState(),
|
|
}
|
|
}
|
|
diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go
|
|
index aa5c37639dc..302284ef408 100644
|
|
--- a/pkg/kubelet/cm/helpers_linux.go
|
|
+++ b/pkg/kubelet/cm/helpers_linux.go
|
|
@@ -169,6 +169,16 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64)
|
|
// build the result
|
|
result := &ResourceConfig{}
|
|
if qosClass == v1.PodQOSGuaranteed {
|
|
+ // Disable CFS CPU quota to avoid performance degradation due to
|
|
+ // Linux kernel CFS throttle implementation.
|
|
+ // NOTE: 4.18 kernel attempts to solve CFS throttling problem,
|
|
+ // but there are reports that it is not completely effective.
|
|
+ // This will configure cgroup CFS parameters at pod level:
|
|
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_quota_us
|
|
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_period_us
|
|
+ cpuQuota = int64(-1)
|
|
+ cpuPeriod = uint64(100000)
|
|
+
|
|
result.CpuShares = &cpuShares
|
|
result.CpuQuota = &cpuQuota
|
|
result.CpuPeriod = &cpuPeriod
|
|
diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go
|
|
index 56d765fbc22..8c7309937dd 100644
|
|
--- a/pkg/kubelet/cm/helpers_linux_test.go
|
|
+++ b/pkg/kubelet/cm/helpers_linux_test.go
|
|
@@ -63,8 +63,9 @@ func TestResourceConfigForPod(t *testing.T) {
|
|
burstablePartialShares := MilliCPUToShares(200)
|
|
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
|
guaranteedShares := MilliCPUToShares(100)
|
|
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
|
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
|
+ guaranteedQuotaPeriod := uint64(100000)
|
|
+ guaranteedQuota := int64(-1)
|
|
+ guaranteedTunedQuota := int64(-1)
|
|
memoryQuantity = resource.MustParse("100Mi")
|
|
cpuNoLimit := int64(-1)
|
|
guaranteedMemory := memoryQuantity.Value()
|
|
@@ -203,8 +204,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
|
},
|
|
},
|
|
enforceCPULimits: true,
|
|
- quotaPeriod: defaultQuotaPeriod,
|
|
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
|
+ quotaPeriod: guaranteedQuotaPeriod,
|
|
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
|
},
|
|
"guaranteed-no-cpu-enforcement": {
|
|
pod: &v1.Pod{
|
|
@@ -217,8 +218,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
|
},
|
|
},
|
|
enforceCPULimits: false,
|
|
- quotaPeriod: defaultQuotaPeriod,
|
|
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
|
+ quotaPeriod: guaranteedQuotaPeriod,
|
|
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
|
},
|
|
"guaranteed-with-tuned-quota": {
|
|
pod: &v1.Pod{
|
|
@@ -231,8 +232,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
|
},
|
|
},
|
|
enforceCPULimits: true,
|
|
- quotaPeriod: tunedQuotaPeriod,
|
|
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
|
+ quotaPeriod: guaranteedQuotaPeriod,
|
|
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
|
},
|
|
"guaranteed-no-cpu-enforcement-with-tuned-quota": {
|
|
pod: &v1.Pod{
|
|
@@ -245,8 +246,8 @@ func TestResourceConfigForPod(t *testing.T) {
|
|
},
|
|
},
|
|
enforceCPULimits: false,
|
|
- quotaPeriod: tunedQuotaPeriod,
|
|
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
|
+ quotaPeriod: guaranteedQuotaPeriod,
|
|
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
|
},
|
|
}
|
|
|
|
@@ -283,8 +284,9 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
|
burstablePartialShares := MilliCPUToShares(200)
|
|
burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod))
|
|
guaranteedShares := MilliCPUToShares(100)
|
|
- guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod))
|
|
- guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod))
|
|
+ guaranteedQuotaPeriod := uint64(100000)
|
|
+ guaranteedQuota := int64(-1)
|
|
+ guaranteedTunedQuota := int64(-1)
|
|
memoryQuantity = resource.MustParse("100Mi")
|
|
cpuNoLimit := int64(-1)
|
|
guaranteedMemory := memoryQuantity.Value()
|
|
@@ -423,8 +425,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
|
},
|
|
},
|
|
enforceCPULimits: true,
|
|
- quotaPeriod: defaultQuotaPeriod,
|
|
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
|
+ quotaPeriod: guaranteedQuotaPeriod,
|
|
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
|
},
|
|
"guaranteed-no-cpu-enforcement": {
|
|
pod: &v1.Pod{
|
|
@@ -437,8 +439,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
|
},
|
|
},
|
|
enforceCPULimits: false,
|
|
- quotaPeriod: defaultQuotaPeriod,
|
|
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory},
|
|
+ quotaPeriod: guaranteedQuotaPeriod,
|
|
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
|
},
|
|
"guaranteed-with-tuned-quota": {
|
|
pod: &v1.Pod{
|
|
@@ -451,8 +453,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
|
},
|
|
},
|
|
enforceCPULimits: true,
|
|
- quotaPeriod: tunedQuotaPeriod,
|
|
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
|
+ quotaPeriod: guaranteedQuotaPeriod,
|
|
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
|
},
|
|
"guaranteed-no-cpu-enforcement-with-tuned-quota": {
|
|
pod: &v1.Pod{
|
|
@@ -465,8 +467,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) {
|
|
},
|
|
},
|
|
enforceCPULimits: false,
|
|
- quotaPeriod: tunedQuotaPeriod,
|
|
- expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory},
|
|
+ quotaPeriod: guaranteedQuotaPeriod,
|
|
+ expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory},
|
|
},
|
|
}
|
|
|
|
diff --git a/pkg/kubelet/cm/internal_container_lifecycle_linux.go b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
|
index 9cf41620b8c..fa15dbe1671 100644
|
|
--- a/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
|
+++ b/pkg/kubelet/cm/internal_container_lifecycle_linux.go
|
|
@@ -19,10 +19,12 @@ limitations under the License.
|
|
package cm
|
|
|
|
import (
|
|
+ //"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"k8s.io/api/core/v1"
|
|
+ v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
|
|
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
|
|
)
|
|
|
|
@@ -32,6 +34,13 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain
|
|
if !allocatedCPUs.IsEmpty() {
|
|
containerConfig.Linux.Resources.CpusetCpus = allocatedCPUs.String()
|
|
}
|
|
+ // Disable cgroup CFS throttle at the container level.
|
|
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_quota_us
|
|
+ // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_period_us
|
|
+ if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed {
|
|
+ containerConfig.Linux.Resources.CpuPeriod = int64(100000)
|
|
+ containerConfig.Linux.Resources.CpuQuota = int64(-1)
|
|
+ }
|
|
}
|
|
|
|
if i.memoryManager != nil {
|
|
--
|
|
2.17.1
|
|
|