From 3f69868f7bca99f6875dd4d197b3a974d1b558ed Mon Sep 17 00:00:00 2001 From: Jim Gauld Date: Wed, 22 Sep 2021 10:09:06 -0400 Subject: [PATCH 1/7] kubelet cpumanager disable CFS quota throttling for Guaranteed pods This disables CFS CPU quota to avoid performance degradation due to Linux kernel CFS quota implementation. Note that 4.18 kernel attempts to solve the CFS throttling problem, but there are reports that it is not completely effective. This disables CFS quota throttling for Guaranteed pods for both parent and container cgroups by writing -1 to cgroup cpu.cfs_quota_us. Disabling has a dramatic latency improvement for HTTP response times. This patch is refactored in 1.21.3 due to new internal_container_lifecycle framework. We leverage the same mechanism to set Linux resources as: cpu manager: specify the container CPU set during the creation (commit 38dc7509f862f081828e7d9167107b8c6e98ea23). Signed-off-by: Jim Gauld --- pkg/kubelet/cm/cpumanager/cpu_manager.go | 7 ++++ pkg/kubelet/cm/cpumanager/fake_cpu_manager.go | 6 +++ pkg/kubelet/cm/helpers_linux.go | 10 +++++ pkg/kubelet/cm/helpers_linux_test.go | 42 ++++++++++--------- .../cm/internal_container_lifecycle_linux.go | 9 ++++ 5 files changed, 54 insertions(+), 20 deletions(-) diff --git a/pkg/kubelet/cm/cpumanager/cpu_manager.go b/pkg/kubelet/cm/cpumanager/cpu_manager.go index 5a6e5082f15..f7b9c8d07bf 100644 --- a/pkg/kubelet/cm/cpumanager/cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/cpu_manager.go @@ -72,6 +72,9 @@ type Manager interface { // State returns a read-only interface to the internal CPU manager state. State() state.Reader + // GetCPUPolicy returns the assigned CPU manager policy + GetCPUPolicy() string + // GetTopologyHints implements the topologymanager.HintProvider Interface // and is consulted to achieve NUMA aware resource alignment among this // and other resource controllers. 
@@ -291,6 +294,10 @@ func (m *manager) State() state.Reader { return m.state } +func (m *manager) GetCPUPolicy() string { + return m.policy.Name() +} + func (m *manager) GetTopologyHints(pod *v1.Pod, container *v1.Container) map[string][]topologymanager.TopologyHint { // Garbage collect any stranded resources before providing TopologyHints m.removeStaleState() diff --git a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go index 2c38b52b374..1cb0ea10923 100644 --- a/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go +++ b/pkg/kubelet/cm/cpumanager/fake_cpu_manager.go @@ -28,6 +28,7 @@ import ( ) type fakeManager struct { + policy Policy state state.State } @@ -69,6 +70,10 @@ func (m *fakeManager) State() state.Reader { return m.state } +func (m *fakeManager) GetCPUPolicy() string { + return m.policy.Name() +} + func (m *fakeManager) GetCPUs(podUID, containerName string) cpuset.CPUSet { klog.InfoS("GetCPUs", "podUID", podUID, "containerName", containerName) return cpuset.CPUSet{} @@ -82,6 +87,7 @@ func (m *fakeManager) GetAllocatableCPUs() cpuset.CPUSet { // NewFakeManager creates empty/fake cpu manager func NewFakeManager() Manager { return &fakeManager{ + policy: &nonePolicy{}, state: state.NewMemoryState(), } } diff --git a/pkg/kubelet/cm/helpers_linux.go b/pkg/kubelet/cm/helpers_linux.go index aa5c37639dc..302284ef408 100644 --- a/pkg/kubelet/cm/helpers_linux.go +++ b/pkg/kubelet/cm/helpers_linux.go @@ -169,6 +169,16 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64) // build the result result := &ResourceConfig{} if qosClass == v1.PodQOSGuaranteed { + // Disable CFS CPU quota to avoid performance degradation due to + // Linux kernel CFS throttle implementation. + // NOTE: 4.18 kernel attempts to solve CFS throttling problem, + // but there are reports that it is not completely effective. 
+ // This will configure cgroup CFS parameters at pod level: + // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_quota_us + // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/cpu.cfs_period_us + cpuQuota = int64(-1) + cpuPeriod = uint64(100000) + result.CpuShares = &cpuShares result.CpuQuota = &cpuQuota result.CpuPeriod = &cpuPeriod diff --git a/pkg/kubelet/cm/helpers_linux_test.go b/pkg/kubelet/cm/helpers_linux_test.go index 56d765fbc22..8c7309937dd 100644 --- a/pkg/kubelet/cm/helpers_linux_test.go +++ b/pkg/kubelet/cm/helpers_linux_test.go @@ -63,8 +63,9 @@ func TestResourceConfigForPod(t *testing.T) { burstablePartialShares := MilliCPUToShares(200) burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod)) guaranteedShares := MilliCPUToShares(100) - guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod)) - guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod)) + guaranteedQuotaPeriod := uint64(100000) + guaranteedQuota := int64(-1) + guaranteedTunedQuota := int64(-1) memoryQuantity = resource.MustParse("100Mi") cpuNoLimit := int64(-1) guaranteedMemory := memoryQuantity.Value() @@ -203,8 +204,8 @@ func TestResourceConfigForPod(t *testing.T) { }, }, enforceCPULimits: true, - quotaPeriod: defaultQuotaPeriod, - expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory}, + quotaPeriod: guaranteedQuotaPeriod, + expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory}, }, "guaranteed-no-cpu-enforcement": { pod: &v1.Pod{ @@ -217,8 +218,8 @@ func TestResourceConfigForPod(t *testing.T) { }, }, enforceCPULimits: false, - quotaPeriod: defaultQuotaPeriod, - expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory}, + quotaPeriod: guaranteedQuotaPeriod, + expected: &ResourceConfig{CpuShares: 
&guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory}, }, "guaranteed-with-tuned-quota": { pod: &v1.Pod{ @@ -231,8 +232,8 @@ func TestResourceConfigForPod(t *testing.T) { }, }, enforceCPULimits: true, - quotaPeriod: tunedQuotaPeriod, - expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory}, + quotaPeriod: guaranteedQuotaPeriod, + expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory}, }, "guaranteed-no-cpu-enforcement-with-tuned-quota": { pod: &v1.Pod{ @@ -245,8 +246,8 @@ func TestResourceConfigForPod(t *testing.T) { }, }, enforceCPULimits: false, - quotaPeriod: tunedQuotaPeriod, - expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory}, + quotaPeriod: guaranteedQuotaPeriod, + expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory}, }, } @@ -283,8 +284,9 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) { burstablePartialShares := MilliCPUToShares(200) burstableQuota := MilliCPUToQuota(200, int64(defaultQuotaPeriod)) guaranteedShares := MilliCPUToShares(100) - guaranteedQuota := MilliCPUToQuota(100, int64(defaultQuotaPeriod)) - guaranteedTunedQuota := MilliCPUToQuota(100, int64(tunedQuotaPeriod)) + guaranteedQuotaPeriod := uint64(100000) + guaranteedQuota := int64(-1) + guaranteedTunedQuota := int64(-1) memoryQuantity = resource.MustParse("100Mi") cpuNoLimit := int64(-1) guaranteedMemory := memoryQuantity.Value() @@ -423,8 +425,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) { }, }, enforceCPULimits: true, - quotaPeriod: defaultQuotaPeriod, - expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: 
&guaranteedQuota, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory}, + quotaPeriod: guaranteedQuotaPeriod, + expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory}, }, "guaranteed-no-cpu-enforcement": { pod: &v1.Pod{ @@ -437,8 +439,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) { }, }, enforceCPULimits: false, - quotaPeriod: defaultQuotaPeriod, - expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &defaultQuotaPeriod, Memory: &guaranteedMemory}, + quotaPeriod: guaranteedQuotaPeriod, + expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory}, }, "guaranteed-with-tuned-quota": { pod: &v1.Pod{ @@ -451,8 +453,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) { }, }, enforceCPULimits: true, - quotaPeriod: tunedQuotaPeriod, - expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory}, + quotaPeriod: guaranteedQuotaPeriod, + expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &guaranteedTunedQuota, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory}, }, "guaranteed-no-cpu-enforcement-with-tuned-quota": { pod: &v1.Pod{ @@ -465,8 +467,8 @@ func TestResourceConfigForPodWithCustomCPUCFSQuotaPeriod(t *testing.T) { }, }, enforceCPULimits: false, - quotaPeriod: tunedQuotaPeriod, - expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &tunedQuotaPeriod, Memory: &guaranteedMemory}, + quotaPeriod: guaranteedQuotaPeriod, + expected: &ResourceConfig{CpuShares: &guaranteedShares, CpuQuota: &cpuNoLimit, CpuPeriod: &guaranteedQuotaPeriod, Memory: &guaranteedMemory}, }, } diff --git a/pkg/kubelet/cm/internal_container_lifecycle_linux.go 
b/pkg/kubelet/cm/internal_container_lifecycle_linux.go index 9cf41620b8c..fa15dbe1671 100644 --- a/pkg/kubelet/cm/internal_container_lifecycle_linux.go +++ b/pkg/kubelet/cm/internal_container_lifecycle_linux.go @@ -19,10 +19,12 @@ limitations under the License. package cm import ( + //"fmt" "strconv" "strings" "k8s.io/api/core/v1" + v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) @@ -32,6 +34,13 @@ func (i *internalContainerLifecycleImpl) PreCreateContainer(pod *v1.Pod, contain if !allocatedCPUs.IsEmpty() { containerConfig.Linux.Resources.CpusetCpus = allocatedCPUs.String() } + // Disable cgroup CFS throttle at the container level. + // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_quota_us + // /sys/fs/cgroup/cpu/k8s-infra/kubepods/<pod>/<container>/cpu.cfs_period_us + if i.cpuManager.GetCPUPolicy() == "static" && v1qos.GetPodQOS(pod) == v1.PodQOSGuaranteed { + containerConfig.Linux.Resources.CpuPeriod = int64(100000) + containerConfig.Linux.Resources.CpuQuota = int64(-1) + } } if i.memoryManager != nil { -- 2.17.1