From 56a91fec13cd018f998a0bdf0beed753efd94df5 Mon Sep 17 00:00:00 2001 From: Alex Kozyrev Date: Fri, 26 Jul 2019 13:58:17 -0400 Subject: [PATCH] Backport the fix for deadlock in CFS-bandwidth timer locking Low-latency profile of StarlingX is affected by a deadlock in CFS scheduler. spin_lock is used in IRQ handler there instead of spin_lock_irqsave. This leads to an attempt to lock the same spinlock twice and inevitable system freeze. Backporting c0ad4aa4d8 commit from upstream kernel to cure the issue. Change-Id: I5416c0e0886f42d2bcec8e3e5da063e6af6916f8 Closes-bug: 1832854 Signed-off-by: Alex Kozyrev --- kernel/kernel-rt/centos/build_srpm.data | 2 +- .../centos/meta_patches/Compile-issues.patch | 22 +-- .../Kernel-source-patches-for-TiC.patch | 21 ++- ...obustify-CFS-bandwidth-timer-locking.patch | 166 ++++++++++++++++++ 4 files changed, 188 insertions(+), 23 deletions(-) create mode 100644 kernel/kernel-rt/centos/patches/robustify-CFS-bandwidth-timer-locking.patch diff --git a/kernel/kernel-rt/centos/build_srpm.data b/kernel/kernel-rt/centos/build_srpm.data index dab446664..2ed119b61 100644 --- a/kernel/kernel-rt/centos/build_srpm.data +++ b/kernel/kernel-rt/centos/build_srpm.data @@ -1,4 +1,4 @@ COPY_LIST="files/*" -TIS_PATCH_VER=2 +TIS_PATCH_VER=3 BUILD_IS_BIG=11 BUILD_IS_SLOW=12 diff --git a/kernel/kernel-rt/centos/meta_patches/Compile-issues.patch b/kernel/kernel-rt/centos/meta_patches/Compile-issues.patch index 3b345a861..b409d56b3 100644 --- a/kernel/kernel-rt/centos/meta_patches/Compile-issues.patch +++ b/kernel/kernel-rt/centos/meta_patches/Compile-issues.patch @@ -1,9 +1,9 @@ -From 8fbd0edbcf6ed51b9e58d267d0ce7b40f00118a1 Mon Sep 17 00:00:00 2001 -From: Bart Wensley -Date: Tue, 9 Jul 2019 07:18:00 -0500 +From 6fe892d415b3d728d223069eacb6f291fc38d86d Mon Sep 17 00:00:00 2001 +From: Alex Kozyrev +Date: Mon, 29 Jul 2019 11:48:51 -0400 Subject: [PATCH 1/1] Compile issues -Signed-off-by: Bart Wensley +Signed-off-by: Alex Kozyrev --- SPECS/kernel-rt.spec | 8 
++++++++ 1 file changed, 8 insertions(+) @@ -12,22 +12,22 @@ diff --git a/SPECS/kernel-rt.spec b/SPECS/kernel-rt.spec index 3b7985c..5025db7 100644 --- a/SPECS/kernel-rt.spec +++ b/SPECS/kernel-rt.spec -@@ -418,6 +418,11 @@ Patch1027: dpt_i2o-fix-build-warning.patch - # DRBD was choking on write same +@@ -418,6 +418,11 @@ # DRBD was choking on write same Patch1028: turn-off-write-same-in-smartqpi-driver.patch Patch1029: restrict-iSCSI-kthreads-to-CPUs-in-cpu_kthread_mask.patch -+Patch1030: fix-compilation-issues.patch + Patch1030: robustify-CFS-bandwidth-timer-locking.patch ++Patch1031: fix-compilation-issues.patch +# Fix CentOS 7.6 upgrade compile error -+Patch1031: fix-CentOS-7.6-upgrade-compile-error.patch ++Patch1032: fix-CentOS-7.6-upgrade-compile-error.patch +# Compile fix for disabling CONFIG_MEMCG_KMEM -+Patch1032: compile-fix-for-disabling-CONFIG_MEMCG_KMEM.patch ++Patch1033: compile-fix-for-disabling-CONFIG_MEMCG_KMEM.patch BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root -@@ -780,6 +785,9 @@ ApplyPatch aic94xx-Skip-reading-user-settings-if-flash-is-not-f.patch - ApplyPatch dpt_i2o-fix-build-warning.patch +@@ -781,6 +786,9 @@ ApplyPatch dpt_i2o-fix-build-warning.patch ApplyPatch turn-off-write-same-in-smartqpi-driver.patch ApplyPatch restrict-iSCSI-kthreads-to-CPUs-in-cpu_kthread_mask.patch + ApplyPatch robustify-CFS-bandwidth-timer-locking.patch +ApplyPatch fix-compilation-issues.patch +ApplyPatch fix-CentOS-7.6-upgrade-compile-error.patch +ApplyPatch compile-fix-for-disabling-CONFIG_MEMCG_KMEM.patch diff --git a/kernel/kernel-rt/centos/meta_patches/Kernel-source-patches-for-TiC.patch b/kernel/kernel-rt/centos/meta_patches/Kernel-source-patches-for-TiC.patch index b899b65a4..bd64abf84 100644 --- a/kernel/kernel-rt/centos/meta_patches/Kernel-source-patches-for-TiC.patch +++ b/kernel/kernel-rt/centos/meta_patches/Kernel-source-patches-for-TiC.patch @@ -1,21 +1,18 @@ -From 4412b4d092c2f38feed1d0f8ea1a69c799315663 Mon Sep 17 00:00:00 2001 -Message-Id: 
<4412b4d092c2f38feed1d0f8ea1a69c799315663.1528227675.git.Jim.Somerville@windriver.com> -In-Reply-To: -References: -From: Jim Somerville -Date: Mon, 23 Apr 2018 15:19:36 -0400 +From 6a04eb3881ccb3c592b4b47d36bde90f1e33c598 Mon Sep 17 00:00:00 2001 +From: Alex Kozyrev +Date: Mon, 29 Jul 2019 11:48:49 -0400 Subject: [PATCH 2/3] Kernel source patches for TiC -Signed-off-by: Jim Somerville +Signed-off-by: Alex Kozyrev --- - SPECS/kernel-rt.spec | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 62 insertions(+) + SPECS/kernel-rt.spec | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 64 insertions(+) diff --git a/SPECS/kernel-rt.spec b/SPECS/kernel-rt.spec index 905ae52..15114e6 100644 --- a/SPECS/kernel-rt.spec +++ b/SPECS/kernel-rt.spec -@@ -386,6 +386,38 @@ Source1000: modprobe-dccp-blacklist.conf +@@ -386,6 +386,39 @@ Source1000: modprobe-dccp-blacklist.conf # Empty final patch file to facilitate testing of kernel patches Patch999999: linux-kernel-test.patch @@ -51,10 +48,11 @@ index 905ae52..15114e6 100644 +# DRBD was choking on write same +Patch1028: turn-off-write-same-in-smartqpi-driver.patch +Patch1029: restrict-iSCSI-kthreads-to-CPUs-in-cpu_kthread_mask.patch ++Patch1030: robustify-CFS-bandwidth-timer-locking.patch BuildRoot: %{_tmppath}/kernel-%{KVERREL}-root -@@ -718,6 +750,36 @@ cp %{SOURCE38} . +@@ -718,6 +751,37 @@ cp %{SOURCE38} . 
## Apply Patches here ApplyPatch linux-kernel-test.patch @@ -88,6 +86,7 @@ index 905ae52..15114e6 100644 +ApplyPatch dpt_i2o-fix-build-warning.patch +ApplyPatch turn-off-write-same-in-smartqpi-driver.patch +ApplyPatch restrict-iSCSI-kthreads-to-CPUs-in-cpu_kthread_mask.patch ++ApplyPatch robustify-CFS-bandwidth-timer-locking.patch # move off upstream version mechanism if [ -e localversion-rt ]; then diff --git a/kernel/kernel-rt/centos/patches/robustify-CFS-bandwidth-timer-locking.patch b/kernel/kernel-rt/centos/patches/robustify-CFS-bandwidth-timer-locking.patch new file mode 100644 index 000000000..80e117d76 --- /dev/null +++ b/kernel/kernel-rt/centos/patches/robustify-CFS-bandwidth-timer-locking.patch @@ -0,0 +1,166 @@ +From c0ad4aa4d8416a39ad262a2bd68b30acd951bf0e Mon Sep 17 00:00:00 2001 +From: Peter Zijlstra +Date: Mon, 7 Jan 2019 13:52:31 +0100 +Subject: sched/fair: Robustify CFS-bandwidth timer locking + +Traditionally hrtimer callbacks were run with IRQs disabled, but with +the introduction of HRTIMER_MODE_SOFT it is possible they run from +SoftIRQ context, which does _NOT_ have IRQs disabled. + +Allow for the CFS bandwidth timers (period_timer and slack_timer) to +be ran from SoftIRQ context; this entails removing the assumption that +IRQs are already disabled from the locking. + +While mainline doesn't strictly need this, -RT forces all timers not +explicitly marked with MODE_HARD into MODE_SOFT and trips over this. +And marking these timers as MODE_HARD doesn't make sense as they're +not required for RT operation and can potentially be quite expensive. 
+ +Reported-by: Tom Putzeys +Tested-by: Mike Galbraith +Signed-off-by: Peter Zijlstra (Intel) +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Sebastian Andrzej Siewior +Cc: Thomas Gleixner +Link: https://lkml.kernel.org/r/20190107125231.GE14122@hirez.programming.kicks-ass.net +Signed-off-by: Ingo Molnar +Signed-off-by: Alex Kozyrev + +--- + kernel/sched/fair.c | 33 ++++++++++++++++++--------------- + 1 file changed, 18 insertions(+), 15 deletions(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index d3d746b..e9a8d95 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -3488,13 +3488,14 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, + struct cfs_rq *cfs_rq; + u64 runtime; + u64 starting_runtime = remaining; ++ unsigned long flags; + + rcu_read_lock(); + list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, + throttled_list) { + struct rq *rq = rq_of(cfs_rq); + +- raw_spin_lock(&rq->lock); ++ raw_spin_lock_irqsave(&rq->lock, flags); + if (!cfs_rq_throttled(cfs_rq)) + goto next; + +@@ -3511,7 +3512,7 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, + unthrottle_cfs_rq(cfs_rq); + + next: +- raw_spin_unlock(&rq->lock); ++ raw_spin_unlock_irqrestore(&rq->lock, flags); + + if (!remaining) + break; +@@ -3527,7 +3528,7 @@ next: + * period the timer is deactivated until scheduling resumes; cfs_b->idle is + * used to track this state. 
+ */ +-static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) ++static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags) + { + u64 runtime, runtime_expires; + int throttled; +@@ -3576,11 +3577,11 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) + while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) { + runtime = cfs_b->runtime; + cfs_b->distribute_running = 1; +- raw_spin_unlock(&cfs_b->lock); ++ raw_spin_unlock_irqrestore(&cfs_b->lock, flags); + /* we can't nest cfs_b->lock while distributing bandwidth */ + runtime = distribute_cfs_runtime(cfs_b, runtime, + runtime_expires); +- raw_spin_lock(&cfs_b->lock); ++ raw_spin_lock_irqsave(&cfs_b->lock, flags); + + cfs_b->distribute_running = 0; + throttled = !list_empty(&cfs_b->throttled_cfs_rq); +@@ -3689,17 +3690,18 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) + static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) + { + u64 runtime = 0, slice = sched_cfs_bandwidth_slice(); ++ unsigned long flags; + u64 expires; + + /* confirm we're still not at a refresh boundary */ +- raw_spin_lock(&cfs_b->lock); ++ raw_spin_lock_irqsave(&cfs_b->lock, flags); + if (cfs_b->distribute_running) { +- raw_spin_unlock(&cfs_b->lock); ++ raw_spin_unlock_irqrestore(&cfs_b->lock, flags); + return; + } + + if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) { +- raw_spin_unlock(&cfs_b->lock); ++ raw_spin_unlock_irqrestore(&cfs_b->lock, flags); + return; + } + +@@ -3710,18 +3712,18 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) + if (runtime) + cfs_b->distribute_running = 1; + +- raw_spin_unlock(&cfs_b->lock); ++ raw_spin_unlock_irqrestore(&cfs_b->lock, flags); + + if (!runtime) + return; + + runtime = distribute_cfs_runtime(cfs_b, runtime, expires); + +- raw_spin_lock(&cfs_b->lock); ++ raw_spin_lock_irqsave(&cfs_b->lock, flags); + if (expires == 
cfs_b->runtime_expires) + cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->distribute_running = 0; +- raw_spin_unlock(&cfs_b->lock); ++ raw_spin_unlock_irqrestore(&cfs_b->lock, flags); + } + + /* +@@ -3785,7 +3787,7 @@ static void check_cfs_rq_runtime(struct cfs_rq *cfs_rq) + } + + static inline u64 default_cfs_period(void); +-static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun); ++static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags); + static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b); + + static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer) +@@ -3802,10 +3804,11 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) + struct cfs_bandwidth *cfs_b = + container_of(timer, struct cfs_bandwidth, period_timer); + ktime_t now; ++ unsigned long flags; + int overrun; + int idle = 0; + +- raw_spin_lock(&cfs_b->lock); ++ raw_spin_lock_irqsave(&cfs_b->lock, flags); + for (;;) { + now = hrtimer_cb_get_time(timer); + overrun = hrtimer_forward(timer, now, cfs_b->period); +@@ -3813,9 +3816,9 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) + if (!overrun) + break; + +- idle = do_sched_cfs_period_timer(cfs_b, overrun); ++ idle = do_sched_cfs_period_timer(cfs_b, overrun, flags); + } +- raw_spin_unlock(&cfs_b->lock); ++ raw_spin_unlock_irqrestore(&cfs_b->lock, flags); + + return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; + } +-- +1.8.3.1 +