diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-25 10:21:21 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-01-25 10:21:21 -0800 |
commit | 3cb9871f8160a4789189701eda5af582f3414e55 (patch) | |
tree | 65e2c1dc9eb7833911c774f39cbb3dd1af00997a | |
parent | 6098d87eaf31f48153c984e2adadf14762520a87 (diff) | |
parent | e787644caf7628ad3269c1fbd321c3255cf51710 (diff) |
Merge tag 'urgent-rcu.2024.01.24a' of https://github.com/neeraju/linux
Pull RCU fix from Neeraj Upadhyay:
"This fixes RCU grace period stalls, which are observed when an
outgoing CPU's quiescent state reporting results in wakeup of one of
the grace period kthreads, to complete the grace period.
If those kthreads have SCHED_FIFO policy, the wake up can indirectly
arm the RT bandwith timer to the local offline CPU.
Earlier migration of the hrtimers from the CPU introduced in commit
5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU
earlier") results in this timer getting ignored.
If the RCU grace period kthreads are waiting for RT bandwidth to be
available, they may never be actually scheduled, resulting in RCU
stall warnings"
* tag 'urgent-rcu.2024.01.24a' of https://github.com/neeraju/linux:
rcu: Defer RCU kthreads wakeup when CPU is dying
-rw-r--r-- | kernel/rcu/tree.c | 34 | ||||
-rw-r--r-- | kernel/rcu/tree_exp.h | 3 |
2 files changed, 34 insertions, 3 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1ae851777806..b2bccfd37c38 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1013,6 +1013,38 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp) return needmore; } +static void swake_up_one_online_ipi(void *arg) +{ + struct swait_queue_head *wqh = arg; + + swake_up_one(wqh); +} + +static void swake_up_one_online(struct swait_queue_head *wqh) +{ + int cpu = get_cpu(); + + /* + * If called from rcutree_report_cpu_starting(), wake up + * is dangerous that late in the CPU-down hotplug process. The + * scheduler might queue an ignored hrtimer. Defer the wake up + * to an online CPU instead. + */ + if (unlikely(cpu_is_offline(cpu))) { + int target; + + target = cpumask_any_and(housekeeping_cpumask(HK_TYPE_RCU), + cpu_online_mask); + + smp_call_function_single(target, swake_up_one_online_ipi, + wqh, 0); + put_cpu(); + } else { + put_cpu(); + swake_up_one(wqh); + } +} + /* * Awaken the grace-period kthread. Don't do a self-awaken (unless in an * interrupt or softirq handler, in which case we just might immediately @@ -1037,7 +1069,7 @@ static void rcu_gp_kthread_wake(void) return; WRITE_ONCE(rcu_state.gp_wake_time, jiffies); WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq)); - swake_up_one(&rcu_state.gp_wq); + swake_up_one_online(&rcu_state.gp_wq); } /* diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6d7cea5d591f..2ac440bc7e10 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -173,7 +173,6 @@ static bool sync_rcu_exp_done_unlocked(struct rcu_node *rnp) return ret; } - /* * Report the exit from RCU read-side critical section for the last task * that queued itself during or before the current expedited preemptible-RCU @@ -201,7 +200,7 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (wake) { smp_mb(); /* EGP done before wake_up(). */ - swake_up_one(&rcu_state.expedited_wq); + swake_up_one_online(&rcu_state.expedited_wq); } break; } |