From b96e7a5fa0ba9cda32888e04f8f4bac42d49a7f8 Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)"
Date: Tue, 5 Sep 2023 00:02:11 +0000
Subject: rcu/tree: Defer setting of jiffies during stall reset

There are instances where rcu_cpu_stall_reset() is called when jiffies
did not get a chance to update for a long time. Before jiffies is
updated, the CPU stall detector can go off triggering false-positives
where a just-started grace period appears to be ages old.

In the past, we disabled stall detection in rcu_cpu_stall_reset();
however, this got changed [1]. This is resulting in false-positives in
the KGDB use case [2].

Fix this by deferring the update of jiffies to the third run of the FQS
loop. This is more robust, as, even if rcu_cpu_stall_reset() is called
just before jiffies is read, we would end up pushing out the jiffies
read by 3 more FQS loops. Meanwhile the CPU stall detection will be
delayed and we will not get any false positives.

[1] https://lore.kernel.org/all/20210521155624.174524-2-senozhatsky@chromium.org/
[2] https://lore.kernel.org/all/20230814020045.51950-2-chenhuacai@loongson.cn/

Tested with the rcutorture.cpu_stall option as well to verify stall
behavior with/without the patch.

Tested-by: Huacai Chen
Reported-by: Binbin Zhou
Closes: https://lore.kernel.org/all/20230814020045.51950-2-chenhuacai@loongson.cn/
Suggested-by: Paul McKenney
Cc: Sergey Senozhatsky
Cc: Thomas Gleixner
Cc: stable@vger.kernel.org
Fixes: a80be428fbc1 ("rcu: Do not disable GP stall detection in rcu_cpu_stall_reset()")
Signed-off-by: Joel Fernandes (Google)
Signed-off-by: Paul E. McKenney
Signed-off-by: Frederic Weisbecker
---
 kernel/rcu/tree.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index cb1caefa8bd0..d85779f67aea 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1556,10 +1556,22 @@ static bool rcu_gp_fqs_check_wake(int *gfp)
  */
 static void rcu_gp_fqs(bool first_time)
 {
+	int nr_fqs = READ_ONCE(rcu_state.nr_fqs_jiffies_stall);
 	struct rcu_node *rnp = rcu_get_root();
 
 	WRITE_ONCE(rcu_state.gp_activity, jiffies);
 	WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1);
+
+	WARN_ON_ONCE(nr_fqs > 3);
+	/* Only countdown nr_fqs for stall purposes if jiffies moves. */
+	if (nr_fqs) {
+		if (nr_fqs == 1) {
+			WRITE_ONCE(rcu_state.jiffies_stall,
+				   jiffies + rcu_jiffies_till_stall_check());
+		}
+		WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, --nr_fqs);
+	}
+
 	if (first_time) {
 		/* Collect dyntick-idle snapshots. */
 		force_qs_rnp(dyntick_save_progress_counter);
--
cgit v1.2.3-58-ga151

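The countdown above is armed by rcu_cpu_stall_reset(), which lives in
kernel/rcu/tree_stall.h and is therefore outside this tree.c-limited log. A
sketch of the arming side, reconstructed from the FQS-side logic above (the
exact tree_stall.h hunk may differ):

	/* Sketch: arm the deferred stall-time update (actual code in tree_stall.h). */
	void rcu_cpu_stall_reset(void)
	{
		/* Let three FQS-loop passes elapse before re-arming the stall time... */
		WRITE_ONCE(rcu_state.nr_fqs_jiffies_stall, 3);
		/* ...and park the deadline out of reach until rcu_gp_fqs() resets it. */
		WRITE_ONCE(rcu_state.jiffies_stall, ULONG_MAX);
	}

Since rcu_gp_fqs() only decrements nr_fqs_jiffies_stall when it actually runs,
the jiffies value read for the new deadline is guaranteed to be both post-reset
and recently updated.
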
From 4502138acc8f4139c94ce0ec0eee926f8805fbbc Mon Sep 17 00:00:00 2001
From: "Joel Fernandes (Google)"
Date: Sat, 29 Jul 2023 14:27:36 +0000
Subject: rcu/tree: Remove superfluous return from void call_rcu* functions

The return keyword is not needed here.

Signed-off-by: Joel Fernandes (Google)
Signed-off-by: Paul E. McKenney
Signed-off-by: Frederic Weisbecker
---
 kernel/rcu/tree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index cb1caefa8bd0..7c79480bfaa0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2713,7 +2713,7 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in)
  */
 void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
 {
-	return __call_rcu_common(head, func, false);
+	__call_rcu_common(head, func, false);
 }
 EXPORT_SYMBOL_GPL(call_rcu_hurry);
 #endif
@@ -2764,7 +2764,7 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry);
  */
 void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
-	return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
+	__call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
 }
 EXPORT_SYMBOL_GPL(call_rcu);
--
cgit v1.2.3-58-ga151

From 16128b1f8c823438dcd3f3b4a57cbe7267bcf82f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 1 Aug 2023 17:15:25 -0700
Subject: rcu: Add sysfs to provide throttled access to rcu_barrier()

When running a series of stress tests all making heavy use of RCU, it
is all too possible to OOM the system when the prior test's RCU
callbacks don't get invoked until after the subsequent test starts. One
way of handling this is just a timed wait, but this fails when a given
CPU has so many callbacks queued that they take longer to invoke than
allowed for by that timed wait.

This commit therefore adds an rcutree.do_rcu_barrier module parameter
that is accessible from sysfs. Writing one of the many synonyms for
boolean "true" will cause an rcu_barrier() to be invoked, but will
guarantee that no more than one rcu_barrier() will be invoked per
sixteenth of a second via this mechanism. The flip side is that a given
request might wait a second or three longer than absolutely necessary,
but only when there are multiple uses of rcutree.do_rcu_barrier within
a one-second time interval.

This commit unnecessarily serializes the rcu_barrier() machinery, given
that serialization is already provided by procfs. This has the
advantage of allowing throttled rcu_barrier() from other sources within
the kernel.

Reported-by: Johannes Weiner
Signed-off-by: Paul E. McKenney
Signed-off-by: Frederic Weisbecker
---
 Documentation/admin-guide/kernel-parameters.txt |  7 +++
 kernel/rcu/tree.c                               | 76 +++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

(limited to 'kernel/rcu/tree.c')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0a1731a0f0ef..7ec8a406d419 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4769,6 +4769,13 @@
 			Set maximum number of finished RCU callbacks to
 			process in one batch.
 
+	rcutree.do_rcu_barrier= [KNL]
+			Request a call to rcu_barrier(). This is
+			throttled so that userspace tests can safely
+			hammer on the sysfs variable if they so choose.
+			If triggered before the RCU grace-period machinery
+			is fully active, this will error out with EAGAIN.
+
 	rcutree.dump_tree= [KNL]
 			Dump the structure of the rcu_node combining tree
 			out at early boot. This is used for diagnostic
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 7c79480bfaa0..3c7281fc25a7 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4083,6 +4083,82 @@ retry:
 }
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
+static unsigned long rcu_barrier_last_throttle;
+
+/**
+ * rcu_barrier_throttled - Do rcu_barrier(), but limit to one per second
+ *
+ * This can be thought of as guard rails around rcu_barrier() that
+ * permits unrestricted userspace use, at least assuming the hardware's
+ * try_cmpxchg() is robust. There will be at most one call per second to
+ * rcu_barrier() system-wide from use of this function, which means that
+ * callers might needlessly wait a second or three.
+ *
+ * This is intended for use by test suites to avoid OOM by flushing RCU
+ * callbacks from the previous test before starting the next. See the
+ * rcutree.do_rcu_barrier module parameter for more information.
+ *
+ * Why not simply make rcu_barrier() more scalable? That might be
+ * the eventual endpoint, but let's keep it simple for the time being.
+ * Note that the module parameter infrastructure serializes calls to a
+ * given .set() function, but should concurrent .set() invocation ever be
+ * possible, we are ready!
+ */
+static void rcu_barrier_throttled(void)
+{
+	unsigned long j = jiffies;
+	unsigned long old = READ_ONCE(rcu_barrier_last_throttle);
+	unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
+
+	while (time_in_range(j, old, old + HZ / 16) ||
+	       !try_cmpxchg(&rcu_barrier_last_throttle, &old, j)) {
+		schedule_timeout_idle(HZ / 16);
+		if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
+			smp_mb(); /* caller's subsequent code after above check. */
+			return;
+		}
+		j = jiffies;
+		old = READ_ONCE(rcu_barrier_last_throttle);
+	}
+	rcu_barrier();
+}
+
+/*
+ * Invoke rcu_barrier_throttled() when a rcutree.do_rcu_barrier
+ * request arrives. We insist on a true value to allow for possible
+ * future expansion.
+ */
+static int param_set_do_rcu_barrier(const char *val, const struct kernel_param *kp)
+{
+	bool b;
+	int ret;
+
+	if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING)
+		return -EAGAIN;
+	ret = kstrtobool(val, &b);
+	if (!ret && b) {
+		atomic_inc((atomic_t *)kp->arg);
+		rcu_barrier_throttled();
+		atomic_dec((atomic_t *)kp->arg);
+	}
+	return ret;
+}
+
+/*
+ * Output the number of outstanding rcutree.do_rcu_barrier requests.
+ */
+static int param_get_do_rcu_barrier(char *buffer, const struct kernel_param *kp)
+{
+	return sprintf(buffer, "%d\n", atomic_read((atomic_t *)kp->arg));
+}
+
+static const struct kernel_param_ops do_rcu_barrier_ops = {
+	.set = param_set_do_rcu_barrier,
+	.get = param_get_do_rcu_barrier,
+};
+static atomic_t do_rcu_barrier;
+module_param_cb(do_rcu_barrier, &do_rcu_barrier_ops, &do_rcu_barrier, 0644);
+
 /*
  * Compute the mask of online CPUs for the specified rcu_node structure.
  * This will not be stable unless the rcu_node structure's ->lock is
--
cgit v1.2.3-58-ga151

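A minimal userspace sketch of driving this knob from a test harness; the path
below follows the standard /sys/module/<module>/parameters layout, and the
program must run as root given the 0644 mode above:

	/* Sketch: request a throttled rcu_barrier() between tests (run as root). */
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/sys/module/rcutree/parameters/do_rcu_barrier", O_WRONLY);

		if (fd < 0) {
			perror("open");		/* kernel without this parameter? */
			return 1;
		}
		/* Any kstrtobool() synonym for true works: "1", "y", "Y", ... */
		if (write(fd, "1", 1) < 0)
			perror("write");	/* EAGAIN if RCU is not fully active yet */
		close(fd);
		return 0;
	}

The write returns only after the (possibly throttled) rcu_barrier() completes,
which is exactly the synchronization point a test suite wants between tests.
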
From 2cbc482d325ee58001472c4359b311958c4efdd1 Mon Sep 17 00:00:00 2001
From: Zhen Lei
Date: Sat, 5 Aug 2023 11:17:26 +0800
Subject: rcu: Dump memory object info if callback function is invalid

When a structure containing an RCU callback rhp is (incorrectly) freed
and reallocated after rhp is passed to call_rcu(), it is not unusual
for rhp->func to be set to NULL. This defeats the debugging prints used
by __call_rcu_common() in kernels built with
CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, which expect to identify the offending
code using the identity of this function. And in kernels built without
CONFIG_DEBUG_OBJECTS_RCU_HEAD=y, things are even worse, as can be seen
from this splat:

Unable to handle kernel NULL pointer dereference at virtual address 0
... ...
PC is at 0x0
LR is at rcu_do_batch+0x1c0/0x3b8
... ...
(rcu_do_batch) from (rcu_core+0x1d4/0x284)
(rcu_core) from (__do_softirq+0x24c/0x344)
(__do_softirq) from (__irq_exit_rcu+0x64/0x108)
(__irq_exit_rcu) from (irq_exit+0x8/0x10)
(irq_exit) from (__handle_domain_irq+0x74/0x9c)
(__handle_domain_irq) from (gic_handle_irq+0x8c/0x98)
(gic_handle_irq) from (__irq_svc+0x5c/0x94)
(__irq_svc) from (arch_cpu_idle+0x20/0x3c)
(arch_cpu_idle) from (default_idle_call+0x4c/0x78)
(default_idle_call) from (do_idle+0xf8/0x150)
(do_idle) from (cpu_startup_entry+0x18/0x20)
(cpu_startup_entry) from (0xc01530)

This commit therefore adds calls to mem_dump_obj(rhp) to output some
information, for example:

  slab kmalloc-256 start ffff410c45019900 pointer offset 0 size 256

This provides the rough size of the memory block and the offset of the
rcu_head structure, which at least provides a few clues to help locate
the problem. If the problem is reproducible, additional slab debugging
can be enabled, for example, CONFIG_DEBUG_SLAB=y, which can provide
significantly more information.

Signed-off-by: Zhen Lei
Signed-off-by: Paul E. McKenney
Signed-off-by: Frederic Weisbecker
---
 kernel/rcu/rcu.h      | 7 +++++++
 kernel/rcu/srcutiny.c | 1 +
 kernel/rcu/srcutree.c | 1 +
 kernel/rcu/tasks.h    | 1 +
 kernel/rcu/tiny.c     | 1 +
 kernel/rcu/tree.c     | 1 +
 6 files changed, 12 insertions(+)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 98e13be411af..d612731feea4 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -10,6 +10,7 @@
 #ifndef __LINUX_RCU_H
 #define __LINUX_RCU_H
 
+#include <linux/slab.h>
 #include <trace/events/rcu.h>
 
 /*
@@ -248,6 +249,12 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
 }
 #endif	/* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
 
+static inline void debug_rcu_head_callback(struct rcu_head *rhp)
+{
+	if (unlikely(!rhp->func))
+		kmem_dump_obj(rhp);
+}
+
 extern int rcu_cpu_stall_suppress_at_boot;
 
 static inline bool rcu_stall_is_suppressed_at_boot(void)
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 336af24e0fe3..c38e5933a5d6 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -138,6 +138,7 @@ void srcu_drive_gp(struct work_struct *wp)
 	while (lh) {
 		rhp = lh;
 		lh = lh->next;
+		debug_rcu_head_callback(rhp);
 		local_bh_disable();
 		rhp->func(rhp);
 		local_bh_enable();
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index f1a905200fc2..833a8f848a90 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -1710,6 +1710,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
 	rhp = rcu_cblist_dequeue(&ready_cbs);
 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
 		debug_rcu_head_unqueue(rhp);
+		debug_rcu_head_callback(rhp);
 		local_bh_disable();
 		rhp->func(rhp);
 		local_bh_enable();
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 8d65f7d576a3..7c845532a50a 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -538,6 +538,7 @@ static void rcu_tasks_invoke_cbs(struct rcu_tasks *rtp, struct rcu_tasks_percpu
 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
 	len = rcl.len;
 	for (rhp = rcu_cblist_dequeue(&rcl); rhp; rhp = rcu_cblist_dequeue(&rcl)) {
+		debug_rcu_head_callback(rhp);
 		local_bh_disable();
 		rhp->func(rhp);
 		local_bh_enable();
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index 42f7589e51e0..fec804b79080 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -97,6 +97,7 @@ static inline bool rcu_reclaim_tiny(struct rcu_head *head)
 	trace_rcu_invoke_callback("", head);
 	f = head->func;
+	debug_rcu_head_callback(head);
 	WRITE_ONCE(head->func, (rcu_callback_t)0L);
 	f(head);
 	rcu_lock_release(&rcu_callback_map);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3c7281fc25a7..aae515071ffd 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2135,6 +2135,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
 	trace_rcu_invoke_callback(rcu_state.name, rhp);
 
 	f = rhp->func;
+	debug_rcu_head_callback(rhp);
 	WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
 	f(rhp);
--
cgit v1.2.3-58-ga151

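For illustration, a hypothetical caller (all names invented) exhibiting the
premature-free pattern that zeroes ->func and that this diagnostic helps pin
down:

	/* Hypothetical buggy pattern: freeing the object before the grace period. */
	struct foo {
		int data;
		struct rcu_head rhp;
	};

	static void foo_reclaim(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct foo, rhp));
	}

	static void foo_release_buggy(struct foo *fp)
	{
		call_rcu(&fp->rhp, foo_reclaim);
		kfree(fp);	/* BUG: slab may reallocate and clear ->rhp.func */
	}

When rcu_do_batch() later reaches the clobbered callback,
debug_rcu_head_callback() now prints the slab provenance of rhp via
kmem_dump_obj() instead of leaving only a NULL-pointer splat.
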
From 0ae9942f03d0d034fdb0a4f44fc99f62a3107987 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Fri, 18 Aug 2023 08:53:58 -0700
Subject: rcu: Eliminate rcu_gp_slow_unregister() false positive

When using rcutorture as a module, there are a number of conditions
that can abort the modprobe operation, for example, when attempting to
run both RCU CPU stall warning tests and forward-progress tests. This
can cause rcu_torture_cleanup() to be invoked on the unwind path out of
rcu_torture_init(), which will mean that rcu_gp_slow_unregister() is
invoked without a matching rcu_gp_slow_register(). This will cause a
splat because rcu_gp_slow_unregister() is passed rcu_fwd_cb_nodelay,
which does not match a NULL pointer.

This commit therefore forgives a mismatch involving a NULL pointer,
thus avoiding this false-positive splat.

Signed-off-by: Paul E. McKenney
Signed-off-by: Frederic Weisbecker
---
 kernel/rcu/tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index aae515071ffd..a83ecab77917 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1260,7 +1260,7 @@ EXPORT_SYMBOL_GPL(rcu_gp_slow_register);
 /* Unregister a counter, with NULL for not caring which. */
 void rcu_gp_slow_unregister(atomic_t *rgssp)
 {
-	WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress);
+	WARN_ON_ONCE(rgssp && rgssp != rcu_gp_slow_suppress && rcu_gp_slow_suppress != NULL);
 	WRITE_ONCE(rcu_gp_slow_suppress, NULL);
 }
--
cgit v1.2.3-58-ga151

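A condensed, hypothetical sketch of the rcutorture unwind sequence described
above (the real init/cleanup logic is considerably more involved, and
incompatible_test_options() is a placeholder):

	/* Hypothetical condensation of the rcutorture init/cleanup pairing. */
	static atomic_t rcu_fwd_cb_nodelay;

	static void torture_cleanup_sketch(void)
	{
		/* Also runs on the unwind path, registered or not. */
		rcu_gp_slow_unregister(&rcu_fwd_cb_nodelay);
	}

	static int __init torture_init_sketch(void)
	{
		if (incompatible_test_options())	/* placeholder check */
			goto unwind;			/* register never reached... */
		rcu_gp_slow_register(&rcu_fwd_cb_nodelay);
		return 0;
	unwind:
		torture_cleanup_sketch();  /* ...so rcu_gp_slow_suppress is still NULL */
		return -EINVAL;
	}

With the added "&& rcu_gp_slow_suppress != NULL" term, the unregister call on
the unwind path no longer splats.
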
From 5f98fd034ca6fd1ab8c91a3488968a0e9caaabf6 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Sat, 30 Sep 2023 17:46:56 +0000
Subject: rcu: kmemleak: Ignore kmemleak false positives when RCU-freeing objects

Since the actual slab freeing is deferred when calling kvfree_rcu(), so
is the kmemleak_free() callback informing kmemleak of the object
deletion. From the perspective of the kvfree_rcu() caller, the object
is freed and it may remove any references to it. Since kmemleak does
not scan RCU internal data storing the pointer, it will report such
objects as leaks during the grace period.

Tell kmemleak to ignore such objects on the kvfree_call_rcu() path.
Note that the tiny RCU implementation does not have such issue since
the objects can be tracked from the rcu_ctrlblk structure.

Signed-off-by: Catalin Marinas
Reported-by: Christoph Paasch
Closes: https://lore.kernel.org/all/F903A825-F05F-4B77-A2B5-7356282FBA2C@apple.com/
Cc:
Tested-by: Christoph Paasch
Reviewed-by: Paul E. McKenney
Signed-off-by: Joel Fernandes (Google)
Signed-off-by: Frederic Weisbecker
---
 kernel/rcu/tree.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index a83ecab77917..4dd7df30df31 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include <linux/kmemleak.h>
 #include
 #include
 #include
@@ -3389,6 +3390,14 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
 		success = true;
 	}
 
+	/*
+	 * The kvfree_rcu() caller considers the pointer freed at this point
+	 * and likely removes any references to it. Since the actual slab
+	 * freeing (and kmemleak_free()) is deferred, tell kmemleak to ignore
+	 * this object (no scanning or false positives reporting).
+	 */
+	kmemleak_ignore(ptr);
+
 	// Set timer to drain after KFREE_DRAIN_JIFFIES.
 	if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
 		schedule_delayed_monitor_work(krcp);
--
cgit v1.2.3-58-ga151

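A hypothetical caller pattern (names invented) showing the window in which
kmemleak used to issue a false report:

	struct conn {
		struct rcu_head rcu;
		int id;
	};

	static struct conn __rcu *active_conn;

	static void conn_retire(struct conn *c)
	{
		/* Drop the last externally visible reference... */
		rcu_assign_pointer(active_conn, NULL);
		/*
		 * ...and hand the object to RCU. Until the grace period ends,
		 * the only pointer to 'c' lives in RCU-internal structures
		 * that kmemleak does not scan, hence the old transient leak
		 * reports.
		 */
		kvfree_rcu(c, rcu);
	}

The kmemleak_ignore() call added above drops such objects from leak reporting
altogether, which is safe because their freeing is already scheduled.
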
From 358662a9616c5078dc4d389d6bceeb5974f4aa97 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 8 Sep 2023 22:35:58 +0200
Subject: rcu: Assume IRQS disabled from rcu_report_dead()

rcu_report_dead() is the last RCU word from the CPU down through the
hotplug path. It is called in the idle loop right before the CPU shuts
down for good. Because it removes the CPU from the grace period state
machine and reports an ultimate quiescent state if necessary, no
further use of RCU is allowed. Therefore it is expected that IRQs are
disabled upon calling this function and are not to be re-enabled again
until the CPU shuts down.

Remove the IRQs disablement from that function and verify instead that
it is actually called with IRQs disabled as it is expected at that
special point in the idle path.

Reviewed-by: Joel Fernandes (Google)
Reviewed-by: Paul E. McKenney
Signed-off-by: Frederic Weisbecker
---
 kernel/rcu/tree.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4dd7df30df31..8c2954502e55 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4562,11 +4562,16 @@ void rcu_cpu_starting(unsigned int cpu)
  */
 void rcu_report_dead(unsigned int cpu)
 {
-	unsigned long flags, seq_flags;
+	unsigned long flags;
 	unsigned long mask;
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
+	/*
+	 * IRQS must be disabled from now on and until the CPU dies, or an interrupt
+	 * may introduce a new READ-side while it is actually off the QS masks.
+	 */
+	lockdep_assert_irqs_disabled();
 	// Do any dangling deferred wakeups.
 	do_nocb_deferred_wakeup(rdp);
 
@@ -4574,7 +4579,6 @@ void rcu_report_dead(unsigned int cpu)
 	/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
 	mask = rdp->grpmask;
-	local_irq_save(seq_flags);
 	arch_spin_lock(&rcu_state.ofl_lock);
 	raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
 	rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
@@ -4588,8 +4592,6 @@ void rcu_report_dead(unsigned int cpu)
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext & ~mask);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	arch_spin_unlock(&rcu_state.ofl_lock);
-	local_irq_restore(seq_flags);
-
 	rdp->cpu_started = false;
 }
--
cgit v1.2.3-58-ga151

From c964c1f5ee96e1460606d44f80a47bdacd8fe568 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 8 Sep 2023 22:35:59 +0200
Subject: rcu: Assume rcu_report_dead() is always called locally

rcu_report_dead() has to be called locally by the CPU that is going to
exit the RCU state machine. Passing a cpu argument here is error-prone
and leaves the possibility for a racy remote call.

Use local access instead.

Reviewed-by: Paul E. McKenney
Signed-off-by: Frederic Weisbecker
---
 arch/arm64/kernel/smp.c  | 2 +-
 include/linux/rcupdate.h | 2 +-
 kernel/cpu.c             | 2 +-
 kernel/rcu/tree.c        | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 960b98b43506..8fa646c90c67 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -401,7 +401,7 @@ void __noreturn cpu_die_early(void)
 	/* Mark this CPU absent */
 	set_cpu_present(cpu, 0);
-	rcu_report_dead(cpu);
+	rcu_report_dead();
 
 	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
 		update_cpu_boot_status(CPU_KILL_ME);
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 5e5f920ade90..aa351ddcbe8d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -122,7 +122,7 @@ static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
 void rcu_init(void);
 extern int rcu_scheduler_active;
 void rcu_sched_clock_irq(int user);
-void rcu_report_dead(unsigned int cpu);
+void rcu_report_dead(void);
 void rcutree_migrate_callbacks(int cpu);
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6de7c6bb74ee..076e75fed8bb 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1388,7 +1388,7 @@ void cpuhp_report_idle_dead(void)
 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 
 	BUG_ON(st->state != CPUHP_AP_OFFLINE);
-	rcu_report_dead(smp_processor_id());
+	rcu_report_dead();
 	st->state = CPUHP_AP_IDLE_DEAD;
 	/*
 	 * We cannot call complete after rcu_report_dead() so we delegate it
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8c2954502e55..2e1e7eadf2cc 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4560,11 +4560,11 @@ void rcu_cpu_starting(unsigned int cpu)
  * from the outgoing CPU rather than from the cpuhp_step mechanism.
  * This is because this function must be invoked at a precise location.
  */
-void rcu_report_dead(unsigned int cpu)
+void rcu_report_dead(void)
 {
 	unsigned long flags;
 	unsigned long mask;
-	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+	struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
 	struct rcu_node *rnp = rdp->mynode;  /* Outgoing CPU's rdp & rnp. */
 
 	/*
--
cgit v1.2.3-58-ga151

From 2cb1f6e9a743af58a23cf14563b5eada1e0d3fde Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 8 Sep 2023 22:36:00 +0200
Subject: rcu: Conditionally build CPU-hotplug teardown callbacks

Among the three CPU-hotplug teardown RCU callbacks, two of them exit
early if CONFIG_HOTPLUG_CPU=n, and one is left unchanged. In any case
all of them have an implementation when CONFIG_HOTPLUG_CPU=n.

Align instead with the common way to deal with CPU-hotplug teardown
callbacks and provide a proper stub when they are not supported.

Reviewed-by: Paul E. McKenney
Signed-off-by: Frederic Weisbecker
---
 include/linux/rcutree.h |  11 ++++-
 kernel/rcu/tree.c       | 114 +++++++++++++++++++++++-------------------------
 2 files changed, 63 insertions(+), 62 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 153cfc7bbffd..46875c4e9f56 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -110,9 +110,16 @@ void rcu_all_qs(void);
 /* RCUtree hotplug events */
 int rcutree_prepare_cpu(unsigned int cpu);
 int rcutree_online_cpu(unsigned int cpu);
-int rcutree_offline_cpu(unsigned int cpu);
+void rcu_cpu_starting(unsigned int cpu);
+
+#ifdef CONFIG_HOTPLUG_CPU
 int rcutree_dead_cpu(unsigned int cpu);
 int rcutree_dying_cpu(unsigned int cpu);
-void rcu_cpu_starting(unsigned int cpu);
+int rcutree_offline_cpu(unsigned int cpu);
+#else
+#define rcutree_dead_cpu NULL
+#define rcutree_dying_cpu NULL
+#define rcutree_offline_cpu NULL
+#endif
 
 #endif /* __LINUX_RCUTREE_H */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2e1e7eadf2cc..f9c6b2680cbb 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4237,25 +4237,6 @@ static bool rcu_init_invoked(void)
 	return !!rcu_state.n_online_cpus;
 }
 
-/*
- * Near the end of the offline process. Trace the fact that this CPU
- * is going offline.
- */
-int rcutree_dying_cpu(unsigned int cpu)
-{
-	bool blkd;
-	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
-	struct rcu_node *rnp = rdp->mynode;
-
-	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
-		return 0;
-
-	blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
-	trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
-			       blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
-	return 0;
-}
-
 /*
  * All CPUs for the specified rcu_node structure have gone offline,
  * and all tasks that were preempted within an RCU read-side critical
@@ -4301,23 +4282,6 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
 	}
 }
 
-/*
- * The CPU has been completely removed, and some other CPU is reporting
- * this fact from process context. Do the remainder of the cleanup.
- * There can only be one CPU hotplug operation at a time, so no need for
- * explicit locking.
- */
-int rcutree_dead_cpu(unsigned int cpu)
-{
-	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
-		return 0;
-
-	WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
-	// Stop-machine done, so allow nohz_full to disable tick.
-	tick_dep_clear(TICK_DEP_BIT_RCU);
-	return 0;
-}
-
 /*
  * Propagate ->qsinitmask bits up the rcu_node tree to account for the
  * first CPU in a given leaf rcu_node structure coming online. The caller
@@ -4470,29 +4434,6 @@ int rcutree_online_cpu(unsigned int cpu)
 	return 0;
 }
 
-/*
- * Near the beginning of the process. The CPU is still very much alive
- * with pretty much all services enabled.
- */
-int rcutree_offline_cpu(unsigned int cpu)
-{
-	unsigned long flags;
-	struct rcu_data *rdp;
-	struct rcu_node *rnp;
-
-	rdp = per_cpu_ptr(&rcu_data, cpu);
-	rnp = rdp->mynode;
-	raw_spin_lock_irqsave_rcu_node(rnp, flags);
-	rnp->ffmask &= ~rdp->grpmask;
-	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
-
-	rcutree_affinity_setting(cpu, cpu);
-
-	// nohz_full CPUs need the tick for stop-machine to work quickly
-	tick_dep_set(TICK_DEP_BIT_RCU);
-	return 0;
-}
-
 /*
  * Mark the specified CPU as being online so that subsequent grace periods
  * (both expedited and normal) will wait on it. Note that this means that
@@ -4646,7 +4587,60 @@ void rcutree_migrate_callbacks(int cpu)
 		  cpu, rcu_segcblist_n_cbs(&rdp->cblist),
 		  rcu_segcblist_first_cb(&rdp->cblist));
 }
-#endif
+
+/*
+ * The CPU has been completely removed, and some other CPU is reporting
+ * this fact from process context. Do the remainder of the cleanup.
+ * There can only be one CPU hotplug operation at a time, so no need for
+ * explicit locking.
+ */
+int rcutree_dead_cpu(unsigned int cpu)
+{
+	WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus - 1);
+	// Stop-machine done, so allow nohz_full to disable tick.
+	tick_dep_clear(TICK_DEP_BIT_RCU);
+	return 0;
+}
+
+/*
+ * Near the end of the offline process. Trace the fact that this CPU
+ * is going offline.
+ */
+int rcutree_dying_cpu(unsigned int cpu)
+{
+	bool blkd;
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+	struct rcu_node *rnp = rdp->mynode;
+
+	blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
+	trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
+			       blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
+	return 0;
+}
+
+/*
+ * Near the beginning of the process. The CPU is still very much alive
+ * with pretty much all services enabled.
+ */
+int rcutree_offline_cpu(unsigned int cpu)
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+
+	rdp = per_cpu_ptr(&rcu_data, cpu);
+	rnp = rdp->mynode;
+	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	rnp->ffmask &= ~rdp->grpmask;
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+
+	rcutree_affinity_setting(cpu, cpu);
+
+	// nohz_full CPUs need the tick for stop-machine to work quickly
+	tick_dep_set(TICK_DEP_BIT_RCU);
+	return 0;
+}
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 /*
  * On non-huge systems, use expedited RCU grace periods to make suspend
--
cgit v1.2.3-58-ga151

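The NULL stubs rely on the hotplug core's convention that a missing step
callback is simply skipped. A simplified, self-contained sketch of that
convention (invoke_step() and the example callback are invented for
illustration; the real logic lives in kernel/cpu.c's cpuhp machinery):

	/* Sketch of the "NULL callback is a no-op" convention (hypothetical). */
	#include <stddef.h>
	#include <stdio.h>

	typedef int (*step_fn)(unsigned int cpu);

	static int invoke_step(step_fn fn, unsigned int cpu)
	{
		if (!fn)		/* stubbed out, e.g., CONFIG_HOTPLUG_CPU=n */
			return 0;	/* treated as trivially successful */
		return fn(cpu);
	}

	static int dead_cpu_example(unsigned int cpu)
	{
		printf("teardown for CPU %u\n", cpu);
		return 0;
	}

	int main(void)
	{
		invoke_step(dead_cpu_example, 1);	/* runs the callback */
		invoke_step(NULL, 1);			/* no-op, like the stubs */
		return 0;
	}
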
From 448e9f34d91d1a4799fdb06a93c2c24b34b6fd9d Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 8 Sep 2023 22:36:01 +0200
Subject: rcu: Standardize explicit CPU-hotplug calls

rcu_report_dead() and rcutree_migrate_callbacks() have their headers in
rcupdate.h while those are pure rcutree calls, like the other
CPU-hotplug functions.

Also rcu_cpu_starting() and rcu_report_dead() have different naming
conventions while they mirror each other's effects.

Fix the headers and propose a naming that relates both functions and
aligns with the prefix of other rcutree CPU-hotplug functions.

Reviewed-by: Paul E. McKenney
Signed-off-by: Frederic Weisbecker
---
 .../Expedited-Grace-Periods/Expedited-Grace-Periods.rst      |  2 +-
 Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg  |  4 ++--
 Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp.svg      |  4 ++--
 Documentation/RCU/Design/Memory-Ordering/TreeRCU-hotplug.svg |  4 ++--
 Documentation/RCU/Design/Requirements/Requirements.rst       |  4 ++--
 arch/arm64/kernel/smp.c                                      |  4 ++--
 arch/powerpc/kernel/smp.c                                    |  2 +-
 arch/s390/kernel/smp.c                                       |  2 +-
 arch/x86/kernel/smpboot.c                                    |  2 +-
 include/linux/interrupt.h                                    |  2 +-
 include/linux/rcupdate.h                                     |  2 --
 include/linux/rcutiny.h                                      |  2 +-
 include/linux/rcutree.h                                      |  7 ++++++-
 kernel/cpu.c                                                 |  6 +++---
 kernel/rcu/tree.c                                            | 12 ++++++++----
 15 files changed, 33 insertions(+), 26 deletions(-)

(limited to 'kernel/rcu/tree.c')

diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
index 93d899d53258..414f8a2012d6 100644
--- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
+++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.rst
@@ -181,7 +181,7 @@ operations is carried out at several levels:
    of this wait (or series of waits, as the case may be) is to permit a
    concurrent CPU-hotplug operation to complete.
 #. In the case of RCU-sched, one of the last acts of an outgoing CPU is
-   to invoke ``rcu_report_dead()``, which reports a quiescent state for
+   to invoke ``rcutree_report_cpu_dead()``, which reports a quiescent state for
    that CPU. However, this is likely paranoia-induced redundancy.
 
 +-----------------------------------------------------------------------+
diff --git a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg
index 7ddc094d7f28..d82a77d03d8c 100644
--- a/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg
+++ b/Documentation/RCU/Design/Memory-Ordering/TreeRCU-gp-fqs.svg
@@ -1135,7 +1135,7 @@
        font-weight="bold"
        font-size="192"
        id="text202-7-5-3-27-6-5"
-       style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_report_dead()
+       style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcutree_report_cpu_dead()

[The second hunk of this file, the matching hunks of TreeRCU-gp.svg and
TreeRCU-hotplug.svg, and the diffs for Requirements.rst,
arch/arm64/kernel/smp.c, and arch/powerpc/kernel/smp.c survive only as
fragments; the surviving lines show the same renames being applied, e.g.:
+       xml:space="preserve">rcutree_report_cpu_starting()]

diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index a4edb7ea66ea..214a1b67f80a 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -898,7 +898,7 @@ static void smp_start_secondary(void *cpuvoid)
 	S390_lowcore.restart_flags = 0;
 	restore_access_regs(S390_lowcore.access_regs_save_area);
 	cpu_init();
-	rcu_cpu_starting(cpu);
+	rcutree_report_cpu_starting(cpu);
 	init_cpu_timer();
 	vtime_init();
 	vdso_getcpu_init();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4e45ff44aa07..4ccb76f89af8 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -288,7 +288,7 @@ static void notrace start_secondary(void *unused)
 	cpu_init();
 	fpu__init_cpu();
-	rcu_cpu_starting(raw_smp_processor_id());
+	rcutree_report_cpu_starting(raw_smp_processor_id());
 	x86_cpuinit.early_percpu_clock_init();
 	ap_starting();
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index a92bce40b04b..d05e1e9a553c 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -566,7 +566,7 @@ enum
  *
  * _ RCU:
  *    1) rcutree_migrate_callbacks() migrates the queue.
- *    2) rcu_report_dead() reports the final quiescent states.
+ *    2) rcutree_report_cpu_dead() reports the final quiescent states.
  *
  * _ IRQ_POLL: irq_poll_cpu_dead() migrates the queue
  */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index aa351ddcbe8d..f7206b2623c9 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -122,8 +122,6 @@ static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
 void rcu_init(void);
 extern int rcu_scheduler_active;
 void rcu_sched_clock_irq(int user);
-void rcu_report_dead(void);
-void rcutree_migrate_callbacks(int cpu);
 
 #ifdef CONFIG_TASKS_RCU_GENERIC
 void rcu_init_tasks_generic(void);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 7b949292908a..d9ac7b136aea 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -171,6 +171,6 @@ static inline void rcu_all_qs(void) { barrier(); }
 #define rcutree_offline_cpu NULL
 #define rcutree_dead_cpu NULL
 #define rcutree_dying_cpu NULL
-static inline void rcu_cpu_starting(unsigned int cpu) { }
+static inline void rcutree_report_cpu_starting(unsigned int cpu) { }
 
 #endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 46875c4e9f56..254244202ea9 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -110,7 +110,7 @@ void rcu_all_qs(void);
 /* RCUtree hotplug events */
 int rcutree_prepare_cpu(unsigned int cpu);
 int rcutree_online_cpu(unsigned int cpu);
-void rcu_cpu_starting(unsigned int cpu);
+void rcutree_report_cpu_starting(unsigned int cpu);
 
 #ifdef CONFIG_HOTPLUG_CPU
 int rcutree_dead_cpu(unsigned int cpu);
@@ -122,4 +122,9 @@ int rcutree_offline_cpu(unsigned int cpu);
 #define rcutree_offline_cpu NULL
 #endif
 
+void rcutree_migrate_callbacks(int cpu);
+
+/* Called from hotplug and also arm64 early secondary boot failure */
+void rcutree_report_cpu_dead(void);
+
 #endif /* __LINUX_RCUTREE_H */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 076e75fed8bb..2491766e1fd5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1388,10 +1388,10 @@ void cpuhp_report_idle_dead(void)
 	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
 
 	BUG_ON(st->state != CPUHP_AP_OFFLINE);
-	rcu_report_dead();
+	rcutree_report_cpu_dead();
 	st->state = CPUHP_AP_IDLE_DEAD;
 	/*
-	 * We cannot call complete after rcu_report_dead() so we delegate it
+	 * We cannot call complete after rcutree_report_cpu_dead() so we delegate it
 	 * to an online cpu.
 	 */
 	smp_call_function_single(cpumask_first(cpu_online_mask),
@@ -1617,7 +1617,7 @@ void notify_cpu_starting(unsigned int cpu)
 	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
 	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
 
-	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
+	rcutree_report_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
 	cpumask_set_cpu(cpu, &cpus_booted_once_mask);
 
 	/*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f9c6b2680cbb..36d8818eaec1 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4216,7 +4216,7 @@ bool rcu_lockdep_current_cpu_online(void)
 	rdp = this_cpu_ptr(&rcu_data);
 	/*
 	 * Strictly, we care here about the case where the current CPU is
-	 * in rcu_cpu_starting() and thus has an excuse for rdp->grpmask
+	 * in rcutree_report_cpu_starting() and thus has an excuse for rdp->grpmask
 	 * not being up to date. So arch_spin_is_locked() might have a
 	 * false positive if it's held by some *other* CPU, but that's
 	 * OK because that just means a false *negative* on the warning.
@@ -4445,8 +4445,10 @@ int rcutree_online_cpu(unsigned int cpu)
  * from the incoming CPU rather than from the cpuhp_step mechanism.
  * This is because this function must be invoked at a precise location.
  * This incoming CPU must not have enabled interrupts yet.
+ *
+ * This mirrors the effects of rcutree_report_cpu_dead().
  */
-void rcu_cpu_starting(unsigned int cpu)
+void rcutree_report_cpu_starting(unsigned int cpu)
 {
 	unsigned long mask;
 	struct rcu_data *rdp;
@@ -4500,8 +4502,10 @@
  * Note that this function is special in that it is invoked directly
  * from the outgoing CPU rather than from the cpuhp_step mechanism.
  * This is because this function must be invoked at a precise location.
+ *
+ * This mirrors the effect of rcutree_report_cpu_starting().
  */
-void rcu_report_dead(void)
+void rcutree_report_cpu_dead(void)
 {
 	unsigned long flags;
 	unsigned long mask;
@@ -5072,7 +5076,7 @@ void __init rcu_init(void)
 	pm_notifier(rcu_pm_notify, 0);
 	WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot.
 	rcutree_prepare_cpu(cpu);
-	rcu_cpu_starting(cpu);
+	rcutree_report_cpu_starting(cpu);
 	rcutree_online_cpu(cpu);
 
 	/* Create workqueue for Tree SRCU and for expedited GPs. */
--
cgit v1.2.3-58-ga151