| author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-12 10:00:04 +0900 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-12 10:00:04 +0900 |
| commit | 70fdcb83db15c85a0495b07dc55d9347a4c2efd9 (patch) | |
| tree | 5a7f073e87e14aacfc77a21a3bf37bc470143871 /kernel | |
| parent | edae583a6d4d1ad2eb73981787790993fef1bbad (diff) | |
| parent | 0e95c69bde1a5bf22acd53b356fe10d7bec6e2be (diff) | |
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar:
"The main RCU changes in this cycle are:
- Idle entry/exit changes, to throttle callback execution and other
refinements to speed up kbuild, primarily to address performance
issues located by Tibor Billes.
- Grace-period related changes, primarily to aid in debugging,
inspired by an -rt debugging session.
- Code reorganization moving RCU's source files into its own
kernel/rcu/ directory.
- RCU documentation updates
- Miscellaneous fixes.
Note, the following commit:
5c889690aa08 mm: Place preemption point in do_mlockall() loop
is identical to the commit already in your tree via email:
22356f447ceb mm: Place preemption point in do_mlockall() loop
[ Your version of the changelog nicely demonstrates how kernel oops
messages should be trimmed properly :-/ ]"
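
One of the "idle entry/exit" items above is a jiffies-based throttle on callback advancement from the idle path (see the rcu_try_advance_all_cbs() and rcu_cleanup_after_idle() hunks in the diff further down). A simplified sketch of the pattern, assuming the RCU-internal declarations from kernel/rcu/tree.h (struct rcu_dynticks, the per-CPU rcu_dynticks variable, and the new last_advance_all field):

```c
/*
 * Simplified sketch of the throttled idle-path callback advancement.
 * The per-CPU last_advance_all field records the jiffy of the last
 * advancement; if we already advanced during the current jiffy, the
 * comparatively expensive per-flavor scan is skipped.  The elided
 * body walks each RCU flavor and sets cbs_ready when callbacks are
 * ready for immediate invocation.
 */
static bool rcu_try_advance_all_cbs(void)
{
	bool cbs_ready = false;
	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);

	/* Exit early if we advanced recently. */
	if (jiffies == rdtp->last_advance_all)
		return false;
	rdtp->last_advance_all = jiffies;

	/* ... advance callbacks for each RCU flavor, updating cbs_ready ... */

	return cbs_ready;
}
```

rcu_cleanup_after_idle() then calls invoke_rcu_core() only when this returns true, rather than unconditionally scanning every flavor on each idle exit.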
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (30 commits)
rcu: Move RCU-related source code to kernel/rcu directory
rcu: Fix occurrence of "the the" in checklist.txt
kthread: Add pointer to vmstat-avoidance patch
rcu: Update stall-warning documentation
rcu: Consistent rcu_is_watching() naming
rcu: Change EXPORT_SYMBOL() to EXPORT_SYMBOL_GPL()
rcu: Is it safe to enter an RCU read-side critical section?
rcu: Throttle invoke_rcu_core() invocations due to non-lazy callbacks
rcu: Throttle rcu_try_advance_all_cbs() execution
rcu: Remove redundant code from rcu_cleanup_after_idle()
rcu: Fix CONFIG_RCU_NOCB_CPU_ALL panic on machines with sparse CPU mask
rcu: Avoid sparse warnings in rcu_nocb_wake trace event
rcu: Track rcu_nocb_kthread()'s sleeping and awakening
rcu: Distinguish between NOCB and non-NOCB rcu_callback trace events
rcu: Add tracing for rcuo no-CBs CPU wakeup handshake
rcu: Add tracing of normal (non-NOCB) grace-period requests
rcu: Add tracing to rcu_gp_kthread()
rcu: Flag lockless access to ->gp_flags with ACCESS_ONCE()
rcu: Prevent spurious-wakeup DoS attack on rcu_gp_kthread()
rcu: Improve grace-period start logic
...
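
Two of the entries above ("Consistent rcu_is_watching() naming" and "Is it safe to enter an RCU read-side critical section?") replace rcu_is_cpu_idle() with the inverted rcu_is_watching(), as the kernel/lockdep.c hunk below illustrates. A minimal caller-side sketch (the helper function name is hypothetical):

```c
#include <linux/rcupdate.h>	/* rcu_is_watching() */
#include <linux/printk.h>

/*
 * Hypothetical debug helper showing the caller-side change: the old
 * rcu_is_cpu_idle() ("is this CPU in an extended quiescent state?")
 * becomes !rcu_is_watching() ("is it unsafe to enter an RCU read-side
 * critical section on this CPU?").
 */
static void example_complain_if_rcu_unsafe(void)
{
	/* Before: if (rcu_is_cpu_idle())  */
	/* After:  if (!rcu_is_watching()) */
	if (!rcu_is_watching())
		pr_err("RCU used illegally from extended quiescent state!\n");
}
```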
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 11
-rw-r--r-- | kernel/lockdep.c | 4
-rw-r--r-- | kernel/rcu/Makefile | 6
-rw-r--r-- | kernel/rcu/rcu.h (renamed from kernel/rcu.h) | 7
-rw-r--r-- | kernel/rcu/srcu.c (renamed from kernel/srcu.c) | 0
-rw-r--r-- | kernel/rcu/tiny.c (renamed from kernel/rcutiny.c) | 37
-rw-r--r-- | kernel/rcu/tiny_plugin.h (renamed from kernel/rcutiny_plugin.h) | 0
-rw-r--r-- | kernel/rcu/torture.c (renamed from kernel/rcutorture.c) | 6
-rw-r--r-- | kernel/rcu/tree.c (renamed from kernel/rcutree.c) | 185
-rw-r--r-- | kernel/rcu/tree.h (renamed from kernel/rcutree.h) | 2
-rw-r--r-- | kernel/rcu/tree_plugin.h (renamed from kernel/rcutree_plugin.h) | 84
-rw-r--r-- | kernel/rcu/tree_trace.c (renamed from kernel/rcutree_trace.c) | 2
-rw-r--r-- | kernel/rcu/update.c (renamed from kernel/rcupdate.c) | 10
13 files changed, 251 insertions, 103 deletions
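
Much of the diff below also converts tracepoint string literals to the TPS() wrapper, whose definition moves from kernel/rcutree.c into the shared kernel/rcu/rcu.h so that kernel/rcu/tiny.c can use it as well. A short sketch of the pattern; trace_rcu_dyntick() is the real RCU tracepoint, while the surrounding function is illustrative only:

```c
#include <linux/ftrace_event.h>		/* tracepoint_string() */
#include <trace/events/rcu.h>		/* trace_rcu_dyntick() */

/*
 * Strings used in tracepoints need to be exported via the tracing
 * system so that tools like perf and trace-cmd can translate the
 * recorded string-address pointers back to text.
 */
#define TPS(x) tracepoint_string(x)

/* Illustrative caller: record entry into an extended quiescent state. */
static void example_trace_idle_entry(long long oldval, long long newval)
{
	trace_rcu_dyntick(TPS("Start"), oldval, newval);
}
```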
diff --git a/kernel/Makefile b/kernel/Makefile index 1ce47553fb02..f99d908b5550 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -6,9 +6,9 @@ obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ - rcupdate.o extable.o params.o posix-timers.o \ + extable.o params.o posix-timers.o \ kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \ - hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ + hrtimer.o rwsem.o nsproxy.o semaphore.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o groups.o lglock.o smpboot.o @@ -27,6 +27,7 @@ obj-y += power/ obj-y += printk/ obj-y += cpu/ obj-y += irq/ +obj-y += rcu/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o @@ -81,12 +82,6 @@ obj-$(CONFIG_KGDB) += debug/ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o obj-$(CONFIG_SECCOMP) += seccomp.o -obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o -obj-$(CONFIG_TREE_RCU) += rcutree.o -obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o -obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o -obj-$(CONFIG_TINY_RCU) += rcutiny.o -obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/lockdep.c b/kernel/lockdep.c index e16c45b9ee77..4e8e14c34e42 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -4224,7 +4224,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", !rcu_lockdep_current_cpu_online() ? "RCU used illegally from offline CPU!\n" - : rcu_is_cpu_idle() + : !rcu_is_watching() ? "RCU used illegally from idle CPU!\n" : "", rcu_scheduler_active, debug_locks); @@ -4247,7 +4247,7 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) * So complain bitterly if someone does call rcu_read_lock(), * rcu_read_lock_bh() and so on from extended quiescent states. */ - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) printk("RCU used illegally from extended quiescent state!\n"); lockdep_print_held_locks(curr); diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile new file mode 100644 index 000000000000..01e9ec37a3e3 --- /dev/null +++ b/kernel/rcu/Makefile @@ -0,0 +1,6 @@ +obj-y += update.o srcu.o +obj-$(CONFIG_RCU_TORTURE_TEST) += torture.o +obj-$(CONFIG_TREE_RCU) += tree.o +obj-$(CONFIG_TREE_PREEMPT_RCU) += tree.o +obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o +obj-$(CONFIG_TINY_RCU) += tiny.o diff --git a/kernel/rcu.h b/kernel/rcu/rcu.h index 77131966c4ad..7859a0a3951e 100644 --- a/kernel/rcu.h +++ b/kernel/rcu/rcu.h @@ -122,4 +122,11 @@ int rcu_jiffies_till_stall_check(void); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ +/* + * Strings used in tracepoints need to be exported via the + * tracing system such that tools like perf and trace-cmd can + * translate the string address pointers to actual text. 
+ */ +#define TPS(x) tracepoint_string(x) + #endif /* __LINUX_RCU_H */ diff --git a/kernel/srcu.c b/kernel/rcu/srcu.c index 01d5ccb8bfe3..01d5ccb8bfe3 100644 --- a/kernel/srcu.c +++ b/kernel/rcu/srcu.c diff --git a/kernel/rcutiny.c b/kernel/rcu/tiny.c index 9ed6075dc562..0c9a934cfec1 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcu/tiny.c @@ -35,6 +35,7 @@ #include <linux/time.h> #include <linux/cpu.h> #include <linux/prefetch.h> +#include <linux/ftrace_event.h> #ifdef CONFIG_RCU_TRACE #include <trace/events/rcu.h> @@ -42,7 +43,7 @@ #include "rcu.h" -/* Forward declarations for rcutiny_plugin.h. */ +/* Forward declarations for tiny_plugin.h. */ struct rcu_ctrlblk; static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); static void rcu_process_callbacks(struct softirq_action *unused); @@ -52,22 +53,23 @@ static void __call_rcu(struct rcu_head *head, static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; -#include "rcutiny_plugin.h" +#include "tiny_plugin.h" /* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ static void rcu_idle_enter_common(long long newval) { if (newval) { - RCU_TRACE(trace_rcu_dyntick("--=", + RCU_TRACE(trace_rcu_dyntick(TPS("--="), rcu_dynticks_nesting, newval)); rcu_dynticks_nesting = newval; return; } - RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval)); + RCU_TRACE(trace_rcu_dyntick(TPS("Start"), + rcu_dynticks_nesting, newval)); if (!is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task", + RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), rcu_dynticks_nesting, newval)); ftrace_dump(DUMP_ALL); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", @@ -120,15 +122,15 @@ EXPORT_SYMBOL_GPL(rcu_irq_exit); static void rcu_idle_exit_common(long long oldval) { if (oldval) { - RCU_TRACE(trace_rcu_dyntick("++=", + RCU_TRACE(trace_rcu_dyntick(TPS("++="), oldval, rcu_dynticks_nesting)); return; } - RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting)); + RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting)); if (!is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); - RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task", + RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"), oldval, rcu_dynticks_nesting)); ftrace_dump(DUMP_ALL); WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s", @@ -174,18 +176,18 @@ void rcu_irq_enter(void) } EXPORT_SYMBOL_GPL(rcu_irq_enter); -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) /* * Test whether RCU thinks that the current CPU is idle. */ -int rcu_is_cpu_idle(void) +bool __rcu_is_watching(void) { - return !rcu_dynticks_nesting; + return rcu_dynticks_nesting; } -EXPORT_SYMBOL(rcu_is_cpu_idle); +EXPORT_SYMBOL(__rcu_is_watching); -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ /* * Test whether the current CPU was interrupted from idle. 
Nested @@ -273,7 +275,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) if (&rcp->rcucblist == rcp->donetail) { RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1)); RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, - ACCESS_ONCE(rcp->rcucblist), + !!ACCESS_ONCE(rcp->rcucblist), need_resched(), is_idle_task(current), false)); @@ -304,7 +306,8 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) RCU_TRACE(cb_count++); } RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); - RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(), + RCU_TRACE(trace_rcu_batch_end(rcp->name, + cb_count, 0, need_resched(), is_idle_task(current), false)); } diff --git a/kernel/rcutiny_plugin.h b/kernel/rcu/tiny_plugin.h index 280d06cae352..280d06cae352 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcu/tiny_plugin.h diff --git a/kernel/rcutorture.c b/kernel/rcu/torture.c index be63101c6175..3929cd451511 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcu/torture.c @@ -52,6 +52,12 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@freedesktop.org>"); +MODULE_ALIAS("rcutorture"); +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "rcutorture." + static int fqs_duration; module_param(fqs_duration, int, 0444); MODULE_PARM_DESC(fqs_duration, "Duration of fqs bursts (us), 0 to disable"); diff --git a/kernel/rcutree.c b/kernel/rcu/tree.c index 32618b3fe4e6..8a2c81e86dda 100644 --- a/kernel/rcutree.c +++ b/kernel/rcu/tree.c @@ -41,6 +41,7 @@ #include <linux/export.h> #include <linux/completion.h> #include <linux/moduleparam.h> +#include <linux/module.h> #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/cpu.h> @@ -56,17 +57,16 @@ #include <linux/ftrace_event.h> #include <linux/suspend.h> -#include "rcutree.h" +#include "tree.h" #include <trace/events/rcu.h> #include "rcu.h" -/* - * Strings used in tracepoints need to be exported via the - * tracing system such that tools like perf and trace-cmd can - * translate the string address pointers to actual text. - */ -#define TPS(x) tracepoint_string(x) +MODULE_ALIAS("rcutree"); +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "rcutree." /* Data structures. 
*/ @@ -222,7 +222,7 @@ void rcu_note_context_switch(int cpu) } EXPORT_SYMBOL_GPL(rcu_note_context_switch); -DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { +static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_NO_HZ_FULL_SYSIDLE @@ -371,7 +371,8 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, { trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting); if (!user && !is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = + idle_task(smp_processor_id()); trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0); ftrace_dump(DUMP_ORIG); @@ -407,7 +408,7 @@ static void rcu_eqs_enter(bool user) long long oldval; struct rcu_dynticks *rdtp; - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0); if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE) @@ -435,7 +436,7 @@ void rcu_idle_enter(void) local_irq_save(flags); rcu_eqs_enter(false); - rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0); + rcu_sysidle_enter(this_cpu_ptr(&rcu_dynticks), 0); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_enter); @@ -478,7 +479,7 @@ void rcu_irq_exit(void) struct rcu_dynticks *rdtp; local_irq_save(flags); - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting--; WARN_ON_ONCE(rdtp->dynticks_nesting < 0); @@ -508,7 +509,8 @@ static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, rcu_cleanup_after_idle(smp_processor_id()); trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting); if (!user && !is_idle_task(current)) { - struct task_struct *idle = idle_task(smp_processor_id()); + struct task_struct *idle __maybe_unused = + idle_task(smp_processor_id()); trace_rcu_dyntick(TPS("Error on exit: not idle task"), oldval, rdtp->dynticks_nesting); @@ -528,7 +530,7 @@ static void rcu_eqs_exit(bool user) struct rcu_dynticks *rdtp; long long oldval; - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; WARN_ON_ONCE(oldval < 0); if (oldval & DYNTICK_TASK_NEST_MASK) @@ -555,7 +557,7 @@ void rcu_idle_exit(void) local_irq_save(flags); rcu_eqs_exit(false); - rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0); + rcu_sysidle_exit(this_cpu_ptr(&rcu_dynticks), 0); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(rcu_idle_exit); @@ -599,7 +601,7 @@ void rcu_irq_enter(void) long long oldval; local_irq_save(flags); - rdtp = &__get_cpu_var(rcu_dynticks); + rdtp = this_cpu_ptr(&rcu_dynticks); oldval = rdtp->dynticks_nesting; rdtp->dynticks_nesting++; WARN_ON_ONCE(rdtp->dynticks_nesting == 0); @@ -620,7 +622,7 @@ void rcu_irq_enter(void) */ void rcu_nmi_enter(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); if (rdtp->dynticks_nmi_nesting == 0 && (atomic_read(&rdtp->dynticks) & 0x1)) @@ -642,7 +644,7 @@ void rcu_nmi_enter(void) */ void rcu_nmi_exit(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); if (rdtp->dynticks_nmi_nesting == 0 || --rdtp->dynticks_nmi_nesting != 0) @@ -655,21 +657,34 @@ void rcu_nmi_exit(void) } /** - * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle + * 
__rcu_is_watching - are RCU read-side critical sections safe? + * + * Return true if RCU is watching the running CPU, which means that + * this CPU can safely enter RCU read-side critical sections. Unlike + * rcu_is_watching(), the caller of __rcu_is_watching() must have at + * least disabled preemption. + */ +bool __rcu_is_watching(void) +{ + return atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1; +} + +/** + * rcu_is_watching - see if RCU thinks that the current CPU is idle * * If the current CPU is in its idle loop and is neither in an interrupt * or NMI handler, return true. */ -int rcu_is_cpu_idle(void) +bool rcu_is_watching(void) { int ret; preempt_disable(); - ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0; + ret = __rcu_is_watching(); preempt_enable(); return ret; } -EXPORT_SYMBOL(rcu_is_cpu_idle); +EXPORT_SYMBOL_GPL(rcu_is_watching); #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) @@ -703,7 +718,7 @@ bool rcu_lockdep_current_cpu_online(void) if (in_nmi()) return 1; preempt_disable(); - rdp = &__get_cpu_var(rcu_sched_data); + rdp = this_cpu_ptr(&rcu_sched_data); rnp = rdp->mynode; ret = (rdp->grpmask & rnp->qsmaskinit) || !rcu_scheduler_fully_active; @@ -723,7 +738,7 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); */ static int rcu_is_cpu_rrupt_from_idle(void) { - return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1; + return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1; } /* @@ -802,8 +817,11 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, static void record_gp_stall_check_time(struct rcu_state *rsp) { - rsp->gp_start = jiffies; - rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check(); + unsigned long j = ACCESS_ONCE(jiffies); + + rsp->gp_start = j; + smp_wmb(); /* Record start time before stall time. */ + rsp->jiffies_stall = j + rcu_jiffies_till_stall_check(); } /* @@ -932,17 +950,48 @@ static void print_cpu_stall(struct rcu_state *rsp) static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { + unsigned long completed; + unsigned long gpnum; + unsigned long gps; unsigned long j; unsigned long js; struct rcu_node *rnp; - if (rcu_cpu_stall_suppress) + if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp)) return; j = ACCESS_ONCE(jiffies); + + /* + * Lots of memory barriers to reject false positives. + * + * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall, + * then rsp->gp_start, and finally rsp->completed. These values + * are updated in the opposite order with memory barriers (or + * equivalent) during grace-period initialization and cleanup. + * Now, a false positive can occur if we get an new value of + * rsp->gp_start and a old value of rsp->jiffies_stall. But given + * the memory barriers, the only way that this can happen is if one + * grace period ends and another starts between these two fetches. + * Detect this by comparing rsp->completed with the previous fetch + * from rsp->gpnum. + * + * Given this check, comparisons of jiffies, rsp->jiffies_stall, + * and rsp->gp_start suffice to forestall false positives. + */ + gpnum = ACCESS_ONCE(rsp->gpnum); + smp_rmb(); /* Pick up ->gpnum first... */ js = ACCESS_ONCE(rsp->jiffies_stall); + smp_rmb(); /* ...then ->jiffies_stall before the rest... */ + gps = ACCESS_ONCE(rsp->gp_start); + smp_rmb(); /* ...and finally ->gp_start before ->completed. 
*/ + completed = ACCESS_ONCE(rsp->completed); + if (ULONG_CMP_GE(completed, gpnum) || + ULONG_CMP_LT(j, js) || + ULONG_CMP_GE(gps, js)) + return; /* No stall or GP completed since entering function. */ rnp = rdp->mynode; if (rcu_gp_in_progress(rsp) && - (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) { + (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) { /* We haven't checked in, so go dump stack. */ print_cpu_stall(rsp); @@ -1297,7 +1346,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) } /* - * Initialize a new grace period. + * Initialize a new grace period. Return 0 if no grace period required. */ static int rcu_gp_init(struct rcu_state *rsp) { @@ -1306,18 +1355,27 @@ static int rcu_gp_init(struct rcu_state *rsp) rcu_bind_gp_kthread(); raw_spin_lock_irq(&rnp->lock); + if (rsp->gp_flags == 0) { + /* Spurious wakeup, tell caller to go back to sleep. */ + raw_spin_unlock_irq(&rnp->lock); + return 0; + } rsp->gp_flags = 0; /* Clear all flags: New grace period. */ - if (rcu_gp_in_progress(rsp)) { - /* Grace period already in progress, don't start another. */ + if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { + /* + * Grace period already in progress, don't start another. + * Not supposed to be able to happen. + */ raw_spin_unlock_irq(&rnp->lock); return 0; } /* Advance to a new grace period and initialize state. */ + record_gp_stall_check_time(rsp); + smp_wmb(); /* Record GP times before starting GP. */ rsp->gpnum++; trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start")); - record_gp_stall_check_time(rsp); raw_spin_unlock_irq(&rnp->lock); /* Exclude any concurrent CPU-hotplug operations. */ @@ -1366,7 +1424,7 @@ static int rcu_gp_init(struct rcu_state *rsp) /* * Do one round of quiescent-state forcing. */ -int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) +static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) { int fqs_state = fqs_state_in; bool isidle = false; @@ -1451,8 +1509,12 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) rsp->fqs_state = RCU_GP_IDLE; rdp = this_cpu_ptr(rsp->rda); rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */ - if (cpu_needs_another_gp(rsp, rdp)) - rsp->gp_flags = 1; + if (cpu_needs_another_gp(rsp, rdp)) { + rsp->gp_flags = RCU_GP_FLAG_INIT; + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("newreq")); + } raw_spin_unlock_irq(&rnp->lock); } @@ -1462,6 +1524,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) static int __noreturn rcu_gp_kthread(void *arg) { int fqs_state; + int gf; unsigned long j; int ret; struct rcu_state *rsp = arg; @@ -1471,14 +1534,19 @@ static int __noreturn rcu_gp_kthread(void *arg) /* Handle grace-period start. */ for (;;) { + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("reqwait")); wait_event_interruptible(rsp->gp_wq, - rsp->gp_flags & + ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_INIT); - if ((rsp->gp_flags & RCU_GP_FLAG_INIT) && - rcu_gp_init(rsp)) + if (rcu_gp_init(rsp)) break; cond_resched(); flush_signals(current); + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("reqwaitsig")); } /* Handle quiescent-state forcing. 
*/ @@ -1488,10 +1556,16 @@ static int __noreturn rcu_gp_kthread(void *arg) j = HZ; jiffies_till_first_fqs = HZ; } + ret = 0; for (;;) { - rsp->jiffies_force_qs = jiffies + j; + if (!ret) + rsp->jiffies_force_qs = jiffies + j; + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqswait")); ret = wait_event_interruptible_timeout(rsp->gp_wq, - (rsp->gp_flags & RCU_GP_FLAG_FQS) || + ((gf = ACCESS_ONCE(rsp->gp_flags)) & + RCU_GP_FLAG_FQS) || (!ACCESS_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp)), j); @@ -1500,13 +1574,23 @@ static int __noreturn rcu_gp_kthread(void *arg) !rcu_preempt_blocked_readers_cgp(rnp)) break; /* If time for quiescent-state forcing, do it. */ - if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) { + if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) || + (gf & RCU_GP_FLAG_FQS)) { + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqsstart")); fqs_state = rcu_gp_fqs(rsp, fqs_state); + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqsend")); cond_resched(); } else { /* Deal with stray signal. */ cond_resched(); flush_signals(current); + trace_rcu_grace_period(rsp->name, + ACCESS_ONCE(rsp->gpnum), + TPS("fqswaitsig")); } j = jiffies_till_next_fqs; if (j > HZ) { @@ -1554,6 +1638,8 @@ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, return; } rsp->gp_flags = RCU_GP_FLAG_INIT; + trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), + TPS("newreq")); /* * We can't do wakeups while holding the rnp->lock, as that @@ -2255,7 +2341,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, * If called from an extended quiescent state, invoke the RCU * core in order to force a re-evaluation of RCU's idleness. */ - if (rcu_is_cpu_idle() && cpu_online(smp_processor_id())) + if (!rcu_is_watching() && cpu_online(smp_processor_id())) invoke_rcu_core(); /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ @@ -2725,10 +2811,13 @@ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) for_each_rcu_flavor(rsp) { rdp = per_cpu_ptr(rsp->rda, cpu); - if (rdp->qlen != rdp->qlen_lazy) + if (!rdp->nxtlist) + continue; + hc = true; + if (rdp->qlen != rdp->qlen_lazy || !all_lazy) { al = false; - if (rdp->nxtlist) - hc = true; + break; + } } if (all_lazy) *all_lazy = al; @@ -3216,7 +3305,7 @@ static void __init rcu_init_one(struct rcu_state *rsp, /* * Compute the rcu_node tree geometry from kernel parameters. This cannot - * replace the definitions in rcutree.h because those are needed to size + * replace the definitions in tree.h because those are needed to size * the ->node array in the rcu_state structure. */ static void __init rcu_init_geometry(void) @@ -3295,8 +3384,8 @@ void __init rcu_init(void) rcu_bootup_announce(); rcu_init_geometry(); - rcu_init_one(&rcu_sched_state, &rcu_sched_data); rcu_init_one(&rcu_bh_state, &rcu_bh_data); + rcu_init_one(&rcu_sched_state, &rcu_sched_data); __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); @@ -3311,4 +3400,4 @@ void __init rcu_init(void) rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); } -#include "rcutree_plugin.h" +#include "tree_plugin.h" diff --git a/kernel/rcutree.h b/kernel/rcu/tree.h index 5f97eab602cd..52be957c9fe2 100644 --- a/kernel/rcutree.h +++ b/kernel/rcu/tree.h @@ -104,6 +104,8 @@ struct rcu_dynticks { /* idle-period nonlazy_posted snapshot. */ unsigned long last_accelerate; /* Last jiffy CBs were accelerated. 
*/ + unsigned long last_advance_all; + /* Last jiffy CBs were all advanced. */ int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ }; diff --git a/kernel/rcutree_plugin.h b/kernel/rcu/tree_plugin.h index 130c97b027f2..3822ac0c4b27 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -28,7 +28,7 @@ #include <linux/gfp.h> #include <linux/oom.h> #include <linux/smpboot.h> -#include "time/tick-internal.h" +#include "../time/tick-internal.h" #define RCU_KTHREAD_PRIO 1 @@ -96,10 +96,15 @@ static void __init rcu_bootup_announce_oddness(void) #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */ #ifdef CONFIG_RCU_NOCB_CPU_ALL pr_info("\tOffload RCU callbacks from all CPUs\n"); - cpumask_setall(rcu_nocb_mask); + cpumask_copy(rcu_nocb_mask, cpu_possible_mask); #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */ #endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */ if (have_rcu_nocb_mask) { + if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { + pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); + cpumask_and(rcu_nocb_mask, cpu_possible_mask, + rcu_nocb_mask); + } cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); if (rcu_nocb_poll) @@ -660,7 +665,7 @@ static void rcu_preempt_check_callbacks(int cpu) static void rcu_preempt_do_callbacks(void) { - rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data)); + rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data)); } #endif /* #ifdef CONFIG_RCU_BOOST */ @@ -1128,7 +1133,7 @@ void exit_rcu(void) #ifdef CONFIG_RCU_BOOST -#include "rtmutex_common.h" +#include "../rtmutex_common.h" #ifdef CONFIG_RCU_TRACE @@ -1332,7 +1337,7 @@ static void invoke_rcu_callbacks_kthread(void) */ static bool rcu_is_callbacks_kthread(void) { - return __get_cpu_var(rcu_cpu_kthread_task) == current; + return __this_cpu_read(rcu_cpu_kthread_task) == current; } #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) @@ -1382,8 +1387,8 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, static void rcu_kthread_do_work(void) { - rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); - rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); + rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); rcu_preempt_do_callbacks(); } @@ -1402,7 +1407,7 @@ static void rcu_cpu_kthread_park(unsigned int cpu) static int rcu_cpu_kthread_should_run(unsigned int cpu) { - return __get_cpu_var(rcu_cpu_has_work); + return __this_cpu_read(rcu_cpu_has_work); } /* @@ -1412,8 +1417,8 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu) */ static void rcu_cpu_kthread(unsigned int cpu) { - unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status); - char work, *workp = &__get_cpu_var(rcu_cpu_has_work); + unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); + char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); int spincnt; for (spincnt = 0; spincnt < 10; spincnt++) { @@ -1630,17 +1635,23 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644); extern int tick_nohz_enabled; /* - * Try to advance callbacks for all flavors of RCU on the current CPU. - * Afterwards, if there are any callbacks ready for immediate invocation, - * return true. + * Try to advance callbacks for all flavors of RCU on the current CPU, but + * only if it has been awhile since the last time we did so. 
Afterwards, + * if there are any callbacks ready for immediate invocation, return true. */ static bool rcu_try_advance_all_cbs(void) { bool cbs_ready = false; struct rcu_data *rdp; + struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); struct rcu_node *rnp; struct rcu_state *rsp; + /* Exit early if we advanced recently. */ + if (jiffies == rdtp->last_advance_all) + return 0; + rdtp->last_advance_all = jiffies; + for_each_rcu_flavor(rsp) { rdp = this_cpu_ptr(rsp->rda); rnp = rdp->mynode; @@ -1739,6 +1750,8 @@ static void rcu_prepare_for_idle(int cpu) */ if (rdtp->all_lazy && rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { + rdtp->all_lazy = false; + rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; invoke_rcu_core(); return; } @@ -1768,17 +1781,11 @@ static void rcu_prepare_for_idle(int cpu) */ static void rcu_cleanup_after_idle(int cpu) { - struct rcu_data *rdp; - struct rcu_state *rsp; if (rcu_is_nocb_cpu(cpu)) return; - rcu_try_advance_all_cbs(); - for_each_rcu_flavor(rsp) { - rdp = per_cpu_ptr(rsp->rda, cpu); - if (cpu_has_callbacks_ready_to_invoke(rdp)) - invoke_rcu_core(); - } + if (rcu_try_advance_all_cbs()) + invoke_rcu_core(); } /* @@ -2108,15 +2115,22 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, /* If we are not being polled and there is a kthread, awaken it ... */ t = ACCESS_ONCE(rdp->nocb_kthread); - if (rcu_nocb_poll | !t) + if (rcu_nocb_poll || !t) { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WakeNotPoll")); return; + } len = atomic_long_read(&rdp->nocb_q_count); if (old_rhpp == &rdp->nocb_head) { wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */ rdp->qlen_last_fqs_check = 0; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty")); } else if (len > rdp->qlen_last_fqs_check + qhimark) { wake_up_process(t); /* ... or if many callbacks queued. */ rdp->qlen_last_fqs_check = LONG_MAX / 2; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf")); + } else { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot")); } return; } @@ -2140,10 +2154,12 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, if (__is_kfree_rcu_offset((unsigned long)rhp->func)) trace_rcu_kfree_callback(rdp->rsp->name, rhp, (unsigned long)rhp->func, - rdp->qlen_lazy, rdp->qlen); + -atomic_long_read(&rdp->nocb_q_count_lazy), + -atomic_long_read(&rdp->nocb_q_count)); else trace_rcu_callback(rdp->rsp->name, rhp, - rdp->qlen_lazy, rdp->qlen); + -atomic_long_read(&rdp->nocb_q_count_lazy), + -atomic_long_read(&rdp->nocb_q_count)); return 1; } @@ -2221,6 +2237,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp) static int rcu_nocb_kthread(void *arg) { int c, cl; + bool firsttime = 1; struct rcu_head *list; struct rcu_head *next; struct rcu_head **tail; @@ -2229,14 +2246,27 @@ static int rcu_nocb_kthread(void *arg) /* Each pass through this loop invokes one batch of callbacks */ for (;;) { /* If not polling, wait for next batch of callbacks. 
*/ - if (!rcu_nocb_poll) + if (!rcu_nocb_poll) { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("Sleep")); wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head); + } else if (firsttime) { + firsttime = 0; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("Poll")); + } list = ACCESS_ONCE(rdp->nocb_head); if (!list) { + if (!rcu_nocb_poll) + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WokeEmpty")); schedule_timeout_interruptible(1); flush_signals(current); continue; } + firsttime = 1; + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WokeNonEmpty")); /* * Extract queued callbacks, update counts, and wait @@ -2257,7 +2287,11 @@ static int rcu_nocb_kthread(void *arg) next = list->next; /* Wait for enqueuing to complete, if needed. */ while (next == NULL && &list->next != tail) { + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WaitQueue")); schedule_timeout_interruptible(1); + trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, + TPS("WokeQueue")); next = list->next; } debug_rcu_head_unqueue(list); diff --git a/kernel/rcutree_trace.c b/kernel/rcu/tree_trace.c index cf6c17412932..3596797b7e46 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcu/tree_trace.c @@ -44,7 +44,7 @@ #include <linux/seq_file.h> #define RCU_TREE_NONCORE -#include "rcutree.h" +#include "tree.h" static int r_open(struct inode *inode, struct file *file, const struct seq_operations *op) diff --git a/kernel/rcupdate.c b/kernel/rcu/update.c index b02a339836b4..6cb3dff89e2b 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcu/update.c @@ -53,6 +53,12 @@ #include "rcu.h" +MODULE_ALIAS("rcupdate"); +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "rcupdate." + module_param(rcu_expedited, int, 0); #ifdef CONFIG_PREEMPT_RCU @@ -148,7 +154,7 @@ int rcu_read_lock_bh_held(void) { if (!debug_lockdep_rcu_enabled()) return 1; - if (rcu_is_cpu_idle()) + if (!rcu_is_watching()) return 0; if (!rcu_lockdep_current_cpu_online()) return 0; @@ -298,7 +304,7 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); #endif int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */ -int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; +static int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT; module_param(rcu_cpu_stall_suppress, int, 0644); module_param(rcu_cpu_stall_timeout, int, 0644); |
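
Finally, the file moves in this series (for example kernel/rcutree.c to kernel/rcu/tree.c) change KBUILD_MODNAME, which is what built-in module_param() names are prefixed with, so the tree.c, update.c, and torture.c hunks above pin the old prefixes explicitly; boot and module parameters such as "rcutorture.fqs_duration" keep their existing names. A self-contained sketch of the idiom, with a hypothetical parameter for illustration:

```c
#include <linux/module.h>
#include <linux/moduleparam.h>

MODULE_ALIAS("rcutree");
#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "rcutree."	/* keep the pre-rename parameter namespace */

/* Hypothetical parameter: appears on the command line as "rcutree.example_param". */
static int example_param;
module_param(example_param, int, 0444);
MODULE_PARM_DESC(example_param, "Illustrative parameter (not part of this merge)");
```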