diff options
-rw-r--r-- | kernel/rcu/srcutree.c | 59 |
1 files changed, 58 insertions, 1 deletions
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 4b98e6f45166..2286e06fd159 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -402,6 +402,7 @@ static void srcu_gp_start(struct srcu_struct *sp) rcu_seq_current(&sp->srcu_gp_seq)); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, rcu_seq_snap(&sp->srcu_gp_seq)); + smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ rcu_seq_start(&sp->srcu_gp_seq); state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); WARN_ON_ONCE(state != SRCU_STATE_SCAN1); @@ -677,6 +678,57 @@ static void srcu_flip(struct srcu_struct *sp) } /* + * If SRCU is likely idle, return true, otherwise return false. + * + * Note that it is OK for several current from-idle requests for a new + * grace period from idle to specify expediting because they will all end + * up requesting the same grace period anyhow. So no loss. + * + * Note also that if any CPU (including the current one) is still invoking + * callbacks, this function will nevertheless say "idle". This is not + * ideal, but the overhead of checking all CPUs' callback lists is even + * less ideal, especially on large systems. Furthermore, the wakeup + * can happen before the callback is fully removed, so we have no choice + * but to accept this type of error. + * + * This function is also subject to counter-wrap errors, but let's face + * it, if this function was preempted for enough time for the counters + * to wrap, it really doesn't matter whether or not we expedite the grace + * period. The extra overhead of a needlessly expedited grace period is + * negligible when amoritized over that time period, and the extra latency + * of a needlessly non-expedited grace period is similarly negligible. + */ +static bool srcu_might_be_idle(struct srcu_struct *sp) +{ + unsigned long flags; + struct srcu_data *sdp; + unsigned long curseq; + + /* If the local srcu_data structure has callbacks, not idle. */ + local_irq_save(flags); + sdp = this_cpu_ptr(sp->sda); + if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { + local_irq_restore(flags); + return false; /* Callbacks already present, so not idle. */ + } + local_irq_restore(flags); + + /* + * No local callbacks, so probabalistically probe global state. + * Exact information would require acquiring locks, which would + * kill scalability, hence the probabalistic nature of the probe. + */ + curseq = rcu_seq_current(&sp->srcu_gp_seq); + smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */ + if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed))) + return false; /* Grace period in progress, so not idle. */ + smp_mb(); /* Order ->srcu_gp_seq with prior access. */ + if (curseq != rcu_seq_current(&sp->srcu_gp_seq)) + return false; /* GP # changed, so not idle. */ + return true; /* With reasonable probability, idle! */ +} + +/* * Enqueue an SRCU callback on the srcu_data structure associated with * the current CPU and the specified srcu_struct structure, initiating * grace-period processing if it is not already running. @@ -823,10 +875,15 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); * Of course, these memory-ordering guarantees apply only when * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are * passed the same srcu_struct structure. + * + * If SRCU is likely idle, expedite the first request. This semantic + * was provided by Classic SRCU, and is relied upon by its users, so TREE + * SRCU must also provide it. Note that detecting idleness is heuristic + * and subject to both false positives and negatives. */ void synchronize_srcu(struct srcu_struct *sp) { - if (rcu_gp_is_expedited()) + if (srcu_might_be_idle(sp) || rcu_gp_is_expedited()) synchronize_srcu_expedited(sp); else __synchronize_srcu(sp, true); |