1 files changed, 56 insertions, 36 deletions
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 43f1d61e513e..b6b9ea2eb51c 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -249,26 +249,12 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  */
 #define SYNCHRONIZE_SRCU_READER_DELAY 5
 
-/*
- * Flip the readers' index by incrementing ->completed, then wait
- * until there are no more readers using the counters referenced by
- * the old index value.  (Recall that the index is the bottom bit
- * of ->completed.)
- *
- * Of course, it is possible that a reader might be delayed for the
- * full duration of flip_idx_and_wait() between fetching the
- * index and incrementing its counter.  This possibility is handled
- * by __synchronize_srcu() invoking flip_idx_and_wait() twice.
- */
-static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
+static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
 {
-	int idx;
 	int trycount = 0;
 
-	idx = sp->completed++ & 0x1;
-
 	/*
-	 * If a reader fetches the index before the above increment,
+	 * If a reader fetches the index before the ->completed increment,
 	 * but increments its counter after srcu_readers_active_idx_check()
 	 * sums it, then smp_mb() D will pair with __srcu_read_lock()'s
 	 * smp_mb() B to ensure that the SRCU read-side critical section
@@ -298,17 +284,38 @@ static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
 	 * sees srcu_read_unlock()'s counter decrement, then any
 	 * of the current task's subsequent code will happen after
 	 * that SRCU read-side critical section.
+	 *
+	 * It also ensures the order between the above waiting and
+	 * the next flipping.
 	 */
 	smp_mb(); /* E */
 }
 
 /*
+ * Flip the readers' index by incrementing ->completed, then wait
+ * until there are no more readers using the counters referenced by
+ * the old index value.  (Recall that the index is the bottom bit
+ * of ->completed.)
+ *
+ * Of course, it is possible that a reader might be delayed for the
+ * full duration of flip_idx_and_wait() between fetching the
+ * index and incrementing its counter.  This possibility is handled
+ * by the next __synchronize_srcu() invoking wait_idx() for such readers
+ * before starting a new grace period.
+ */
+static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
+{
+	int idx;
+
+	idx = sp->completed++ & 0x1;
+	wait_idx(sp, idx, expedited);
+}
+
+/*
  * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
  */
 static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 {
-	int idx = 0;
-
 	rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
 			   !lock_is_held(&rcu_bh_lock_map) &&
 			   !lock_is_held(&rcu_lock_map) &&
@@ -318,27 +325,40 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
 	mutex_lock(&sp->mutex);
 
 	/*
-	 * If there were no helpers, then we need to do two flips of
-	 * the index.  The first flip is required if there are any
-	 * outstanding SRCU readers even if there are no new readers
-	 * running concurrently with the first counter flip.
+	 * Suppose that during the previous grace period, a reader
+	 * picked up the old value of the index, but did not increment
+	 * its counter until after the previous instance of
+	 * __synchronize_srcu() did the counter summation and recheck.
+	 * That previous grace period was OK because the reader did
+	 * not start until after the grace period started, so the grace
+	 * period was not obligated to wait for that reader.
+	 *
+	 * However, the current SRCU grace period does have to wait for
+	 * that reader.  This is handled by invoking wait_idx() on the
+	 * non-active set of counters (hence sp->completed - 1).  Once
+	 * wait_idx() returns, we know that all readers that picked up
+	 * the old value of ->completed and that already incremented their
+	 * counter will have completed.
 	 *
-	 * The second flip is required when a new reader picks up
-	 * the old value of the index, but does not increment its
-	 * counter until after its counters is summed/rechecked by
-	 * srcu_readers_active_idx_check().  In this case, the current SRCU
-	 * grace period would be OK because the SRCU read-side critical
-	 * section started after this SRCU grace period started, so the
-	 * grace period is not required to wait for the reader.
+	 * But what about readers that picked up the old value of
+	 * ->completed, but -still- have not managed to increment their
+	 * counter?  We do not need to wait for those readers, because
+	 * they will have started their SRCU read-side critical section
+	 * after the current grace period starts.
 	 *
-	 * However, the next SRCU grace period would be waiting for the
-	 * other set of counters to go to zero, and therefore would not
-	 * wait for the reader, which would be very bad.  To avoid this
-	 * bad scenario, we flip and wait twice, clearing out both sets
-	 * of counters.
+	 * Because it is unlikely that readers will be preempted between
+	 * fetching ->completed and incrementing their counter, wait_idx()
+	 * will normally not need to wait.
 	 */
-	for (; idx < 2; idx++)
-		flip_idx_and_wait(sp, expedited);
+	wait_idx(sp, (sp->completed - 1) & 0x1, expedited);
+
+	/*
+	 * Now that wait_idx() has waited for the really old readers,
+	 * invoke flip_idx_and_wait() to flip the counter and wait
+	 * for current SRCU readers.
+	 */
+	flip_idx_and_wait(sp, expedited);
+
 	mutex_unlock(&sp->mutex);
 }