rcu: Improve SRCU's wait_idx() comments

author Lai Jiangshan <laijs@cn.fujitsu.com>

Mon, 27 Feb 2012 17:28:10 +0000 (09:28 -0800)

committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>

Mon, 30 Apr 2012 17:48:22 +0000 (10:48 -0700)
author Lai Jiangshan <laijs@cn.fujitsu.com>
Mon, 27 Feb 2012 17:28:10 +0000 (09:28 -0800)
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Mon, 30 Apr 2012 17:48:22 +0000 (10:48 -0700)
diff --git a/kernel/srcu.c b/kernel/srcu.c

index b6b9ea2..1fecb4d 100644 (file)
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -249,6 +249,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
   */
  #define SYNCHRONIZE_SRCU_READER_DELAY 5
  
+/*
+ * Wait until all pre-existing readers complete.  Such readers
+ * will have used the index specified by "idx".
+ */
  static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
  {
         int trycount = 0;
@@ -291,24 +295,9 @@ static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
         smp_mb(); /* E */
  }
  
-/*
- * Flip the readers' index by incrementing ->completed, then wait
- * until there are no more readers using the counters referenced by
- * the old index value.  (Recall that the index is the bottom bit
- * of ->completed.)
- *
- * Of course, it is possible that a reader might be delayed for the
- * full duration of flip_idx_and_wait() between fetching the
- * index and incrementing its counter.  This possibility is handled
- * by the next __synchronize_srcu() invoking wait_idx() for such readers
- * before starting a new grace period.
- */
-static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
+static void srcu_flip(struct srcu_struct *sp)
  {
-       int idx;
-
-       idx = sp->completed++ & 0x1;
-       wait_idx(sp, idx, expedited);
+       sp->completed++;
  }
  
  /*
@@ -316,6 +305,8 @@ static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
   */
  static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
  {
+       int busy_idx;
+
         rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
                            !lock_is_held(&rcu_bh_lock_map) &&
                            !lock_is_held(&rcu_lock_map) &&
@@ -323,8 +314,28 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
                            "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
  
         mutex_lock(&sp->mutex);
+       busy_idx = sp->completed & 0X1UL;
  
         /*
+        * If we recently flipped the index, there will be some readers
+        * using idx=0 and others using idx=1.  Therefore, two calls to
+        * wait_idx()s suffice to ensure that all pre-existing readers
+        * have completed:
+        *
+        * __synchronize_srcu() {
+        *      wait_idx(sp, 0, expedited);
+        *      wait_idx(sp, 1, expedited);
+        * }
+        *
+        * Starvation is prevented by the fact that we flip the index.
+        * While we wait on one index to clear out, almost all new readers
+        * will be using the other index.  The number of new readers using the
+        * index we are waiting on is sharply bounded by roughly the number
+        * of CPUs.
+        *
+        * How can new readers possibly using the old pre-flip value of
+        * the index?  Consider the following sequence of events:
+        *
          * Suppose that during the previous grace period, a reader
          * picked up the old value of the index, but did not increment
          * its counter until after the previous instance of
@@ -333,31 +344,17 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
          * not start until after the grace period started, so the grace
          * period was not obligated to wait for that reader.
          *
-        * However, the current SRCU grace period does have to wait for
-        * that reader.  This is handled by invoking wait_idx() on the
-        * non-active set of counters (hence sp->completed - 1).  Once
-        * wait_idx() returns, we know that all readers that picked up
-        * the old value of ->completed and that already incremented their
-        * counter will have completed.
-        *
-        * But what about readers that picked up the old value of
-        * ->completed, but -still- have not managed to increment their
-        * counter?  We do not need to wait for those readers, because
-        * they will have started their SRCU read-side critical section
-        * after the current grace period starts.
-        *
-        * Because it is unlikely that readers will be preempted between
-        * fetching ->completed and incrementing their counter, wait_idx()
-        * will normally not need to wait.
+        * However, this sequence of events is quite improbable, so
+        * this call to wait_idx(), which waits on really old readers
+        * describe in this comment above, will almost never need to wait.
          */
-       wait_idx(sp, (sp->completed - 1) & 0x1, expedited);
+       wait_idx(sp, 1 - busy_idx, expedited);
  
-       /*
-        * Now that wait_idx() has waited for the really old readers,
-        * invoke flip_idx_and_wait() to flip the counter and wait
-        * for current SRCU readers.
-        */
-       flip_idx_and_wait(sp, expedited);
+       /* Flip the index to avoid reader-induced starvation. */
+       srcu_flip(sp);
+
+       /* Wait for recent pre-existing readers. */
+       wait_idx(sp, busy_idx, expedited);
  
         mutex_unlock(&sp->mutex);
  }
author	Lai Jiangshan <laijs@cn.fujitsu.com>
	Mon, 27 Feb 2012 17:28:10 +0000 (09:28 -0800)
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>
	Mon, 30 Apr 2012 17:48:22 +0000 (10:48 -0700)