rcu: Improve SRCU's wait_idx() comments

[platform/adaptation/renesas_rcar/renesas_kernel.git] / kernel / srcu.c
diff --git a/kernel/srcu.c b/kernel/srcu.c

index 17e95bc..1fecb4d 100644 (file)
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -138,14 +138,14 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
  
         /*
          * Now, we check the ->snap array that srcu_readers_active_idx()
-        * filled in from the per-CPU counter values.  Since both
-        * __srcu_read_lock() and __srcu_read_unlock() increment the
-        * upper bits of the per-CPU counter, an increment/decrement
-        * pair will change the value of the counter.  Since there is
-        * only one possible increment, the only way to wrap the counter
-        * is to have a huge number of counter decrements, which requires
-        * a huge number of tasks and huge SRCU read-side critical-section
-        * nesting levels, even on 32-bit systems.
+        * filled in from the per-CPU counter values. Since
+        * __srcu_read_lock() increments the upper bits of the per-CPU
+        * counter, an increment/decrement pair will change the value
+        * of the counter.  Since there is only one possible increment,
+        * the only way to wrap the counter is to have a huge number of
+        * counter decrements, which requires a huge number of tasks and
+        * huge SRCU read-side critical-section nesting levels, even on
+        * 32-bit systems.
          *
          * All of the ways of confusing the readings require that the scan
          * in srcu_readers_active_idx() see the read-side task's decrement,
@@ -234,8 +234,7 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx)
  {
         preempt_disable();
         smp_mb(); /* C */  /* Avoid leaking the critical section. */
-       ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) +=
-               SRCU_USAGE_COUNT - 1;
+       ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1;
         preempt_enable();
  }
  EXPORT_SYMBOL_GPL(__srcu_read_unlock);
@@ -251,25 +250,15 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
  #define SYNCHRONIZE_SRCU_READER_DELAY 5
  
  /*
- * Flip the readers' index by incrementing ->completed, then wait
- * until there are no more readers using the counters referenced by
- * the old index value.  (Recall that the index is the bottom bit
- * of ->completed.)
- *
- * Of course, it is possible that a reader might be delayed for the
- * full duration of flip_idx_and_wait() between fetching the
- * index and incrementing its counter.  This possibility is handled
- * by __synchronize_srcu() invoking flip_idx_and_wait() twice.
+ * Wait until all pre-existing readers complete.  Such readers
+ * will have used the index specified by "idx".
   */
-static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
+static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
  {
-       int idx;
         int trycount = 0;
  
-       idx = sp->completed++ & 0x1;
-
         /*
-        * If a reader fetches the index before the above increment,
+        * If a reader fetches the index before the ->completed increment,
          * but increments its counter after srcu_readers_active_idx_check()
          * sums it, then smp_mb() D will pair with __srcu_read_lock()'s
          * smp_mb() B to ensure that the SRCU read-side critical section
@@ -299,16 +288,24 @@ static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
          * sees srcu_read_unlock()'s counter decrement, then any
          * of the current task's subsequent code will happen after
          * that SRCU read-side critical section.
+        *
+        * It also ensures that the wait above is ordered before the
+        * next flip of the index.
          */
         smp_mb(); /* E */
  }
  
+static void srcu_flip(struct srcu_struct *sp)
+{
+       sp->completed++;
+}
+
  /*
   * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
   */
  static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
  {
-       int idx = 0;
+       int busy_idx;
  
         rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
                            !lock_is_held(&rcu_bh_lock_map) &&
@@ -317,29 +314,48 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
                            "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
  
         mutex_lock(&sp->mutex);
+       busy_idx = sp->completed & 0X1UL;
  
         /*
-        * If there were no helpers, then we need to do two flips of
-        * the index.  The first flip is required if there are any
-        * outstanding SRCU readers even if there are no new readers
-        * running concurrently with the first counter flip.
+        * If we recently flipped the index, there will be some readers
+        * using idx=0 and others using idx=1.  Therefore, two calls to
+        * wait_idx() suffice to ensure that all pre-existing readers
+        * have completed:
+        *
+        * __synchronize_srcu() {
+        *      wait_idx(sp, 0, expedited);
+        *      wait_idx(sp, 1, expedited);
+        * }
+        *
+        * Starvation is prevented by the fact that we flip the index.
+        * While we wait on one index to clear out, almost all new readers
+        * will be using the other index.  The number of new readers using the
+        * index we are waiting on is sharply bounded by roughly the number
+        * of CPUs.
+        *
+        * How can new readers possibly be using the old pre-flip value of
+        * the index?  Consider the following sequence of events:
          *
-        * The second flip is required when a new reader picks up
-        * the old value of the index, but does not increment its
-        * counter until after its counters is summed/rechecked by
-        * srcu_readers_active_idx_check().  In this case, the current SRCU
-        * grace period would be OK because the SRCU read-side critical
-        * section started after this SRCU grace period started, so the
-        * grace period is not required to wait for the reader.
+        * Suppose that during the previous grace period, a reader
+        * picked up the old value of the index, but did not increment
+        * its counter until after the previous instance of
+        * __synchronize_srcu() did the counter summation and recheck.
+        * That previous grace period was OK because the reader did
+        * not start until after the grace period started, so the grace
+        * period was not obligated to wait for that reader.
          *
-        * However, the next SRCU grace period would be waiting for the
-        * other set of counters to go to zero, and therefore would not
-        * wait for the reader, which would be very bad.  To avoid this
-        * bad scenario, we flip and wait twice, clearing out both sets
-        * of counters.
+        * However, this sequence of events is quite improbable, so
+        * this call to wait_idx(), which waits on really old readers
+        * described in the comment above, will almost never need to wait.
          */
-       for (; idx < 2; idx++)
-               flip_idx_and_wait(sp, expedited);
+       wait_idx(sp, 1 - busy_idx, expedited);
+
+       /* Flip the index to avoid reader-induced starvation. */
+       srcu_flip(sp);
+
+       /* Wait for recent pre-existing readers. */
+       wait_idx(sp, busy_idx, expedited);
+
         mutex_unlock(&sp->mutex);
  }