/*
* Now, we check the ->snap array that srcu_readers_active_idx()
- * filled in from the per-CPU counter values. Since both
- * __srcu_read_lock() and __srcu_read_unlock() increment the
- * upper bits of the per-CPU counter, an increment/decrement
- * pair will change the value of the counter. Since there is
- * only one possible increment, the only way to wrap the counter
- * is to have a huge number of counter decrements, which requires
- * a huge number of tasks and huge SRCU read-side critical-section
- * nesting levels, even on 32-bit systems.
+ * filled in from the per-CPU counter values. Since
+ * __srcu_read_lock() increments the upper bits of the per-CPU
+ * counter, an increment/decrement pair will change the value
+ * of the counter. Since there is only one possible increment,
+ * the only way to wrap the counter is to have a huge number of
+ * counter decrements, which requires a huge number of tasks and
+ * huge SRCU read-side critical-section nesting levels, even on
+ * 32-bit systems.
*
* All of the ways of confusing the readings require that the scan
* in srcu_readers_active_idx() see the read-side task's decrement,
{
preempt_disable();
smp_mb(); /* C */ /* Avoid leaking the critical section. */
- ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) +=
- SRCU_USAGE_COUNT - 1;
+ ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1;
preempt_enable();
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
#define SYNCHRONIZE_SRCU_READER_DELAY 5
/*
- * Flip the readers' index by incrementing ->completed, then wait
- * until there are no more readers using the counters referenced by
- * the old index value. (Recall that the index is the bottom bit
- * of ->completed.)
- *
- * Of course, it is possible that a reader might be delayed for the
- * full duration of flip_idx_and_wait() between fetching the
- * index and incrementing its counter. This possibility is handled
- * by __synchronize_srcu() invoking flip_idx_and_wait() twice.
+ * Wait until all pre-existing readers complete. Such readers
+ * will have used the index specified by "idx".
*/
-static void flip_idx_and_wait(struct srcu_struct *sp, bool expedited)
+static void wait_idx(struct srcu_struct *sp, int idx, bool expedited)
{
- int idx;
int trycount = 0;
- idx = sp->completed++ & 0x1;
-
/*
- * If a reader fetches the index before the above increment,
+ * If a reader fetches the index before the ->completed increment,
* but increments its counter after srcu_readers_active_idx_check()
* sums it, then smp_mb() D will pair with __srcu_read_lock()'s
* smp_mb() B to ensure that the SRCU read-side critical section
* sees srcu_read_unlock()'s counter decrement, then any
* of the current task's subsequent code will happen after
* that SRCU read-side critical section.
+ *
+ * It also orders the above waiting before the subsequent flip
+ * of the index.
*/
smp_mb(); /* E */
}
+static void srcu_flip(struct srcu_struct *sp)
+{
+ sp->completed++;
+}
+
/*
* Helper function for synchronize_srcu() and synchronize_srcu_expedited().
*/
static void __synchronize_srcu(struct srcu_struct *sp, bool expedited)
{
- int idx = 0;
+ int busy_idx;
rcu_lockdep_assert(!lock_is_held(&sp->dep_map) &&
!lock_is_held(&rcu_bh_lock_map) &&
"Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section");
mutex_lock(&sp->mutex);
+ busy_idx = sp->completed & 0x1UL;
/*
- * If there were no helpers, then we need to do two flips of
- * the index. The first flip is required if there are any
- * outstanding SRCU readers even if there are no new readers
- * running concurrently with the first counter flip.
+ * If we recently flipped the index, there will be some readers
+ * using idx=0 and others using idx=1. Therefore, two calls to
+ * wait_idx() suffice to ensure that all pre-existing readers
+ * have completed:
+ *
+ * __synchronize_srcu() {
+ * wait_idx(sp, 0, expedited);
+ * wait_idx(sp, 1, expedited);
+ * }
+ *
+ * Starvation is prevented by the fact that we flip the index.
+ * While we wait on one index to clear out, almost all new readers
+ * will be using the other index. The number of new readers using the
+ * index we are waiting on is sharply bounded by roughly the number
+ * of CPUs.
+ *
+ * How can new readers possibly be using the old pre-flip value of
+ * the index? Consider the following sequence of events:
*
- * The second flip is required when a new reader picks up
- * the old value of the index, but does not increment its
- * counter until after its counters is summed/rechecked by
- * srcu_readers_active_idx_check(). In this case, the current SRCU
- * grace period would be OK because the SRCU read-side critical
- * section started after this SRCU grace period started, so the
- * grace period is not required to wait for the reader.
+ * Suppose that during the previous grace period, a reader
+ * picked up the old value of the index, but did not increment
+ * its counter until after the previous instance of
+ * __synchronize_srcu() did the counter summation and recheck.
+ * That previous grace period was OK because the reader did
+ * not start until after the grace period started, so the grace
+ * period was not obligated to wait for that reader.
*
- * However, the next SRCU grace period would be waiting for the
- * other set of counters to go to zero, and therefore would not
- * wait for the reader, which would be very bad. To avoid this
- * bad scenario, we flip and wait twice, clearing out both sets
- * of counters.
+ * However, this sequence of events is quite improbable, so
+ * this call to wait_idx(), which waits on really old readers
+ * described in the comment above, will almost never need to wait.
*/
- for (; idx < 2; idx++)
- flip_idx_and_wait(sp, expedited);
+ wait_idx(sp, 1 - busy_idx, expedited);
+
+ /* Flip the index to avoid reader-induced starvation. */
+ srcu_flip(sp);
+
+ /* Wait for recent pre-existing readers. */
+ wait_idx(sp, busy_idx, expedited);
+
mutex_unlock(&sp->mutex);
}