Merge tag 'trace-ipi-tracepoints' of git://git.kernel.org/pub/scm/linux/kernel/git...
[platform/kernel/linux-rpi.git]
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 625d0b0..1b70cb6 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1013,10 +1013,7 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
 }
 
 /*
- * Dump stacks of all tasks running on stalled CPUs.  This is a fallback
- * for architectures that do not implement trigger_all_cpu_backtrace().
- * The NMI-triggered stack traces are more accurate because they are
- * printed by the target CPU.
+ * Dump stacks of all tasks running on stalled CPUs.
  */
 static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
 {
@@ -1094,7 +1091,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
               (long)rsp->gpnum, (long)rsp->completed, totqlen);
        if (ndetected == 0)
                pr_err("INFO: Stall ended before state dump start\n");
-       else if (!trigger_all_cpu_backtrace())
+       else
                rcu_dump_cpu_stacks(rsp);
 
        /* Complain about tasks blocking the grace period. */
@@ -1125,8 +1122,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
        pr_cont(" (t=%lu jiffies g=%ld c=%ld q=%lu)\n",
                jiffies - rsp->gp_start,
                (long)rsp->gpnum, (long)rsp->completed, totqlen);
-       if (!trigger_all_cpu_backtrace())
-               dump_stack();
+       rcu_dump_cpu_stacks(rsp);
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
        if (ULONG_CMP_GE(jiffies, ACCESS_ONCE(rsp->jiffies_stall)))
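These three hunks route both flavors of stall warning through rcu_dump_cpu_stacks() and drop the NMI-based trigger_all_cpu_backtrace() path, so the header comment no longer describes the function as a fallback. For reference, the helper reads approximately as below in trees of this vintage (a sketch; details may differ slightly): it walks the leaf rcu_node structures and dumps the stack of every CPU still blocking the grace period.

	static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
	{
		int cpu;
		unsigned long flags;
		struct rcu_node *rnp;

		rcu_for_each_leaf_node(rsp, rnp) {
			raw_spin_lock_irqsave(&rnp->lock, flags);
			if (rnp->qsmask != 0) {
				/* Each set bit in ->qsmask is a CPU that has
				 * not yet reported a quiescent state. */
				for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
					if (rnp->qsmask & (1UL << cpu))
						dump_cpu_task(rnp->grplo + cpu);
			}
			raw_spin_unlock_irqrestore(&rnp->lock, flags);
		}
	}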
@@ -1305,10 +1301,16 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
         * believe that a grace period is in progress, then we must wait
         * for the one following, which is in "c".  Because our request
         * will be noticed at the end of the current grace period, we don't
-        * need to explicitly start one.
+        * need to explicitly start one.  We only do the lockless check
+        * of rnp_root's fields if the current rcu_node structure thinks
+        * there is no grace period in flight, and because we hold rnp->lock,
+        * the only possible change is when rnp_root's two fields are
+        * equal, in which case rnp_root->gpnum might be concurrently
+        * incremented.  But that is OK, as it will just result in our
+        * doing some extra useless work.
         */
        if (rnp->gpnum != rnp->completed ||
-           ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
+           ACCESS_ONCE(rnp_root->gpnum) != ACCESS_ONCE(rnp_root->completed)) {
                rnp->need_future_gp[c & 0x1]++;
                trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf"));
                goto out;
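A subtle one-token fix: the old condition compared rnp's own ->gpnum and ->completed twice, so the second disjunct could never add information and the intended lockless double check of the root rcu_node never ran. The repaired idiom, in outline (schematic fragment, not kernel code):

	/* Locked local check first, then a racy read of the shared root;
	 * ACCESS_ONCE() keeps the compiler from fusing or tearing the
	 * lockless loads.  A stale answer is conservative: at worst we
	 * request a grace period that is already underway, which the
	 * comment above notes costs only some extra useless work. */
	if (local->gpnum != local->completed ||		/* under local lock */
	    ACCESS_ONCE(root->gpnum) != ACCESS_ONCE(root->completed))
		note_need_future_gp();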
@@ -1645,11 +1647,6 @@ static int rcu_gp_init(struct rcu_state *rsp)
                                            rnp->level, rnp->grplo,
                                            rnp->grphi, rnp->qsmask);
                raw_spin_unlock_irq(&rnp->lock);
-#ifdef CONFIG_PROVE_RCU_DELAY
-               if ((prandom_u32() % (rcu_num_nodes + 1)) == 0 &&
-                   system_state == SYSTEM_RUNNING)
-                       udelay(200);
-#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
                cond_resched();
        }
 
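The deleted lines were CONFIG_PROVE_RCU_DELAY's race-widening hack: on roughly one pass in (rcu_num_nodes + 1), grace-period initialization stalled for 200 microseconds so that initialization races became easier to hit under test. A userspace rendering of the idiom (hypothetical helper, rand() standing in for prandom_u32()):

	#include <stdlib.h>
	#include <unistd.h>

	/* With probability 1/(n + 1), stall briefly so that racing
	 * threads get a wider window in which to collide. */
	static void maybe_inject_delay(unsigned int n)
	{
		if ((rand() % (n + 1)) == 0)
			usleep(200);	/* ~udelay(200) in the original */
	}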
@@ -2347,7 +2344,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
        }
        smp_mb(); /* List handling before counting for rcu_barrier(). */
        rdp->qlen_lazy -= count_lazy;
-       ACCESS_ONCE(rdp->qlen) -= count;
+       ACCESS_ONCE(rdp->qlen) = rdp->qlen - count;
        rdp->n_cbs_invoked += count;
 
        /* Reinstate batch limit if we have worked down the excess. */
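This hunk, and the ->qlen and ->n_barrier_done hunks further down, all apply the same transformation: ACCESS_ONCE(x)++ (or -=) is a volatile read-modify-write that looks deceptively atomic and that compilers are not obliged to emit as a single access. Rewriting it as a plain load feeding a volatile store says exactly what is meant: only this CPU updates the field, so the load needs no protection, while the store is the access that concurrent readers must see whole. A standalone illustration, assuming the classic kernel definition of ACCESS_ONCE():

	#include <stdio.h>

	/* Classic kernel definition: force a volatile access to x. */
	#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

	static unsigned long qlen = 5;

	int main(void)
	{
		unsigned long count = 3;

		/* Old style, now avoided: ACCESS_ONCE(qlen) -= count;
		 * a volatile RMW that reads as if it were atomic. */

		/* New style: ordinary load, volatile store. */
		ACCESS_ONCE(qlen) = qlen - count;

		printf("qlen = %lu\n", qlen);
		return 0;
	}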
@@ -2485,14 +2482,14 @@ static void force_quiescent_state(struct rcu_state *rsp)
        struct rcu_node *rnp_old = NULL;
 
        /* Funnel through hierarchy to reduce memory contention. */
-       rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
+       rnp = __this_cpu_read(rsp->rda->mynode);
        for (; rnp != NULL; rnp = rnp->parent) {
                ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
                      !raw_spin_trylock(&rnp->fqslock);
                if (rnp_old != NULL)
                        raw_spin_unlock(&rnp_old->fqslock);
                if (ret) {
-                       ACCESS_ONCE(rsp->n_force_qs_lh)++;
+                       rsp->n_force_qs_lh++;
                        return;
                }
                rnp_old = rnp;
@@ -2504,7 +2501,7 @@ static void force_quiescent_state(struct rcu_state *rsp)
        smp_mb__after_unlock_lock();
        raw_spin_unlock(&rnp_old->fqslock);
        if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
-               ACCESS_ONCE(rsp->n_force_qs_lh)++;
+               rsp->n_force_qs_lh++;
                raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
                return;  /* Someone beat us to it. */
        }
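Three things happen in force_quiescent_state(): __this_cpu_read(rsp->rda->mynode) replaces the heavier per_cpu_ptr(..., raw_smp_processor_id()) pattern for reaching this CPU's own rcu_data; the ->n_force_qs_lh statistic drops ACCESS_ONCE(), since it is an approximate diagnostic counter where a rare lost increment is acceptable; and both sit inside the funnel-locking walk visible in the context lines. That idiom generalizes; a minimal userspace sketch, with hypothetical node types and pthreads in place of raw spinlocks:

	#include <pthread.h>
	#include <stddef.h>

	struct node {
		pthread_mutex_t fqslock;
		struct node *parent;
	};

	/* Funnel locking: trylock upward from the leaf, releasing the
	 * lock below once the one above is held. */
	static int funnel_to_root(struct node *leaf)
	{
		struct node *np, *np_old = NULL;

		for (np = leaf; np != NULL; np = np->parent) {
			if (pthread_mutex_trylock(&np->fqslock) != 0) {
				if (np_old)
					pthread_mutex_unlock(&np_old->fqslock);
				return 0;	/* someone beat us to it */
			}
			if (np_old)
				pthread_mutex_unlock(&np_old->fqslock);
			np_old = np;
		}
		/* np_old is the root and we hold its lock: the real
		 * work would happen here. */
		pthread_mutex_unlock(&np_old->fqslock);
		return 1;
	}

A trylock failure anywhere on the path means another task already holds that level and is performing (or has requested) the same work, so backing out is correct and keeps contention on the root lock low.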
@@ -2662,7 +2659,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
        unsigned long flags;
        struct rcu_data *rdp;
 
-       WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
+       WARN_ON_ONCE((unsigned long)head & 0x1); /* Misaligned rcu_head! */
        if (debug_rcu_head_queue(head)) {
                /* Probable double call_rcu(), so leak the callback. */
                ACCESS_ONCE(head->func) = rcu_leak_callback;
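Relaxing the mask from 0x3 to 0x1 loosens the sanity check from four-byte to two-byte alignment; m68k, notably, guarantees only two-byte alignment for structures, so the stricter check could fire on valid rcu_head pointers there. Testing alignment via the low address bits is the standard trick (standalone sketch):

	#include <assert.h>
	#include <stdint.h>

	/* An address is N-byte aligned iff its low log2(N) bits are
	 * zero: masking with 0x1 rejects only odd addresses, while
	 * masking with 0x3 would demand four-byte alignment. */
	static int is_two_byte_aligned(const void *p)
	{
		return ((uintptr_t)p & 0x1) == 0;
	}

	int main(void)
	{
		static short buf[2];	/* at least 2-byte aligned */

		assert(is_two_byte_aligned(&buf[0]));
		assert(is_two_byte_aligned(&buf[1]));
		return 0;
	}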
@@ -2693,7 +2690,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
                local_irq_restore(flags);
                return;
        }
-       ACCESS_ONCE(rdp->qlen)++;
+       ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1;
        if (lazy)
                rdp->qlen_lazy++;
        else
@@ -3257,7 +3254,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
         * ACCESS_ONCE() to prevent the compiler from speculating
         * the increment to precede the early-exit check.
         */
-       ACCESS_ONCE(rsp->n_barrier_done)++;
+       ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1;
        WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 1);
        _rcu_barrier_trace(rsp, "Inc1", -1, rsp->n_barrier_done);
        smp_mb(); /* Order ->n_barrier_done increment with below mechanism. */
@@ -3307,7 +3304,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
 
        /* Increment ->n_barrier_done to prevent duplicate work. */
        smp_mb(); /* Keep increment after above mechanism. */
-       ACCESS_ONCE(rsp->n_barrier_done)++;
+       ACCESS_ONCE(rsp->n_barrier_done) = rsp->n_barrier_done + 1;
        WARN_ON_ONCE((rsp->n_barrier_done & 0x1) != 0);
        _rcu_barrier_trace(rsp, "Inc2", -1, rsp->n_barrier_done);
        smp_mb(); /* Keep increment before caller's subsequent code. */
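Both increments keep ->n_barrier_done on its even/odd protocol: even means no rcu_barrier() in flight, odd means one is running, which is exactly what the two WARN_ON_ONCE() parity checks assert. The ACCESS_ONCE() on each store, per the comment above the first hunk, also keeps the compiler from hoisting the increment past the early-exit snapshot check. A toy model of the parity bookkeeping (hypothetical names, asserts in place of WARN_ON_ONCE()):

	#include <assert.h>

	static unsigned long n_barrier_done;	/* even: idle, odd: in flight */

	static void barrier_begin(void)
	{
		n_barrier_done++;			/* even -> odd */
		assert((n_barrier_done & 0x1) == 1);
	}

	static void barrier_end(void)
	{
		n_barrier_done++;			/* odd -> even */
		assert((n_barrier_done & 0x1) == 0);
	}

	int main(void)
	{
		unsigned long snap = n_barrier_done;

		barrier_begin();
		barrier_end();
		/* A waiter that snapshotted before barrier_begin() can
		 * tell a full barrier has elapsed: the counter reached
		 * the first even value strictly after its snapshot. */
		assert(n_barrier_done >= ((snap + 3) & ~0x1UL));
		return 0;
	}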
@@ -3564,14 +3561,16 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 static void __init rcu_init_one(struct rcu_state *rsp,
                struct rcu_data __percpu *rda)
 {
-       static char *buf[] = { "rcu_node_0",
-                              "rcu_node_1",
-                              "rcu_node_2",
-                              "rcu_node_3" };  /* Match MAX_RCU_LVLS */
-       static char *fqs[] = { "rcu_node_fqs_0",
-                              "rcu_node_fqs_1",
-                              "rcu_node_fqs_2",
-                              "rcu_node_fqs_3" };  /* Match MAX_RCU_LVLS */
+       static const char * const buf[] = {
+               "rcu_node_0",
+               "rcu_node_1",
+               "rcu_node_2",
+               "rcu_node_3" };  /* Match MAX_RCU_LVLS */
+       static const char * const fqs[] = {
+               "rcu_node_fqs_0",
+               "rcu_node_fqs_1",
+               "rcu_node_fqs_2",
+               "rcu_node_fqs_3" };  /* Match MAX_RCU_LVLS */
        static u8 fl_mask = 0x1;
        int cpustride = 1;
        int i;
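Beyond the reflowed initializers, the arrays gain two consts. The string literals were never legally writable anyway; the payoff is that the pointer array itself stops claiming mutable access and can move into read-only data. In miniature:

	/* Writable pointer array: the pointers occupy writable data
	 * and can be reassigned (or corrupted) at run time. */
	static char *names_rw[] = { "rcu_node_0", "rcu_node_1" };

	/* Fully const: neither the pointers nor their targets may be
	 * modified, so the table can live in .rodata. */
	static const char * const names_ro[] = { "rcu_node_0", "rcu_node_1" };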