Merge branch 'gp.2013.09.25a' into HEAD
author    Paul E. McKenney <paulmck@linux.vnet.ibm.com>
          Tue, 15 Oct 2013 19:47:04 +0000 (12:47 -0700)
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>
          Tue, 15 Oct 2013 19:47:04 +0000 (12:47 -0700)
gp.2013.09.25a: Topic branch for grace-period updates.

kernel/rcutiny.c
kernel/rcutree.c
kernel/rcutree_plugin.h

diff --combined kernel/rcutiny.c
@@@ -35,7 -35,6 +35,7 @@@
  #include <linux/time.h>
  #include <linux/cpu.h>
  #include <linux/prefetch.h>
 +#include <linux/ftrace_event.h>
  
  #ifdef CONFIG_RCU_TRACE
  #include <trace/events/rcu.h>
@@@ -59,17 -58,16 +59,17 @@@ static long long rcu_dynticks_nesting 
  static void rcu_idle_enter_common(long long newval)
  {
        if (newval) {
 -              RCU_TRACE(trace_rcu_dyntick("--=",
 +              RCU_TRACE(trace_rcu_dyntick(TPS("--="),
                                            rcu_dynticks_nesting, newval));
                rcu_dynticks_nesting = newval;
                return;
        }
 -      RCU_TRACE(trace_rcu_dyntick("Start", rcu_dynticks_nesting, newval));
 +      RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
 +                                  rcu_dynticks_nesting, newval));
        if (!is_idle_task(current)) {
                struct task_struct *idle = idle_task(smp_processor_id());
  
 -              RCU_TRACE(trace_rcu_dyntick("Error on entry: not idle task",
 +              RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
                                            rcu_dynticks_nesting, newval));
                ftrace_dump(DUMP_ALL);
                WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
@@@ -122,15 -120,15 +122,15 @@@ EXPORT_SYMBOL_GPL(rcu_irq_exit)
  static void rcu_idle_exit_common(long long oldval)
  {
        if (oldval) {
 -              RCU_TRACE(trace_rcu_dyntick("++=",
 +              RCU_TRACE(trace_rcu_dyntick(TPS("++="),
                                            oldval, rcu_dynticks_nesting));
                return;
        }
 -      RCU_TRACE(trace_rcu_dyntick("End", oldval, rcu_dynticks_nesting));
 +      RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
        if (!is_idle_task(current)) {
                struct task_struct *idle = idle_task(smp_processor_id());
  
 -              RCU_TRACE(trace_rcu_dyntick("Error on exit: not idle task",
 +              RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
                          oldval, rcu_dynticks_nesting));
                ftrace_dump(DUMP_ALL);
                WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
@@@ -275,7 -273,7 +275,7 @@@ static void __rcu_process_callbacks(str
        if (&rcp->rcucblist == rcp->donetail) {
                RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
                RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
-                                             ACCESS_ONCE(rcp->rcucblist),
+                                             !!ACCESS_ONCE(rcp->rcucblist),
                                              need_resched(),
                                              is_idle_task(current),
                                              false));
                RCU_TRACE(cb_count++);
        }
        RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
 -      RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count, 0, need_resched(),
 +      RCU_TRACE(trace_rcu_batch_end(rcp->name,
 +                                    cb_count, 0, need_resched(),
                                      is_idle_task(current),
                                      false));
  }
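
Aside (not part of the commit): the change from ACCESS_ONCE(rcp->rcucblist) to
!!ACCESS_ONCE(rcp->rcucblist) above exists so that trace_rcu_batch_end() records a
0/1 "callbacks remaining" flag rather than a truncated pointer value. A minimal
standalone sketch of the !! idiom, with a hypothetical callback list standing in
for rcp->rcucblist:

    #include <stdio.h>

    struct cb { struct cb *next; };

    /* Hypothetical stand-in for rcp->rcucblist. */
    static struct cb *cblist;

    int main(void)
    {
            struct cb cb1 = { .next = NULL };

            /* Without "!!", the raw pointer value would be handed to the trace field. */
            printf("empty:    raw=%p  flag=%d\n", (void *)cblist, !!cblist);

            cblist = &cb1;
            /* "!!" collapses any non-NULL pointer to exactly 1. */
            printf("nonempty: raw=%p  flag=%d\n", (void *)cblist, !!cblist);
            return 0;
    }

Logically, !!p is just p != NULL; it folds any non-NULL pointer down to 1 so the
trace field stays a clean boolean.
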
diff --combined kernel/rcutree.c
  
  #include "rcu.h"
  
 -/*
 - * Strings used in tracepoints need to be exported via the
 - * tracing system such that tools like perf and trace-cmd can
 - * translate the string address pointers to actual text.
 - */
 -#define TPS(x)        tracepoint_string(x)
 -
  /* Data structures. */
  
  static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
@@@ -215,7 -222,7 +215,7 @@@ void rcu_note_context_switch(int cpu
  }
  EXPORT_SYMBOL_GPL(rcu_note_context_switch);
  
 -DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 +static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
        .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
        .dynticks = ATOMIC_INIT(1),
  #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
@@@ -364,8 -371,7 +364,8 @@@ static void rcu_eqs_enter_common(struc
  {
        trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
        if (!user && !is_idle_task(current)) {
 -              struct task_struct *idle = idle_task(smp_processor_id());
 +              struct task_struct *idle __maybe_unused =
 +                      idle_task(smp_processor_id());
  
                trace_rcu_dyntick(TPS("Error on entry: not idle task"), oldval, 0);
                ftrace_dump(DUMP_ORIG);
@@@ -401,7 -407,7 +401,7 @@@ static void rcu_eqs_enter(bool user
        long long oldval;
        struct rcu_dynticks *rdtp;
  
 -      rdtp = &__get_cpu_var(rcu_dynticks);
 +      rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
        if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
@@@ -429,7 -435,7 +429,7 @@@ void rcu_idle_enter(void
  
        local_irq_save(flags);
        rcu_eqs_enter(false);
 -      rcu_sysidle_enter(&__get_cpu_var(rcu_dynticks), 0);
 +      rcu_sysidle_enter(this_cpu_ptr(&rcu_dynticks), 0);
        local_irq_restore(flags);
  }
  EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@@ -472,7 -478,7 +472,7 @@@ void rcu_irq_exit(void
        struct rcu_dynticks *rdtp;
  
        local_irq_save(flags);
 -      rdtp = &__get_cpu_var(rcu_dynticks);
 +      rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        rdtp->dynticks_nesting--;
        WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
@@@ -502,8 -508,7 +502,8 @@@ static void rcu_eqs_exit_common(struct 
        rcu_cleanup_after_idle(smp_processor_id());
        trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
        if (!user && !is_idle_task(current)) {
 -              struct task_struct *idle = idle_task(smp_processor_id());
 +              struct task_struct *idle __maybe_unused =
 +                      idle_task(smp_processor_id());
  
                trace_rcu_dyntick(TPS("Error on exit: not idle task"),
                                  oldval, rdtp->dynticks_nesting);
@@@ -523,7 -528,7 +523,7 @@@ static void rcu_eqs_exit(bool user
        struct rcu_dynticks *rdtp;
        long long oldval;
  
 -      rdtp = &__get_cpu_var(rcu_dynticks);
 +      rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        WARN_ON_ONCE(oldval < 0);
        if (oldval & DYNTICK_TASK_NEST_MASK)
@@@ -550,7 -555,7 +550,7 @@@ void rcu_idle_exit(void
  
        local_irq_save(flags);
        rcu_eqs_exit(false);
 -      rcu_sysidle_exit(&__get_cpu_var(rcu_dynticks), 0);
 +      rcu_sysidle_exit(this_cpu_ptr(&rcu_dynticks), 0);
        local_irq_restore(flags);
  }
  EXPORT_SYMBOL_GPL(rcu_idle_exit);
@@@ -594,7 -599,7 +594,7 @@@ void rcu_irq_enter(void
        long long oldval;
  
        local_irq_save(flags);
 -      rdtp = &__get_cpu_var(rcu_dynticks);
 +      rdtp = this_cpu_ptr(&rcu_dynticks);
        oldval = rdtp->dynticks_nesting;
        rdtp->dynticks_nesting++;
        WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
   */
  void rcu_nmi_enter(void)
  {
 -      struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 +      struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
  
        if (rdtp->dynticks_nmi_nesting == 0 &&
            (atomic_read(&rdtp->dynticks) & 0x1))
   */
  void rcu_nmi_exit(void)
  {
 -      struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks);
 +      struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
  
        if (rdtp->dynticks_nmi_nesting == 0 ||
            --rdtp->dynticks_nmi_nesting != 0)
@@@ -660,7 -665,7 +660,7 @@@ int rcu_is_cpu_idle(void
        int ret;
  
        preempt_disable();
 -      ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
 +      ret = (atomic_read(this_cpu_ptr(&rcu_dynticks.dynticks)) & 0x1) == 0;
        preempt_enable();
        return ret;
  }
@@@ -698,7 -703,7 +698,7 @@@ bool rcu_lockdep_current_cpu_online(voi
        if (in_nmi())
                return 1;
        preempt_disable();
 -      rdp = &__get_cpu_var(rcu_sched_data);
 +      rdp = this_cpu_ptr(&rcu_sched_data);
        rnp = rdp->mynode;
        ret = (rdp->grpmask & rnp->qsmaskinit) ||
              !rcu_scheduler_fully_active;
@@@ -718,7 -723,7 +718,7 @@@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_c
   */
  static int rcu_is_cpu_rrupt_from_idle(void)
  {
 -      return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
 +      return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 1;
  }
  
  /*
@@@ -797,11 -802,8 +797,11 @@@ static int rcu_implicit_dynticks_qs(str
  
  static void record_gp_stall_check_time(struct rcu_state *rsp)
  {
 -      rsp->gp_start = jiffies;
 -      rsp->jiffies_stall = jiffies + rcu_jiffies_till_stall_check();
 +      unsigned long j = ACCESS_ONCE(jiffies);
 +
 +      rsp->gp_start = j;
 +      smp_wmb(); /* Record start time before stall time. */
 +      rsp->jiffies_stall = j + rcu_jiffies_till_stall_check();
  }
  
  /*
@@@ -930,48 -932,17 +930,48 @@@ static void print_cpu_stall(struct rcu_
  
  static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
  {
 +      unsigned long completed;
 +      unsigned long gpnum;
 +      unsigned long gps;
        unsigned long j;
        unsigned long js;
        struct rcu_node *rnp;
  
 -      if (rcu_cpu_stall_suppress)
 +      if (rcu_cpu_stall_suppress || !rcu_gp_in_progress(rsp))
                return;
        j = ACCESS_ONCE(jiffies);
 +
 +      /*
 +       * Lots of memory barriers to reject false positives.
 +       *
 +       * The idea is to pick up rsp->gpnum, then rsp->jiffies_stall,
 +       * then rsp->gp_start, and finally rsp->completed.  These values
 +       * are updated in the opposite order with memory barriers (or
 +       * equivalent) during grace-period initialization and cleanup.
 +       * Now, a false positive can occur if we get a new value of
 +       * rsp->gp_start and an old value of rsp->jiffies_stall.  But given
 +       * the memory barriers, the only way that this can happen is if one
 +       * grace period ends and another starts between these two fetches.
 +       * Detect this by comparing rsp->completed with the previous fetch
 +       * from rsp->gpnum.
 +       *
 +       * Given this check, comparisons of jiffies, rsp->jiffies_stall,
 +       * and rsp->gp_start suffice to forestall false positives.
 +       */
 +      gpnum = ACCESS_ONCE(rsp->gpnum);
 +      smp_rmb(); /* Pick up ->gpnum first... */
        js = ACCESS_ONCE(rsp->jiffies_stall);
 +      smp_rmb(); /* ...then ->jiffies_stall before the rest... */
 +      gps = ACCESS_ONCE(rsp->gp_start);
 +      smp_rmb(); /* ...and finally ->gp_start before ->completed. */
 +      completed = ACCESS_ONCE(rsp->completed);
 +      if (ULONG_CMP_GE(completed, gpnum) ||
 +          ULONG_CMP_LT(j, js) ||
 +          ULONG_CMP_GE(gps, js))
 +              return; /* No stall or GP completed since entering function. */
        rnp = rdp->mynode;
        if (rcu_gp_in_progress(rsp) &&
 -          (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask) && ULONG_CMP_GE(j, js)) {
 +          (ACCESS_ONCE(rnp->qsmask) & rdp->grpmask)) {
  
                /* We haven't checked in, so go dump stack. */
                print_cpu_stall(rsp);
@@@ -1326,7 -1297,7 +1326,7 @@@ static void note_gp_changes(struct rcu_
  }
  
  /*
-  * Initialize a new grace period.
+  * Initialize a new grace period.  Return 0 if no grace period required.
   */
  static int rcu_gp_init(struct rcu_state *rsp)
  {
  
        rcu_bind_gp_kthread();
        raw_spin_lock_irq(&rnp->lock);
+       if (rsp->gp_flags == 0) {
+               /* Spurious wakeup, tell caller to go back to sleep.  */
+               raw_spin_unlock_irq(&rnp->lock);
+               return 0;
+       }
        rsp->gp_flags = 0; /* Clear all flags: New grace period. */
  
-       if (rcu_gp_in_progress(rsp)) {
-               /* Grace period already in progress, don't start another.  */
+       if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) {
+               /*
+                * Grace period already in progress, don't start another.
+                * Not supposed to be able to happen.
+                */
                raw_spin_unlock_irq(&rnp->lock);
                return 0;
        }
  
        /* Advance to a new grace period and initialize state. */
 +      record_gp_stall_check_time(rsp);
 +      smp_wmb(); /* Record GP times before starting GP. */
        rsp->gpnum++;
        trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
 -      record_gp_stall_check_time(rsp);
        raw_spin_unlock_irq(&rnp->lock);
  
        /* Exclude any concurrent CPU-hotplug operations. */
  /*
   * Do one round of quiescent-state forcing.
   */
 -int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 +static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
  {
        int fqs_state = fqs_state_in;
        bool isidle = false;
@@@ -1481,8 -1459,12 +1489,12 @@@ static void rcu_gp_cleanup(struct rcu_s
        rsp->fqs_state = RCU_GP_IDLE;
        rdp = this_cpu_ptr(rsp->rda);
        rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */
-       if (cpu_needs_another_gp(rsp, rdp))
+       if (cpu_needs_another_gp(rsp, rdp)) {
 -              rsp->gp_flags = 1;
 +              rsp->gp_flags = RCU_GP_FLAG_INIT;
+               trace_rcu_grace_period(rsp->name,
+                                      ACCESS_ONCE(rsp->gpnum),
+                                      TPS("newreq"));
+       }
        raw_spin_unlock_irq(&rnp->lock);
  }
  
  static int __noreturn rcu_gp_kthread(void *arg)
  {
        int fqs_state;
+       int gf;
        unsigned long j;
        int ret;
        struct rcu_state *rsp = arg;
  
                /* Handle grace-period start. */
                for (;;) {
+                       trace_rcu_grace_period(rsp->name,
+                                              ACCESS_ONCE(rsp->gpnum),
+                                              TPS("reqwait"));
                        wait_event_interruptible(rsp->gp_wq,
-                                                rsp->gp_flags &
+                                                ACCESS_ONCE(rsp->gp_flags) &
                                                 RCU_GP_FLAG_INIT);
-                       if ((rsp->gp_flags & RCU_GP_FLAG_INIT) &&
-                           rcu_gp_init(rsp))
+                       if (rcu_gp_init(rsp))
                                break;
                        cond_resched();
                        flush_signals(current);
+                       trace_rcu_grace_period(rsp->name,
+                                              ACCESS_ONCE(rsp->gpnum),
+                                              TPS("reqwaitsig"));
                }
  
                /* Handle quiescent-state forcing. */
                        j = HZ;
                        jiffies_till_first_fqs = HZ;
                }
+               ret = 0;
                for (;;) {
-                       rsp->jiffies_force_qs = jiffies + j;
+                       if (!ret)
+                               rsp->jiffies_force_qs = jiffies + j;
+                       trace_rcu_grace_period(rsp->name,
+                                              ACCESS_ONCE(rsp->gpnum),
+                                              TPS("fqswait"));
                        ret = wait_event_interruptible_timeout(rsp->gp_wq,
-                                       (rsp->gp_flags & RCU_GP_FLAG_FQS) ||
+                                       ((gf = ACCESS_ONCE(rsp->gp_flags)) &
+                                        RCU_GP_FLAG_FQS) ||
                                        (!ACCESS_ONCE(rnp->qsmask) &&
                                         !rcu_preempt_blocked_readers_cgp(rnp)),
                                        j);
                            !rcu_preempt_blocked_readers_cgp(rnp))
                                break;
                        /* If time for quiescent-state forcing, do it. */
-                       if (ret == 0 || (rsp->gp_flags & RCU_GP_FLAG_FQS)) {
+                       if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
+                           (gf & RCU_GP_FLAG_FQS)) {
+                               trace_rcu_grace_period(rsp->name,
+                                                      ACCESS_ONCE(rsp->gpnum),
+                                                      TPS("fqsstart"));
                                fqs_state = rcu_gp_fqs(rsp, fqs_state);
+                               trace_rcu_grace_period(rsp->name,
+                                                      ACCESS_ONCE(rsp->gpnum),
+                                                      TPS("fqsend"));
                                cond_resched();
                        } else {
                                /* Deal with stray signal. */
                                cond_resched();
                                flush_signals(current);
+                               trace_rcu_grace_period(rsp->name,
+                                                      ACCESS_ONCE(rsp->gpnum),
+                                                      TPS("fqswaitsig"));
                        }
                        j = jiffies_till_next_fqs;
                        if (j > HZ) {
@@@ -1584,6 -1588,8 +1618,8 @@@ rcu_start_gp_advanced(struct rcu_state 
                return;
        }
        rsp->gp_flags = RCU_GP_FLAG_INIT;
+       trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum),
+                              TPS("newreq"));
  
        /*
         * We can't do wakeups while holding the rnp->lock, as that
@@@ -2755,13 -2761,10 +2791,13 @@@ static int rcu_cpu_has_callbacks(int cp
  
        for_each_rcu_flavor(rsp) {
                rdp = per_cpu_ptr(rsp->rda, cpu);
 -              if (rdp->qlen != rdp->qlen_lazy)
 +              if (!rdp->nxtlist)
 +                      continue;
 +              hc = true;
 +              if (rdp->qlen != rdp->qlen_lazy || !all_lazy) {
                        al = false;
 -              if (rdp->nxtlist)
 -                      hc = true;
 +                      break;
 +              }
        }
        if (all_lazy)
                *all_lazy = al;
@@@ -3328,8 -3331,8 +3364,8 @@@ void __init rcu_init(void
  
        rcu_bootup_announce();
        rcu_init_geometry();
 -      rcu_init_one(&rcu_sched_state, &rcu_sched_data);
        rcu_init_one(&rcu_bh_state, &rcu_bh_data);
 +      rcu_init_one(&rcu_sched_state, &rcu_sched_data);
        __rcu_init_preempt();
        open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
  
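
Aside (not part of the commit): the reworked check_cpu_stall() path above compares
jiffies-derived timestamps with ULONG_CMP_GE()/ULONG_CMP_LT() so that counter
wraparound cannot trigger false stall reports. A small userspace sketch of that
wraparound-tolerant comparison; the macro bodies below follow the kernel's
definitions but are reproduced here purely for illustration:

    #include <limits.h>
    #include <stdio.h>

    /* Wraparound-tolerant comparisons, in the style of the kernel's helpers. */
    #define ULONG_CMP_GE(a, b)  (ULONG_MAX / 2 >= (a) - (b))
    #define ULONG_CMP_LT(a, b)  (ULONG_MAX / 2 < (a) - (b))

    int main(void)
    {
            unsigned long js = ULONG_MAX - 5;  /* stall deadline set just before wrap */
            unsigned long j  = 10;             /* "now", after the counter wrapped    */

            /* A plain "j >= js" wrongly says the deadline has not yet passed ... */
            printf("naive  j >= js     : %d\n", (int)(j >= js));
            /* ... while the wraparound-tolerant form sees that j is past js. */
            printf("ULONG_CMP_GE(j, js): %d\n", (int)ULONG_CMP_GE(j, js));
            printf("ULONG_CMP_LT(j, js): %d\n", (int)ULONG_CMP_LT(j, js));
            return 0;
    }
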
diff --combined kernel/rcutree_plugin.h
@@@ -96,15 -96,10 +96,15 @@@ static void __init rcu_bootup_announce_
  #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
  #ifdef CONFIG_RCU_NOCB_CPU_ALL
        pr_info("\tOffload RCU callbacks from all CPUs\n");
 -      cpumask_setall(rcu_nocb_mask);
 +      cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
  #endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
  #endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
        if (have_rcu_nocb_mask) {
 +              if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
 +                      pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
 +                      cpumask_and(rcu_nocb_mask, cpu_possible_mask,
 +                                  rcu_nocb_mask);
 +              }
                cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
                pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
                if (rcu_nocb_poll)
@@@ -665,7 -660,7 +665,7 @@@ static void rcu_preempt_check_callbacks
  
  static void rcu_preempt_do_callbacks(void)
  {
 -      rcu_do_batch(&rcu_preempt_state, &__get_cpu_var(rcu_preempt_data));
 +      rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
  }
  
  #endif /* #ifdef CONFIG_RCU_BOOST */
@@@ -1337,7 -1332,7 +1337,7 @@@ static void invoke_rcu_callbacks_kthrea
   */
  static bool rcu_is_callbacks_kthread(void)
  {
 -      return __get_cpu_var(rcu_cpu_kthread_task) == current;
 +      return __this_cpu_read(rcu_cpu_kthread_task) == current;
  }
  
  #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)
@@@ -1387,8 -1382,8 +1387,8 @@@ static int rcu_spawn_one_boost_kthread(
  
  static void rcu_kthread_do_work(void)
  {
 -      rcu_do_batch(&rcu_sched_state, &__get_cpu_var(rcu_sched_data));
 -      rcu_do_batch(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
 +      rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
 +      rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
        rcu_preempt_do_callbacks();
  }
  
@@@ -1407,7 -1402,7 +1407,7 @@@ static void rcu_cpu_kthread_park(unsign
  
  static int rcu_cpu_kthread_should_run(unsigned int cpu)
  {
 -      return __get_cpu_var(rcu_cpu_has_work);
 +      return __this_cpu_read(rcu_cpu_has_work);
  }
  
  /*
   */
  static void rcu_cpu_kthread(unsigned int cpu)
  {
 -      unsigned int *statusp = &__get_cpu_var(rcu_cpu_kthread_status);
 -      char work, *workp = &__get_cpu_var(rcu_cpu_has_work);
 +      unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
 +      char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
        int spincnt;
  
        for (spincnt = 0; spincnt < 10; spincnt++) {
@@@ -2113,15 -2108,22 +2113,22 @@@ static void __call_rcu_nocb_enqueue(str
  
        /* If we are not being polled and there is a kthread, awaken it ... */
        t = ACCESS_ONCE(rdp->nocb_kthread);
-       if (rcu_nocb_poll || !t)
 -      if (rcu_nocb_poll | !t) {
++      if (rcu_nocb_poll || !t) {
+               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                   TPS("WakeNotPoll"));
                return;
+       }
        len = atomic_long_read(&rdp->nocb_q_count);
        if (old_rhpp == &rdp->nocb_head) {
                wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
                rdp->qlen_last_fqs_check = 0;
+               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty"));
        } else if (len > rdp->qlen_last_fqs_check + qhimark) {
                wake_up_process(t); /* ... or if many callbacks queued. */
                rdp->qlen_last_fqs_check = LONG_MAX / 2;
+               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf"));
+       } else {
+               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));
        }
        return;
  }
@@@ -2145,10 -2147,12 +2152,12 @@@ static bool __call_rcu_nocb(struct rcu_
        if (__is_kfree_rcu_offset((unsigned long)rhp->func))
                trace_rcu_kfree_callback(rdp->rsp->name, rhp,
                                         (unsigned long)rhp->func,
-                                        rdp->qlen_lazy, rdp->qlen);
+                                        -atomic_long_read(&rdp->nocb_q_count_lazy),
+                                        -atomic_long_read(&rdp->nocb_q_count));
        else
                trace_rcu_callback(rdp->rsp->name, rhp,
-                                  rdp->qlen_lazy, rdp->qlen);
+                                  -atomic_long_read(&rdp->nocb_q_count_lazy),
+                                  -atomic_long_read(&rdp->nocb_q_count));
        return 1;
  }
  
@@@ -2226,6 -2230,7 +2235,7 @@@ static void rcu_nocb_wait_gp(struct rcu
  static int rcu_nocb_kthread(void *arg)
  {
        int c, cl;
+       bool firsttime = 1;
        struct rcu_head *list;
        struct rcu_head *next;
        struct rcu_head **tail;
        /* Each pass through this loop invokes one batch of callbacks */
        for (;;) {
                /* If not polling, wait for next batch of callbacks. */
-               if (!rcu_nocb_poll)
+               if (!rcu_nocb_poll) {
+                       trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                           TPS("Sleep"));
                        wait_event_interruptible(rdp->nocb_wq, rdp->nocb_head);
+               } else if (firsttime) {
+                       firsttime = 0;
+                       trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                           TPS("Poll"));
+               }
                list = ACCESS_ONCE(rdp->nocb_head);
                if (!list) {
+                       if (!rcu_nocb_poll)
+                               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                                   TPS("WokeEmpty"));
                        schedule_timeout_interruptible(1);
                        flush_signals(current);
                        continue;
                }
+               firsttime = 1;
+               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                   TPS("WokeNonEmpty"));
  
                /*
                 * Extract queued callbacks, update counts, and wait
                        next = list->next;
                        /* Wait for enqueuing to complete, if needed. */
                        while (next == NULL && &list->next != tail) {
+                               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                                   TPS("WaitQueue"));
                                schedule_timeout_interruptible(1);
+                               trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+                                                   TPS("WokeQueue"));
                                next = list->next;
                        }
                        debug_rcu_head_unqueue(list);
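
Aside (not part of the commit): in rcu_nocb_kthread() above, the new firsttime flag
keeps the "Poll" trace event from being emitted on every polling pass; it fires once
per idle stretch and is re-armed only after a batch of callbacks is found. A minimal
standalone sketch of that rate-limiting pattern, using hypothetical trace_wake() and
have_callbacks() helpers:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-in for a trace event such as trace_rcu_nocb_wake(). */
    static void trace_wake(const char *reason)
    {
            printf("trace: %s\n", reason);
    }

    /* Pretend work source: callbacks show up on passes 3 and 4 only. */
    static bool have_callbacks(int pass)
    {
            return pass == 3 || pass == 4;
    }

    int main(void)
    {
            bool firsttime = true;
            int pass;

            for (pass = 0; pass < 6; pass++) {
                    if (firsttime) {
                            firsttime = false;
                            trace_wake("Poll");         /* logged once per idle stretch */
                    }
                    if (!have_callbacks(pass))
                            continue;                   /* nothing to do, stay quiet    */
                    trace_wake("WokeNonEmpty");         /* work found, process it ...   */
                    firsttime = true;                   /* ... and re-arm the trace     */
            }
            return 0;
    }
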