Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>

Tue, 30 Apr 2013 14:39:01 +0000 (07:39 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Tue, 30 Apr 2013 14:39:01 +0000 (07:39 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 30 Apr 2013 14:39:01 +0000 (07:39 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 30 Apr 2013 14:39:01 +0000 (07:39 -0700)
diff --combined Documentation/kernel-parameters.txt

index 12bbce3,22303b2..de12397
--- 1/Documentation/kernel-parameters.txt
--- 2/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@@ -44,7 -44,6 +44,7 @@@ parameter is applicable
         AVR32   AVR32 architecture is enabled.
         AX25    Appropriate AX.25 support is enabled.
         BLACKFIN Blackfin architecture is enabled.
+ +      CLK     Common clock infrastructure is enabled.
         DRM     Direct Rendering Management support is enabled.
         DYNAMIC_DEBUG Build in debug messages and enable them at runtime
         EDD     BIOS Enhanced Disk Drive Services (EDD) is enabled
@@@ -321,13 -320,6 +321,13 @@@ bytes respectively. Such letter suffixe
                         on: enable for both 32- and 64-bit processes
                         off: disable for both 32- and 64-bit processes
   
+ +      alloc_snapshot  [FTRACE]
+ +                      Allocate the ftrace snapshot buffer on boot up when the
+ +                      main buffer is allocated. This is handy if debugging
+ +                      and you need to use tracing_snapshot() on boot up, and
+ +                      do not want to use tracing_snapshot_alloc() as it needs
+ +                      to be done where GFP_KERNEL allocations are allowed.
+ +
         amd_iommu=      [HW,X86-64]
                         Pass parameters to the AMD IOMMU driver in the system.
                         Possible values are:
@@@ -473,13 -465,6 +473,13 @@@
   
         cio_ignore=     [S390]
                         See Documentation/s390/CommonIO for details.
+ +      clk_ignore_unused
+ +                      [CLK]
+ +                      Keep all clocks already enabled by bootloader on,
+ +                      even if no driver has claimed them. This is useful
+ +                      for debug and development, but should not be
+ +                      needed on a platform with proper driver support.
+ +                      For more information, see Documentation/clk.txt.
   
         clock=          [BUGS=X86-32, HW] gettimeofday clocksource override.
                         [Deprecated]
@@@ -611,6 -596,9 +611,6 @@@
                         is selected automatically. Check
                         Documentation/kdump/kdump.txt for further details.
   
- -      crashkernel_low=size[KMG]
- -                      [KNL, x86] parts under 4G.
- -
         crashkernel=range1:size1[,range2:size2,...][@offset]
                         [KNL] Same as above, but depends on the memory
                         in the running system. The syntax of range is
@@@ -618,26 -606,6 +618,26 @@@
                         a memory unit (amount[KMG]). See also
                         Documentation/kdump/kdump.txt for an example.
   
+ +      crashkernel=size[KMG],high
+ +                      [KNL, x86_64] range could be above 4G. Allows the kernel
+ +                      to allocate the physical memory region from the top, so
+ +                      it could be above 4G if the system has more than 4G of
+ +                      RAM installed. Otherwise the memory region will be
+ +                      allocated below 4G, if available.
+ +                      It will be ignored if crashkernel=X is specified.
+ +      crashkernel=size[KMG],low
+ +                      [KNL, x86_64] range under 4G. When crashkernel=X,high
+ +                      is passed, the kernel could allocate the physical memory
+ +                      region above 4G, which causes the second kernel to crash
+ +                      on systems that require some amount of low memory, e.g.
+ +                      swiotlb requires at least 64M+32K low memory.  The kernel
+ +                      would try to allocate 72M below 4G automatically.
+ +                      This option lets the user specify their own low range
+ +                      under 4G for the second kernel instead.
+ +                      0: to disable low allocation.
+ +                      It will be ignored when crashkernel=X,high is not used
+ +                      or the memory reserved is below 4G.
+ +
         cs89x0_dma=     [HW,NET]
                         Format: <dma>
   
@@@ -820,12 -788,6 +820,12 @@@
         edd=            [EDD]
                         Format: {"off" | "on" | "skip[mbr]"}
   
+ +      efi_no_storage_paranoia [EFI; X86]
+ +                      Using this parameter you can use more than 50% of
+ +                      your EFI variable storage. Use this parameter only if
+ +                      you are really sure that your UEFI does sane GC and
+ +                      fulfills the spec; otherwise your board may brick.
+ +
         eisa_irq_edge=  [PARISC,HW]
                         See header of drivers/parisc/eisa.c.
   
@@@ -2499,9 -2461,12 +2499,12 @@@
                         In kernels built with CONFIG_RCU_NOCB_CPU=y, set
                         the specified list of CPUs to be no-callback CPUs.
                         Invocation of these CPUs' RCU callbacks will
-                       be offloaded to "rcuoN" kthreads created for
-                       that purpose.  This reduces OS jitter on the
+                       be offloaded to "rcuox/N" kthreads created for
+                       that purpose, where "x" is "b" for RCU-bh, "p"
+                       for RCU-preempt, and "s" for RCU-sched, and "N"
+                       is the CPU number.  This reduces OS jitter on the
                         offloaded CPUs, which can be useful for HPC and
+ 
                         real-time workloads.  It can also improve energy
                         efficiency for asymmetric multiprocessors.
   
@@@ -2525,6 -2490,17 +2528,17 @@@
                         leaf rcu_node structure.  Useful for very large
                         systems.
   
+       rcutree.jiffies_till_first_fqs= [KNL,BOOT]
+                       Set delay from grace-period initialization to
+                       first attempt to force quiescent states.
+                       Units are jiffies, minimum value is zero,
+                       and maximum value is HZ.
+ 
+       rcutree.jiffies_till_next_fqs= [KNL,BOOT]
+                       Set delay between subsequent attempts to force
+                       quiescent states.  Units are jiffies, minimum
+                       value is one, and maximum value is HZ.
+ 
         rcutree.qhimark=        [KNL,BOOT]
                         Set threshold of queued
                         RCU callbacks over which batch limiting is disabled.
@@@ -2539,16 -2515,15 +2553,15 @@@
         rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
                         Set timeout for RCU CPU stall warning messages.
   
-       rcutree.jiffies_till_first_fqs= [KNL,BOOT]
-                       Set delay from grace-period initialization to
-                       first attempt to force quiescent states.
-                       Units are jiffies, minimum value is zero,
-                       and maximum value is HZ.
+       rcutree.rcu_idle_gp_delay=      [KNL,BOOT]
+                       Set wakeup interval for idle CPUs that have
+                       RCU callbacks (RCU_FAST_NO_HZ=y).
   
-       rcutree.jiffies_till_next_fqs= [KNL,BOOT]
-                       Set delay between subsequent attempts to force
-                       quiescent states.  Units are jiffies, minimum
-                       value is one, and maximum value is HZ.
+       rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT]
+                       Set wakeup interval for idle CPUs that have
+                       only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y).
+                       Lazy RCU callbacks are those which RCU can
+                       prove do nothing more than free memory.
   
         rcutorture.fqs_duration= [KNL,BOOT]
                         Set duration of force_quiescent_state bursts.
@@@ -3260,15 -3235,6 +3273,15 @@@
                         or other driver-specific files in the
                         Documentation/watchdog/ directory.
   
+ +      workqueue.disable_numa
+ +                      By default, all work items queued to unbound
+ +                      workqueues are affine to the NUMA nodes they're
+ +                      issued on, which results in better behavior in
+ +                      general.  If NUMA affinity needs to be disabled for
+ +                      whatever reason, this option can be used.  Note
+ +                      that this also can be controlled per-workqueue for
+ +                      workqueues visible under /sys/bus/workqueue/.
+ +
         x2apic_phys     [X86-64,APIC] Use x2apic physical mode instead of
                         default x2apic cluster mode on platforms
                         supporting x2apic.
diff --combined kernel/rcutree.c

index 2f8530b,2d5f94c..d853430
--- 1/kernel/rcutree.c
--- 2/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@@ -64,7 -64,7 +64,7 @@@
   static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
   static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
   
- #define RCU_STATE_INITIALIZER(sname, cr) { \
+ #define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
         .level = { &sname##_state.node[0] }, \
         .call = cr, \
         .fqs_state = RCU_GP_IDLE, \
@@@ -76,13 -76,14 +76,14 @@@
         .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
         .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
         .name = #sname, \
+       .abbr = sabbr, \
   }
   
   struct rcu_state rcu_sched_state =
-       RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
+       RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
   DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
   
- struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
+ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
   DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
   
   static struct rcu_state *rcu_state;
@@@ -223,6 -224,8 +224,8 @@@ static ulong jiffies_till_next_fqs = RC
   module_param(jiffies_till_first_fqs, ulong, 0644);
   module_param(jiffies_till_next_fqs, ulong, 0644);
   
+ static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+                                 struct rcu_data *rdp);
   static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
   static void force_quiescent_state(struct rcu_state *rsp);
   static int rcu_pending(int cpu);
@@@ -310,6 -313,8 +313,8 @@@ cpu_needs_another_gp(struct rcu_state *
   
         if (rcu_gp_in_progress(rsp))
                 return 0;  /* No, a grace period is already in progress. */
+       if (rcu_nocb_needs_gp(rsp))
+               return 1;  /* Yes, a no-CBs CPU needs one. */
         if (!rdp->nxttail[RCU_NEXT_TAIL])
                 return 0;  /* No, this is a no-CBs (or offline) CPU. */
         if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
@@@ -1035,10 -1040,11 +1040,11 @@@ static void init_callback_list(struct r
   {
         int i;
   
+       if (init_nocb_callback_list(rdp))
+               return;
         rdp->nxtlist = NULL;
         for (i = 0; i < RCU_NEXT_SIZE; i++)
                 rdp->nxttail[i] = &rdp->nxtlist;
-       init_nocb_callback_list(rdp);
   }
   
   /*
@@@ -1071,6 -1077,120 +1077,120 @@@ static unsigned long rcu_cbs_completed(
   }
   
   /*
+  * Trace-event helper function for rcu_start_future_gp() and
+  * rcu_nocb_wait_gp().
+  */
+ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
+                               unsigned long c, char *s)
+ {
+       trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
+                                     rnp->completed, c, rnp->level,
+                                     rnp->grplo, rnp->grphi, s);
+ }
+ 
+ /*
+  * Start some future grace period, as needed to handle newly arrived
+  * callbacks.  The required future grace periods are recorded in each
+  * rcu_node structure's ->need_future_gp field.
+  *
+  * The caller must hold the specified rcu_node structure's ->lock.
+  */
+ static unsigned long __maybe_unused
+ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
+ {
+       unsigned long c;
+       int i;
+       struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
+ 
+       /*
+        * Pick up grace-period number for new callbacks.  If this
+        * grace period is already marked as needed, return to the caller.
+        */
+       c = rcu_cbs_completed(rdp->rsp, rnp);
+       trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
+       if (rnp->need_future_gp[c & 0x1]) {
+               trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
+               return c;
+       }
+ 
+       /*
+        * If either this rcu_node structure or the root rcu_node structure
+        * believe that a grace period is in progress, then we must wait
+        * for the one following, which is in "c".  Because our request
+        * will be noticed at the end of the current grace period, we don't
+        * need to explicitly start one.
+        */
+       if (rnp->gpnum != rnp->completed ||
+           ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
+               rnp->need_future_gp[c & 0x1]++;
+               trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
+               return c;
+       }
+ 
+       /*
+        * There might be no grace period in progress.  If we don't already
+        * hold it, acquire the root rcu_node structure's lock in order to
+        * start one (if needed).
+        */
+       if (rnp != rnp_root)
+               raw_spin_lock(&rnp_root->lock);
+ 
+       /*
+        * Get a new grace-period number.  If there really is no grace
+        * period in progress, it will be smaller than the one we obtained
+        * earlier.  Adjust callbacks as needed.  Note that even no-CBs
+        * CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
+        */
+       c = rcu_cbs_completed(rdp->rsp, rnp_root);
+       for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
+               if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
+                       rdp->nxtcompleted[i] = c;
+ 
+       /*
+        * If the need for the required grace period is already
+        * recorded, trace and leave.
+        */
+       if (rnp_root->need_future_gp[c & 0x1]) {
+               trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
+               goto unlock_out;
+       }
+ 
+       /* Record the need for the future grace period. */
+       rnp_root->need_future_gp[c & 0x1]++;
+ 
+       /* If a grace period is not already in progress, start one. */
+       if (rnp_root->gpnum != rnp_root->completed) {
+               trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
+       } else {
+               trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
+               rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
+       }
+ unlock_out:
+       if (rnp != rnp_root)
+               raw_spin_unlock(&rnp_root->lock);
+       return c;
+ }
+ 
+ /*
+  * Clean up any old requests for the just-ended grace period.  Also return
+  * whether any additional grace periods have been requested.  Also invoke
+  * rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
+  * waiting for this grace period to complete.
+  */
+ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+ {
+       int c = rnp->completed;
+       int needmore;
+       struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+ 
+       rcu_nocb_gp_cleanup(rsp, rnp);
+       rnp->need_future_gp[c & 0x1] = 0;
+       needmore = rnp->need_future_gp[(c + 1) & 0x1];
+       trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
+       return needmore;
+ }
+ 
+ /*
    * If there is room, assign a ->completed number to any callbacks on
    * this CPU that have not already been assigned.  Also accelerate any
    * callbacks that were previously assigned a ->completed number that has
@@@ -1129,6 -1249,8 +1249,8 @@@ static void rcu_accelerate_cbs(struct r
                 rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
                 rdp->nxtcompleted[i] = c;
         }
+       /* Record any needed additional grace periods. */
+       rcu_start_future_gp(rnp, rdp);
   
         /* Trace depending on how much we were able to accelerate. */
         if (!*rdp->nxttail[RCU_WAIT_TAIL])
@@@ -1308,9 -1430,9 +1430,9 @@@ static int rcu_gp_init(struct rcu_stat
                 rdp = this_cpu_ptr(rsp->rda);
                 rcu_preempt_check_blocked_tasks(rnp);
                 rnp->qsmask = rnp->qsmaskinit;
-               rnp->gpnum = rsp->gpnum;
+               ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
                 WARN_ON_ONCE(rnp->completed != rsp->completed);
-               rnp->completed = rsp->completed;
+               ACCESS_ONCE(rnp->completed) = rsp->completed;
                 if (rnp == rdp->mynode)
                         rcu_start_gp_per_cpu(rsp, rnp, rdp);
                 rcu_preempt_boost_start_gp(rnp);
@@@ -1319,7 -1441,8 +1441,8 @@@
                                             rnp->grphi, rnp->qsmask);
                 raw_spin_unlock_irq(&rnp->lock);
   #ifdef CONFIG_PROVE_RCU_DELAY
-               if ((prandom_u32() % (rcu_num_nodes * 8)) == 0)
- -              if ((random32() % (rcu_num_nodes * 8)) == 0 &&
++              if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 &&
+                   system_state == SYSTEM_RUNNING)
                         schedule_timeout_uninterruptible(2);
   #endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
                 cond_resched();
@@@ -1361,6 -1484,7 +1484,7 @@@ int rcu_gp_fqs(struct rcu_state *rsp, i
   static void rcu_gp_cleanup(struct rcu_state *rsp)
   {
         unsigned long gp_duration;
+       int nocb = 0;
         struct rcu_data *rdp;
         struct rcu_node *rnp = rcu_get_root(rsp);
   
@@@ -1390,17 -1514,23 +1514,23 @@@
          */
         rcu_for_each_node_breadth_first(rsp, rnp) {
                 raw_spin_lock_irq(&rnp->lock);
-               rnp->completed = rsp->gpnum;
+               ACCESS_ONCE(rnp->completed) = rsp->gpnum;
+               rdp = this_cpu_ptr(rsp->rda);
+               if (rnp == rdp->mynode)
+                       __rcu_process_gp_end(rsp, rnp, rdp);
+               nocb += rcu_future_gp_cleanup(rsp, rnp);
                 raw_spin_unlock_irq(&rnp->lock);
                 cond_resched();
         }
         rnp = rcu_get_root(rsp);
         raw_spin_lock_irq(&rnp->lock);
+       rcu_nocb_gp_set(rnp, nocb);
   
         rsp->completed = rsp->gpnum; /* Declare grace period done. */
         trace_rcu_grace_period(rsp->name, rsp->completed, "end");
         rsp->fqs_state = RCU_GP_IDLE;
         rdp = this_cpu_ptr(rsp->rda);
+       rcu_advance_cbs(rsp, rnp, rdp);  /* Reduce false positives below. */
         if (cpu_needs_another_gp(rsp, rdp))
                 rsp->gp_flags = 1;
         raw_spin_unlock_irq(&rnp->lock);
@@@ -1476,57 -1606,62 +1606,62 @@@ static int __noreturn rcu_gp_kthread(vo
   /*
    * Start a new RCU grace period if warranted, re-initializing the hierarchy
    * in preparation for detecting the next grace period.  The caller must hold
-  * the root node's ->lock, which is released before return.  Hard irqs must
-  * be disabled.
+  * the root node's ->lock and hard irqs must be disabled.
    *
    * Note that it is legal for a dying CPU (which is marked as offline) to
    * invoke this function.  This can happen when the dying CPU reports its
    * quiescent state.
    */
   static void
- rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
-       __releases(rcu_get_root(rsp)->lock)
+ rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
+                     struct rcu_data *rdp)
   {
-       struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
-       struct rcu_node *rnp = rcu_get_root(rsp);
- 
-       if (!rsp->gp_kthread ||
-           !cpu_needs_another_gp(rsp, rdp)) {
+       if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
                 /*
                  * Either we have not yet spawned the grace-period
                  * task, this CPU does not need another grace period,
                  * or a grace period is already in progress.
                  * Either way, don't start a new grace period.
                  */
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
                 return;
         }
- 
-       /*
-        * Because there is no grace period in progress right now,
-        * any callbacks we have up to this point will be satisfied
-        * by the next grace period.  So this is a good place to
-        * assign a grace period number to recently posted callbacks.
-        */
-       rcu_accelerate_cbs(rsp, rnp, rdp);
- 
         rsp->gp_flags = RCU_GP_FLAG_INIT;
-       raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
- 
-       /* Ensure that CPU is aware of completion of last grace period. */
-       rcu_process_gp_end(rsp, rdp);
-       local_irq_restore(flags);
   
         /* Wake up rcu_gp_kthread() to start the grace period. */
         wake_up(&rsp->gp_wq);
   }
   
   /*
+  * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
+  * callbacks.  Note that rcu_start_gp_advanced() cannot do this because it
+  * is invoked indirectly from rcu_advance_cbs(), which would result in
+  * endless recursion -- or would do so if it wasn't for the self-deadlock
+  * that is encountered beforehand.
+  */
+ static void
+ rcu_start_gp(struct rcu_state *rsp)
+ {
+       struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+       struct rcu_node *rnp = rcu_get_root(rsp);
+ 
+       /*
+        * If there is no grace period in progress right now, any
+        * callbacks we have up to this point will be satisfied by the
+        * next grace period.  Also, advancing the callbacks reduces the
+        * probability of false positives from cpu_needs_another_gp()
+        * resulting in pointless grace periods.  So, advance callbacks
+        * then start the grace period!
+        */
+       rcu_advance_cbs(rsp, rnp, rdp);
+       rcu_start_gp_advanced(rsp, rnp, rdp);
+ }
+ 
+ /*
    * Report a full set of quiescent states to the specified rcu_state
    * data structure.  This involves cleaning up after the prior grace
    * period and letting rcu_start_gp() start up the next grace period
-  * if one is needed.  Note that the caller must hold rnp->lock, as
-  * required by rcu_start_gp(), which will release it.
+  * if one is needed.  Note that the caller must hold rnp->lock, which
+  * is released before return.
    */
   static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
         __releases(rcu_get_root(rsp)->lock)
@@@ -2124,7 -2259,8 +2259,8 @@@ __rcu_process_callbacks(struct rcu_stat
         local_irq_save(flags);
         if (cpu_needs_another_gp(rsp, rdp)) {
                 raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
-               rcu_start_gp(rsp, flags);  /* releases above lock */
+               rcu_start_gp(rsp);
+               raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
         } else {
                 local_irq_restore(flags);
         }
@@@ -2169,7 -2305,8 +2305,8 @@@ static void invoke_rcu_callbacks(struc
   
   static void invoke_rcu_core(void)
   {
-       raise_softirq(RCU_SOFTIRQ);
+       if (cpu_online(smp_processor_id()))
+               raise_softirq(RCU_SOFTIRQ);
   }
   
   /*
@@@ -2204,11 -2341,11 +2341,11 @@@ static void __call_rcu_core(struct rcu_
   
                 /* Start a new grace period if one not already started. */
                 if (!rcu_gp_in_progress(rsp)) {
-                       unsigned long nestflag;
                         struct rcu_node *rnp_root = rcu_get_root(rsp);
   
-                       raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
-                       rcu_start_gp(rsp, nestflag);  /* rlses rnp_root->lock */
+                       raw_spin_lock(&rnp_root->lock);
+                       rcu_start_gp(rsp);
+                       raw_spin_unlock(&rnp_root->lock);
                 } else {
                         /* Give the grace period a kick. */
                         rdp->blimit = LONG_MAX;
@@@ -2628,19 -2765,27 +2765,27 @@@ static int rcu_pending(int cpu
   }
   
   /*
-  * Check to see if any future RCU-related work will need to be done
-  * by the current CPU, even if none need be done immediately, returning
-  * 1 if so.
+  * Return true if the specified CPU has any callback.  If all_lazy is
+  * non-NULL, store an indication of whether all callbacks are lazy.
+  * (If there are no callbacks, all of them are deemed to be lazy.)
    */
- static int rcu_cpu_has_callbacks(int cpu)
+ static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
   {
+       bool al = true;
+       bool hc = false;
+       struct rcu_data *rdp;
         struct rcu_state *rsp;
   
-       /* RCU callbacks either ready or pending? */
-       for_each_rcu_flavor(rsp)
-               if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
-                       return 1;
-       return 0;
+       for_each_rcu_flavor(rsp) {
+               rdp = per_cpu_ptr(rsp->rda, cpu);
+               if (rdp->qlen != rdp->qlen_lazy)
+                       al = false;
+               if (rdp->nxtlist)
+                       hc = true;
+       }
+       if (all_lazy)
+               *all_lazy = al;
+       return hc;
   }
   
   /*
@@@ -2859,7 -3004,6 +3004,6 @@@ rcu_init_percpu_data(int cpu, struct rc
         rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
         atomic_set(&rdp->dynticks->dynticks,
                    (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
-       rcu_prepare_for_idle_init(cpu);
         raw_spin_unlock(&rnp->lock);            /* irqs remain disabled. */
   
         /* Add CPU to rcu_node bitmasks. */
@@@ -2909,7 -3053,6 +3053,6 @@@ static int __cpuinit rcu_cpu_notify(str
         struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
         struct rcu_node *rnp = rdp->mynode;
         struct rcu_state *rsp;
-       int ret = NOTIFY_OK;
   
         trace_rcu_utilization("Start CPU hotplug");
         switch (action) {
@@@ -2923,21 -3066,12 +3066,12 @@@
                 rcu_boost_kthread_setaffinity(rnp, -1);
                 break;
         case CPU_DOWN_PREPARE:
-               if (nocb_cpu_expendable(cpu))
-                       rcu_boost_kthread_setaffinity(rnp, cpu);
-               else
-                       ret = NOTIFY_BAD;
+               rcu_boost_kthread_setaffinity(rnp, cpu);
                 break;
         case CPU_DYING:
         case CPU_DYING_FROZEN:
-               /*
-                * The whole machine is "stopped" except this CPU, so we can
-                * touch any data without introducing corruption. We send the
-                * dying CPU's callbacks to an arbitrarily chosen online CPU.
-                */
                 for_each_rcu_flavor(rsp)
                         rcu_cleanup_dying_cpu(rsp);
-               rcu_cleanup_after_idle(cpu);
                 break;
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
@@@ -2950,7 -3084,7 +3084,7 @@@
                 break;
         }
         trace_rcu_utilization("End CPU hotplug");
-       return ret;
+       return NOTIFY_OK;
   }
   
   /*
@@@ -3085,6 -3219,7 +3219,7 @@@ static void __init rcu_init_one(struct 
                         }
                         rnp->level = i;
                         INIT_LIST_HEAD(&rnp->blkd_tasks);
+                       rcu_init_one_nocb(rnp);
                 }
         }
   
@@@ -3170,8 -3305,7 +3305,7 @@@ void __init rcu_init(void
         rcu_init_one(&rcu_sched_state, &rcu_sched_data);
         rcu_init_one(&rcu_bh_state, &rcu_bh_data);
         __rcu_init_preempt();
-       rcu_init_nocb();
-        open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+       open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
   
         /*
          * We don't need protection against CPU-hotplug here because
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 30 Apr 2013 14:39:01 +0000 (07:39 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 30 Apr 2013 14:39:01 +0000 (07:39 -0700)
		1	2
Documentation/kernel-parameters.txt	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/rcutree.c	patch \|	diff1 \|	diff2 \|	blob \| history