Merge tag 'rcu-urgent.2022.12.17a' of git://git.kernel.org/pub/scm/linux/kernel/git...

author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 21 Dec 2022 15:59:57 +0000 (07:59 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 21 Dec 2022 15:59:57 +0000 (07:59 -0800)
diff --combined kernel/rcu/tree.c

index d04f219,83c6baa..cf34a96
--- 1/kernel/rcu/tree.c
--- 2/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@@ -301,6 -301,12 +301,6 @@@ static bool rcu_dynticks_in_eqs(int sna
         return !(snap & RCU_DYNTICKS_IDX);
   }
   
- -/* Return true if the specified CPU is currently idle from an RCU viewpoint.  */
- -bool rcu_is_idle_cpu(int cpu)
- -{
- -      return rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
- -}
- -
   /*
    * Return true if the CPU corresponding to the specified rcu_data
    * structure has spent some time in an extended quiescent state since
@@@ -1362,7 -1368,7 +1362,7 @@@ static void rcu_poll_gp_seq_start(unsig
   {
         struct rcu_node *rnp = rcu_get_root();
   
-       if (rcu_init_invoked())
+       if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
                 raw_lockdep_assert_held_rcu_node(rnp);
   
         // If RCU was idle, note beginning of GP.
@@@ -1378,7 -1384,7 +1378,7 @@@ static void rcu_poll_gp_seq_end(unsigne
   {
         struct rcu_node *rnp = rcu_get_root();
   
-       if (rcu_init_invoked())
+       if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
                 raw_lockdep_assert_held_rcu_node(rnp);
   
         // If the previously noted GP is still in effect, record the
@@@ -1401,7 -1407,8 +1401,8 @@@ static void rcu_poll_gp_seq_start_unloc
         struct rcu_node *rnp = rcu_get_root();
   
         if (rcu_init_invoked()) {
-               lockdep_assert_irqs_enabled();
+               if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
+                       lockdep_assert_irqs_enabled();
                 raw_spin_lock_irqsave_rcu_node(rnp, flags);
         }
         rcu_poll_gp_seq_start(snap);
@@@ -1417,7 -1424,8 +1418,8 @@@ static void rcu_poll_gp_seq_end_unlocke
         struct rcu_node *rnp = rcu_get_root();
   
         if (rcu_init_invoked()) {
-               lockdep_assert_irqs_enabled();
+               if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
+                       lockdep_assert_irqs_enabled();
                 raw_spin_lock_irqsave_rcu_node(rnp, flags);
         }
         rcu_poll_gp_seq_end(snap);
@@@ -2102,7 -2110,7 +2104,7 @@@ int rcutree_dying_cpu(unsigned int cpu
         if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
                 return 0;
   
- -      blkd = !!(rnp->qsmask & rdp->grpmask);
+ +      blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
         trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
                                blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
         return 0;
@@@ -2412,7 -2420,7 +2414,7 @@@ void rcu_force_quiescent_state(void
         struct rcu_node *rnp_old = NULL;
   
         /* Funnel through hierarchy to reduce memory contention. */
- -      rnp = __this_cpu_read(rcu_data.mynode);
+ +      rnp = raw_cpu_read(rcu_data.mynode);
         for (; rnp != NULL; rnp = rnp->parent) {
                 ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
                        !raw_spin_trylock(&rnp->fqslock);
@@@ -2724,8 -2732,47 +2726,8 @@@ static void check_cb_ovld(struct rcu_da
         raw_spin_unlock_rcu_node(rnp);
   }
   
- -/**
- - * call_rcu() - Queue an RCU callback for invocation after a grace period.
- - * @head: structure to be used for queueing the RCU updates.
- - * @func: actual callback function to be invoked after the grace period
- - *
- - * The callback function will be invoked some time after a full grace
- - * period elapses, in other words after all pre-existing RCU read-side
- - * critical sections have completed.  However, the callback function
- - * might well execute concurrently with RCU read-side critical sections
- - * that started after call_rcu() was invoked.
- - *
- - * RCU read-side critical sections are delimited by rcu_read_lock()
- - * and rcu_read_unlock(), and may be nested.  In addition, but only in
- - * v5.0 and later, regions of code across which interrupts, preemption,
- - * or softirqs have been disabled also serve as RCU read-side critical
- - * sections.  This includes hardware interrupt handlers, softirq handlers,
- - * and NMI handlers.
- - *
- - * Note that all CPUs must agree that the grace period extended beyond
- - * all pre-existing RCU read-side critical section.  On systems with more
- - * than one CPU, this means that when "func()" is invoked, each CPU is
- - * guaranteed to have executed a full memory barrier since the end of its
- - * last RCU read-side critical section whose beginning preceded the call
- - * to call_rcu().  It also means that each CPU executing an RCU read-side
- - * critical section that continues beyond the start of "func()" must have
- - * executed a memory barrier after the call_rcu() but before the beginning
- - * of that RCU read-side critical section.  Note that these guarantees
- - * include CPUs that are offline, idle, or executing in user mode, as
- - * well as CPUs that are executing in the kernel.
- - *
- - * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- - * resulting RCU callback function "func()", then both CPU A and CPU B are
- - * guaranteed to execute a full memory barrier during the time interval
- - * between the call to call_rcu() and the invocation of "func()" -- even
- - * if CPU A and CPU B are the same CPU (but again only if the system has
- - * more than one CPU).
- - *
- - * Implementation of these memory-ordering guarantees is described here:
- - * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
- - */
- -void call_rcu(struct rcu_head *head, rcu_callback_t func)
+ +static void
+ +__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
   {
         static atomic_t doublefrees;
         unsigned long flags;
@@@ -2766,7 -2813,7 +2768,7 @@@
         }
   
         check_cb_ovld(rdp);
- -      if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
+ +      if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy))
                 return; // Enqueued onto ->nocb_bypass, so just leave.
         // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
         rcu_segcblist_enqueue(&rdp->cblist, head);
@@@ -2788,84 -2835,8 +2790,84 @@@
                 local_irq_restore(flags);
         }
   }
- -EXPORT_SYMBOL_GPL(call_rcu);
   
+ +#ifdef CONFIG_RCU_LAZY
+ +/**
+ + * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
+ + * flush all lazy callbacks (including the new one) to the main ->cblist while
+ + * doing so.
+ + *
+ + * @head: structure to be used for queueing the RCU updates.
+ + * @func: actual callback function to be invoked after the grace period
+ + *
+ + * The callback function will be invoked some time after a full grace
+ + * period elapses, in other words after all pre-existing RCU read-side
+ + * critical sections have completed.
+ + *
+ + * Use this API instead of call_rcu() if you don't want the callback to be
+ + * invoked after very long periods of time, which can happen on systems without
+ + * memory pressure and on systems which are lightly loaded or mostly idle.
+ + * This function will cause callbacks to be invoked sooner than later at the
+ + * expense of extra power. Other than that, this function is identical to, and
+ + * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory
+ + * ordering and other functionality.
+ + */
+ +void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
+ +{
+ +      return __call_rcu_common(head, func, false);
+ +}
+ +EXPORT_SYMBOL_GPL(call_rcu_hurry);
+ +#endif
+ +
+ +/**
+ + * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ + * By default the callbacks are 'lazy' and are kept hidden from the main
+ + * ->cblist to prevent starting of grace periods too soon.
+ + * If you desire grace periods to start very soon, use call_rcu_hurry().
+ + *
+ + * @head: structure to be used for queueing the RCU updates.
+ + * @func: actual callback function to be invoked after the grace period
+ + *
+ + * The callback function will be invoked some time after a full grace
+ + * period elapses, in other words after all pre-existing RCU read-side
+ + * critical sections have completed.  However, the callback function
+ + * might well execute concurrently with RCU read-side critical sections
+ + * that started after call_rcu() was invoked.
+ + *
+ + * RCU read-side critical sections are delimited by rcu_read_lock()
+ + * and rcu_read_unlock(), and may be nested.  In addition, but only in
+ + * v5.0 and later, regions of code across which interrupts, preemption,
+ + * or softirqs have been disabled also serve as RCU read-side critical
+ + * sections.  This includes hardware interrupt handlers, softirq handlers,
+ + * and NMI handlers.
+ + *
+ + * Note that all CPUs must agree that the grace period extended beyond
+ + * all pre-existing RCU read-side critical section.  On systems with more
+ + * than one CPU, this means that when "func()" is invoked, each CPU is
+ + * guaranteed to have executed a full memory barrier since the end of its
+ + * last RCU read-side critical section whose beginning preceded the call
+ + * to call_rcu().  It also means that each CPU executing an RCU read-side
+ + * critical section that continues beyond the start of "func()" must have
+ + * executed a memory barrier after the call_rcu() but before the beginning
+ + * of that RCU read-side critical section.  Note that these guarantees
+ + * include CPUs that are offline, idle, or executing in user mode, as
+ + * well as CPUs that are executing in the kernel.
+ + *
+ + * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ + * resulting RCU callback function "func()", then both CPU A and CPU B are
+ + * guaranteed to execute a full memory barrier during the time interval
+ + * between the call to call_rcu() and the invocation of "func()" -- even
+ + * if CPU A and CPU B are the same CPU (but again only if the system has
+ + * more than one CPU).
+ + *
+ + * Implementation of these memory-ordering guarantees is described here:
+ + * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ + */
+ +void call_rcu(struct rcu_head *head, rcu_callback_t func)
+ +{
+ +      return __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
+ +}
+ +EXPORT_SYMBOL_GPL(call_rcu);
   
   /* Maximum number of jiffies to wait before draining a batch. */
   #define KFREE_DRAIN_JIFFIES (5 * HZ)
@@@ -3540,7 -3511,7 +3542,7 @@@ void synchronize_rcu(void
                 if (rcu_gp_is_expedited())
                         synchronize_rcu_expedited();
                 else
- -                      wait_rcu_gp(call_rcu);
+ +                      wait_rcu_gp(call_rcu_hurry);
                 return;
         }
   
@@@ -3927,8 -3898,6 +3929,8 @@@ static void rcu_barrier_entrain(struct 
   {
         unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
         unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
+ +      bool wake_nocb = false;
+ +      bool was_alldone = false;
   
         lockdep_assert_held(&rcu_state.barrier_lock);
         if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
@@@ -3937,14 -3906,7 +3939,14 @@@
         rdp->barrier_head.func = rcu_barrier_callback;
         debug_rcu_head_queue(&rdp->barrier_head);
         rcu_nocb_lock(rdp);
- -      WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
+ +      /*
+ +       * Flush bypass and wakeup rcuog if we add callbacks to an empty regular
+ +       * queue. This way we don't wait for bypass timer that can reach seconds
+ +       * if it's fully lazy.
+ +       */
+ +      was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
+ +      WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
+ +      wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
         if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
                 atomic_inc(&rcu_state.barrier_cpu_count);
         } else {
@@@ -3952,8 -3914,6 +3954,8 @@@
                 rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
         }
         rcu_nocb_unlock(rdp);
+ +      if (wake_nocb)
+ +              wake_nocb_gp(rdp, false);
         smp_store_release(&rdp->barrier_seq_snap, gseq);
   }
   
@@@ -4320,6 -4280,8 +4322,6 @@@ void rcu_report_dead(unsigned int cpu
         // Do any dangling deferred wakeups.
         do_nocb_deferred_wakeup(rdp);
   
- -      /* QS for any half-done expedited grace period. */
- -      rcu_report_exp_rdp(rdp);
         rcu_preempt_deferred_qs(current);
   
         /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
@@@ -4367,7 -4329,7 +4369,7 @@@ void rcutree_migrate_callbacks(int cpu
         my_rdp = this_cpu_ptr(&rcu_data);
         my_rnp = my_rdp->mynode;
         rcu_nocb_lock(my_rdp); /* irqs already disabled. */
- -      WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
+ +      WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false));
         raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
         /* Leverage recent GPs and set GP for new callbacks. */
         needwake = rcu_advance_cbs(my_rnp, rdp) ||
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 21 Dec 2022 15:59:57 +0000 (07:59 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 21 Dec 2022 15:59:57 +0000 (07:59 -0800)