Merge tag 'rcu-urgent.2022.12.17a' of git://git.kernel.org/pub/scm/linux/kernel/git...

[platform/kernel/linux-starfive.git] / kernel / rcu / tree.c
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c

index 83c6baa..cf34a96 100644 (file)
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -301,12 +301,6 @@ static bool rcu_dynticks_in_eqs(int snap)
         return !(snap & RCU_DYNTICKS_IDX);
  }
  
-/* Return true if the specified CPU is currently idle from an RCU viewpoint.  */
-bool rcu_is_idle_cpu(int cpu)
-{
-       return rcu_dynticks_in_eqs(rcu_dynticks_snap(cpu));
-}
-
  /*
   * Return true if the CPU corresponding to the specified rcu_data
   * structure has spent some time in an extended quiescent state since
@@ -2110,7 +2104,7 @@ int rcutree_dying_cpu(unsigned int cpu)
         if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
                 return 0;
  
-       blkd = !!(rnp->qsmask & rdp->grpmask);
+       blkd = !!(READ_ONCE(rnp->qsmask) & rdp->grpmask);
         trace_rcu_grace_period(rcu_state.name, READ_ONCE(rnp->gp_seq),
                                blkd ? TPS("cpuofl-bgp") : TPS("cpuofl"));
         return 0;
@@ -2420,7 +2414,7 @@ void rcu_force_quiescent_state(void)
         struct rcu_node *rnp_old = NULL;
  
         /* Funnel through hierarchy to reduce memory contention. */
-       rnp = __this_cpu_read(rcu_data.mynode);
+       rnp = raw_cpu_read(rcu_data.mynode);
         for (; rnp != NULL; rnp = rnp->parent) {
                 ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
                        !raw_spin_trylock(&rnp->fqslock);
@@ -2732,47 +2726,8 @@ static void check_cb_ovld(struct rcu_data *rdp)
         raw_spin_unlock_rcu_node(rnp);
  }
  
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed.  However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.
- *
- * RCU read-side critical sections are delimited by rcu_read_lock()
- * and rcu_read_unlock(), and may be nested.  In addition, but only in
- * v5.0 and later, regions of code across which interrupts, preemption,
- * or softirqs have been disabled also serve as RCU read-side critical
- * sections.  This includes hardware interrupt handlers, softirq handlers,
- * and NMI handlers.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section.  On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu().  It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section.  Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- *
- * Implementation of these memory-ordering guarantees is described here:
- * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
- */
-void call_rcu(struct rcu_head *head, rcu_callback_t func)
+static void
+__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
  {
         static atomic_t doublefrees;
         unsigned long flags;
@@ -2813,7 +2768,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
         }
  
         check_cb_ovld(rdp);
-       if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
+       if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy))
                 return; // Enqueued onto ->nocb_bypass, so just leave.
         // If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
         rcu_segcblist_enqueue(&rdp->cblist, head);
@@ -2835,8 +2790,84 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
                 local_irq_restore(flags);
         }
  }
-EXPORT_SYMBOL_GPL(call_rcu);
  
+#ifdef CONFIG_RCU_LAZY
+/**
+ * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
+ * flush all lazy callbacks (including the new one) to the main ->cblist while
+ * doing so.
+ *
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.
+ *
+ * Use this API instead of call_rcu() if you don't want the callback to be
+ * invoked after very long periods of time, which can happen on systems without
+ * memory pressure and on systems which are lightly loaded or mostly idle.
+ * This function will cause callbacks to be invoked sooner than later at the
+ * expense of extra power. Other than that, this function is identical to, and
+ * reuses call_rcu()'s logic. Refer to call_rcu() for more details about memory
+ * ordering and other functionality.
+ */
+void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
+{
+       __call_rcu_common(head, func, false);
+}
+EXPORT_SYMBOL_GPL(call_rcu_hurry);
+#endif
+
+/**
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
+ * By default the callbacks are 'lazy' and are kept hidden from the main
+ * ->cblist to prevent starting of grace periods too soon.
+ * If you desire grace periods to start very soon, use call_rcu_hurry().
+ *
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
+ *
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.  However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.
+ *
+ * RCU read-side critical sections are delimited by rcu_read_lock()
+ * and rcu_read_unlock(), and may be nested.  In addition, but only in
+ * v5.0 and later, regions of code across which interrupts, preemption,
+ * or softirqs have been disabled also serve as RCU read-side critical
+ * sections.  This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical sections.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
+ *
+ * Implementation of these memory-ordering guarantees is described here:
+ * Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
+ */
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
+{
+       __call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
+}
+EXPORT_SYMBOL_GPL(call_rcu);
  
  /* Maximum number of jiffies to wait before draining a batch. */
  #define KFREE_DRAIN_JIFFIES (5 * HZ)
@@ -3511,7 +3542,7 @@ void synchronize_rcu(void)
                 if (rcu_gp_is_expedited())
                         synchronize_rcu_expedited();
                 else
-                       wait_rcu_gp(call_rcu);
+                       wait_rcu_gp(call_rcu_hurry);
                 return;
         }
  
@@ -3898,6 +3929,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
  {
         unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
         unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
+       bool wake_nocb = false;
+       bool was_alldone = false;
  
         lockdep_assert_held(&rcu_state.barrier_lock);
         if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
@@ -3906,7 +3939,14 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
         rdp->barrier_head.func = rcu_barrier_callback;
         debug_rcu_head_queue(&rdp->barrier_head);
         rcu_nocb_lock(rdp);
-       WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
+       /*
+        * Flush bypass and wake up rcuog if we add callbacks to an empty regular
+        * queue. This way we don't wait for bypass timer that can reach seconds
+        * if it's fully lazy.
+        */
+       was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
+       WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
+       wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
         if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
                 atomic_inc(&rcu_state.barrier_cpu_count);
         } else {
@@ -3914,6 +3954,8 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
                 rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
         }
         rcu_nocb_unlock(rdp);
+       if (wake_nocb)
+               wake_nocb_gp(rdp, false);
         smp_store_release(&rdp->barrier_seq_snap, gseq);
  }
  
@@ -4280,8 +4322,6 @@ void rcu_report_dead(unsigned int cpu)
         // Do any dangling deferred wakeups.
         do_nocb_deferred_wakeup(rdp);
  
-       /* QS for any half-done expedited grace period. */
-       rcu_report_exp_rdp(rdp);
         rcu_preempt_deferred_qs(current);
  
         /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
@@ -4329,7 +4369,7 @@ void rcutree_migrate_callbacks(int cpu)
         my_rdp = this_cpu_ptr(&rcu_data);
         my_rnp = my_rdp->mynode;
         rcu_nocb_lock(my_rdp); /* irqs already disabled. */
-       WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
+       WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false));
         raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
         /* Leverage recent GPs and set GP for new callbacks. */
         needwake = rcu_advance_cbs(my_rnp, rdp) ||