irq_work, smp: Allow irq_work on call_single_queue
author     Peter Zijlstra <peterz@infradead.org>
Tue, 26 May 2020 16:11:02 +0000 (18:11 +0200)
committer  Ingo Molnar <mingo@kernel.org>
Thu, 28 May 2020 08:54:15 +0000 (10:54 +0200)
Currently irq_work_queue_on() issues an unconditional
arch_send_call_function_single_ipi() and has the handler do
irq_work_run().

This is unfortunate in that it makes the IPI handler look at a second
cacheline and misses the opportunity to avoid the IPI. Instead, note
that struct irq_work and struct __call_single_data are very similar in
layout, so use a few bits in the flags word to encode a type and stick
the irq_work on the call_single_queue list.
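
To illustrate the shared flag space this relies on, here is a rough
stand-alone sketch (not kernel code; it merely mirrors the constants the
patch adds to include/linux/smp.h below, where CSD_FLAG_LOCK occupies the
same bit as IRQ_WORK_PENDING and the entry type lives in the high nibble):

  #include <stdio.h>

  /* Low bits: irq_work state; CSD_FLAG_LOCK aliases IRQ_WORK_PENDING. */
  #define IRQ_WORK_PENDING        0x01
  #define IRQ_WORK_BUSY           0x02
  #define IRQ_WORK_CLAIMED        (IRQ_WORK_PENDING | IRQ_WORK_BUSY)
  /* High nibble: entry type on the call_single_queue. */
  #define CSD_TYPE_ASYNC          0x00
  #define CSD_TYPE_SYNC           0x10
  #define CSD_TYPE_IRQ_WORK       0x20
  #define CSD_FLAG_TYPE_MASK      0xF0

  int main(void)
  {
          /* Claiming an irq_work now also stamps its type into the flags. */
          unsigned int flags = IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK;

          /* The IPI handler only needs the type bits to tell entries apart. */
          printf("type %#x, irq_work? %d\n",
                 flags & CSD_FLAG_TYPE_MASK,
                 (flags & CSD_FLAG_TYPE_MASK) == CSD_TYPE_IRQ_WORK);
          return 0;
  }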

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200526161908.011635912@infradead.org
include/linux/irq_work.h
include/linux/smp.h
kernel/irq_work.c
kernel/smp.c

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 3b752e8..f23a359 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -13,6 +13,8 @@
  * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
  */
 
+/* flags share CSD_FLAG_ space */
+
 #define IRQ_WORK_PENDING       BIT(0)
 #define IRQ_WORK_BUSY          BIT(1)
 
 
 #define IRQ_WORK_CLAIMED       (IRQ_WORK_PENDING | IRQ_WORK_BUSY)
 
+/*
+ * structure shares layout with single_call_data_t.
+ */
 struct irq_work {
-       atomic_t flags;
        struct llist_node llnode;
+       atomic_t flags;
        void (*func)(struct irq_work *);
 };
 
diff --git a/include/linux/smp.h b/include/linux/smp.h
index cbc9162..45ad6e3 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
 
 typedef void (*smp_call_func_t)(void *info);
 typedef bool (*smp_cond_func_t)(int cpu, void *info);
+
+enum {
+       CSD_FLAG_LOCK           = 0x01,
+
+       /* IRQ_WORK_flags */
+
+       CSD_TYPE_ASYNC          = 0x00,
+       CSD_TYPE_SYNC           = 0x10,
+       CSD_TYPE_IRQ_WORK       = 0x20,
+       CSD_FLAG_TYPE_MASK      = 0xF0,
+};
+
+/*
+ * structure shares (partial) layout with struct irq_work
+ */
 struct __call_single_data {
        struct llist_node llist;
+       unsigned int flags;
        smp_call_func_t func;
        void *info;
-       unsigned int flags;
 };
 
 /* Use __aligned() to avoid to use 2 cache lines for 1 csd */
 typedef struct __call_single_data call_single_data_t
        __aligned(sizeof(struct __call_single_data));
 
+/*
+ * Enqueue a llist_node on the call_single_queue; be very careful, read
+ * flush_smp_call_function_queue() in detail.
+ */
+extern void __smp_call_single_queue(int cpu, struct llist_node *node);
+
 /* total number of cpus in this system (may exceed NR_CPUS) */
 extern unsigned int total_cpus;
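
The IPI avoidance mentioned in the changelog falls out of llist_add()
returning true only when the list was previously empty, so only the enqueue
that makes call_single_queue non-empty raises the interrupt. A minimal
single-threaded sketch of that pattern (the mock_ names are hypothetical;
the real llist_add() is a lock-free cmpxchg loop):

  #include <stdbool.h>
  #include <stdio.h>

  struct mock_node { struct mock_node *next; };

  /* Push @n onto @head; report whether the list used to be empty. */
  static bool mock_llist_add(struct mock_node *n, struct mock_node **head)
  {
          bool was_empty = (*head == NULL);

          n->next = *head;
          *head = n;
          return was_empty;
  }

  static void mock_send_ipi(int cpu) { printf("IPI -> CPU %d\n", cpu); }

  int main(void)
  {
          struct mock_node *queue = NULL;        /* call_single_queue stand-in */
          struct mock_node a, b;

          if (mock_llist_add(&a, &queue))        /* empty -> non-empty: IPI    */
                  mock_send_ipi(1);
          if (mock_llist_add(&b, &queue))        /* already pending: no IPI    */
                  mock_send_ipi(1);
          return 0;
  }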
 
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 48b5d1b..eca8396 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -31,7 +31,7 @@ static bool irq_work_claim(struct irq_work *work)
 {
        int oflags;
 
-       oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags);
+       oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
        /*
         * If the work is already pending, no need to raise the IPI.
         * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
@@ -102,8 +102,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
        if (cpu != smp_processor_id()) {
                /* Arch remote IPI send/receive backend aren't NMI safe */
                WARN_ON_ONCE(in_nmi());
-               if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
-                       arch_send_call_function_single_ipi(cpu);
+               __smp_call_single_queue(cpu, &work->llnode);
        } else {
                __irq_work_queue_local(work);
        }
@@ -131,6 +130,31 @@ bool irq_work_needs_cpu(void)
        return true;
 }
 
+void irq_work_single(void *arg)
+{
+       struct irq_work *work = arg;
+       int flags;
+
+       /*
+        * Clear the PENDING bit, after this point the @work
+        * can be re-used.
+        * Make it immediately visible so that other CPUs trying
+        * to claim that work don't rely on us to handle their data
+        * while we are in the middle of the func.
+        */
+       flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
+
+       lockdep_irq_work_enter(work);
+       work->func(work);
+       lockdep_irq_work_exit(work);
+       /*
+        * Clear the BUSY bit and return to the free state if
+        * no-one else claimed it meanwhile.
+        */
+       flags &= ~IRQ_WORK_PENDING;
+       (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
+}
+
 static void irq_work_run_list(struct llist_head *list)
 {
        struct irq_work *work, *tmp;
@@ -142,27 +166,8 @@ static void irq_work_run_list(struct llist_head *list)
                return;
 
        llnode = llist_del_all(list);
-       llist_for_each_entry_safe(work, tmp, llnode, llnode) {
-               int flags;
-               /*
-                * Clear the PENDING bit, after this point the @work
-                * can be re-used.
-                * Make it immediately visible so that other CPUs trying
-                * to claim that work don't rely on us to handle their data
-                * while we are in the middle of the func.
-                */
-               flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
-
-               lockdep_irq_work_enter(work);
-               work->func(work);
-               lockdep_irq_work_exit(work);
-               /*
-                * Clear the BUSY bit and return to the free state if
-                * no-one else claimed it meanwhile.
-                */
-               flags &= ~IRQ_WORK_PENDING;
-               (void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
-       }
+       llist_for_each_entry_safe(work, tmp, llnode, llnode)
+               irq_work_single(work);
 }
 
 /*
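
The claim/release protocol that the factored-out irq_work_single() keeps
intact can be exercised on its own. A rough userspace sketch with C11
atomics follows (the mock_ types are made up, and the CSD type bits are
omitted for brevity):

  #include <stdatomic.h>
  #include <stdio.h>

  #define IRQ_WORK_PENDING        0x01
  #define IRQ_WORK_BUSY           0x02
  #define IRQ_WORK_CLAIMED        (IRQ_WORK_PENDING | IRQ_WORK_BUSY)

  struct mock_work {
          atomic_int flags;
          void (*func)(struct mock_work *);
  };

  static void mock_work_single(struct mock_work *work)
  {
          /* Drop PENDING first so the work can be claimed again while the
           * callback is still running. */
          int oflags = atomic_fetch_and(&work->flags, ~IRQ_WORK_PENDING);

          work->func(work);

          /* Drop BUSY only if nobody re-claimed the work meanwhile. */
          int expected = oflags & ~IRQ_WORK_PENDING;
          atomic_compare_exchange_strong(&work->flags, &expected,
                                         expected & ~IRQ_WORK_BUSY);
  }

  static void say_hi(struct mock_work *w) { (void)w; puts("ran"); }

  int main(void)
  {
          struct mock_work w = { .func = say_hi };

          atomic_fetch_or(&w.flags, IRQ_WORK_CLAIMED);    /* claim */
          mock_work_single(&w);
          printf("flags after: %#x\n", atomic_load(&w.flags));
          return 0;
  }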
diff --git a/kernel/smp.c b/kernel/smp.c
index 9f11813..856562b 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
 
 #include "smpboot.h"
 
-enum {
-       CSD_FLAG_LOCK           = 0x01,
-       CSD_FLAG_SYNCHRONOUS    = 0x02,
-};
+
+#define CSD_TYPE(_csd) ((_csd)->flags & CSD_FLAG_TYPE_MASK)
 
 struct call_function_data {
        call_single_data_t      __percpu *csd;
@@ -137,15 +135,33 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
 extern void send_call_function_single_ipi(int cpu);
 
+void __smp_call_single_queue(int cpu, struct llist_node *node)
+{
+       /*
+        * The list addition should be visible before sending the IPI
+        * handler locks the list to pull the entry off it because of
+        * normal cache coherency rules implied by spinlocks.
+        *
+        * If IPIs can go out of order to the cache coherency protocol
+        * in an architecture, sufficient synchronisation should be added
+        * to arch code to make it appear to obey cache coherency WRT
+        * locking and barrier primitives. Generic code isn't really
+        * equipped to do the right thing...
+        */
+       if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+               send_call_function_single_ipi(cpu);
+}
+
 /*
  * Insert a previously allocated call_single_data_t element
  * for execution on the given CPU. data must already have
  * ->func, ->info, and ->flags set.
  */
-static int generic_exec_single(int cpu, call_single_data_t *csd,
-                              smp_call_func_t func, void *info)
+static int generic_exec_single(int cpu, call_single_data_t *csd)
 {
        if (cpu == smp_processor_id()) {
+               smp_call_func_t func = csd->func;
+               void *info = csd->info;
                unsigned long flags;
 
                /*
@@ -159,28 +175,12 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
                return 0;
        }
 
-
        if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
                csd_unlock(csd);
                return -ENXIO;
        }
 
-       csd->func = func;
-       csd->info = info;
-
-       /*
-        * The list addition should be visible before sending the IPI
-        * handler locks the list to pull the entry off it because of
-        * normal cache coherency rules implied by spinlocks.
-        *
-        * If IPIs can go out of order to the cache coherency protocol
-        * in an architecture, sufficient synchronisation should be added
-        * to arch code to make it appear to obey cache coherency WRT
-        * locking and barrier primitives. Generic code isn't really
-        * equipped to do the right thing...
-        */
-       if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
-               send_call_function_single_ipi(cpu);
+       __smp_call_single_queue(cpu, &csd->llist);
 
        return 0;
 }
@@ -194,16 +194,10 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
 void generic_smp_call_function_single_interrupt(void)
 {
        flush_smp_call_function_queue(true);
-
-       /*
-        * Handle irq works queued remotely by irq_work_queue_on().
-        * Smp functions above are typically synchronous so they
-        * better run first since some other CPUs may be busy waiting
-        * for them.
-        */
-       irq_work_run();
 }
 
+extern void irq_work_single(void *);
+
 /**
  * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
  *
@@ -241,9 +235,21 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
                 * We don't have to use the _safe() variant here
                 * because we are not invoking the IPI handlers yet.
                 */
-               llist_for_each_entry(csd, entry, llist)
-                       pr_warn("IPI callback %pS sent to offline CPU\n",
-                               csd->func);
+               llist_for_each_entry(csd, entry, llist) {
+                       switch (CSD_TYPE(csd)) {
+                       case CSD_TYPE_ASYNC:
+                       case CSD_TYPE_SYNC:
+                       case CSD_TYPE_IRQ_WORK:
+                               pr_warn("IPI callback %pS sent to offline CPU\n",
+                                       csd->func);
+                               break;
+
+                       default:
+                               pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
+                                       CSD_TYPE(csd));
+                               break;
+                       }
+               }
        }
 
        /*
@@ -251,16 +257,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
         */
        prev = NULL;
        llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-               smp_call_func_t func = csd->func;
-               void *info = csd->info;
-
                /* Do we wait until *after* callback? */
-               if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+               if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
+                       smp_call_func_t func = csd->func;
+                       void *info = csd->info;
+
                        if (prev) {
                                prev->next = &csd_next->llist;
                        } else {
                                entry = &csd_next->llist;
                        }
+
                        func(info);
                        csd_unlock(csd);
                } else {
@@ -272,11 +279,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
         * Second; run all !SYNC callbacks.
         */
        llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-               smp_call_func_t func = csd->func;
-               void *info = csd->info;
+               int type = CSD_TYPE(csd);
 
-               csd_unlock(csd);
-               func(info);
+               if (type == CSD_TYPE_ASYNC) {
+                       smp_call_func_t func = csd->func;
+                       void *info = csd->info;
+
+                       csd_unlock(csd);
+                       func(info);
+               } else if (type == CSD_TYPE_IRQ_WORK) {
+                       irq_work_single(csd);
+               }
        }
 }
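
To spell out the new dispatch: flush_smp_call_function_queue() now runs SYNC
entries in a first pass (unlocking the caller after the callback) and ASYNC
plus IRQ_WORK entries in a second pass. A condensed stand-alone sketch of the
per-entry decision (mock_ types, not the kernel API):

  #include <stdio.h>

  #define CSD_TYPE_ASYNC          0x00
  #define CSD_TYPE_SYNC           0x10
  #define CSD_TYPE_IRQ_WORK       0x20
  #define CSD_FLAG_TYPE_MASK      0xF0

  struct mock_csd {
          unsigned int flags;
          void (*func)(void *);
          void *info;
  };

  static void mock_flush_one(struct mock_csd *csd)
  {
          switch (csd->flags & CSD_FLAG_TYPE_MASK) {
          case CSD_TYPE_SYNC:        /* first pass: run, then unlock caller */
          case CSD_TYPE_ASYNC:       /* second pass: unlock, then run       */
                  csd->func(csd->info);
                  break;
          case CSD_TYPE_IRQ_WORK:    /* second pass: hand off to irq_work   */
                  printf("irq_work_single(%p)\n", (void *)csd);
                  break;
          default:
                  printf("unknown type %#x\n", csd->flags & CSD_FLAG_TYPE_MASK);
                  break;
          }
  }

  static void greet(void *info) { printf("hello %s\n", (char *)info); }

  int main(void)
  {
          struct mock_csd sync = { CSD_TYPE_SYNC, greet, "sync" };
          struct mock_csd work = { CSD_TYPE_IRQ_WORK, NULL, NULL };

          mock_flush_one(&sync);
          mock_flush_one(&work);
          return 0;
  }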
 
@@ -305,7 +318,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 {
        call_single_data_t *csd;
        call_single_data_t csd_stack = {
-               .flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS,
+               .flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC,
        };
        int this_cpu;
        int err;
@@ -339,7 +352,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
                csd_lock(csd);
        }
 
-       err = generic_exec_single(cpu, csd, func, info);
+       csd->func = func;
+       csd->info = info;
+
+       err = generic_exec_single(cpu, csd);
 
        if (wait)
                csd_lock_wait(csd);
@@ -385,7 +401,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
        csd->flags = CSD_FLAG_LOCK;
        smp_wmb();
 
-       err = generic_exec_single(cpu, csd, csd->func, csd->info);
+       err = generic_exec_single(cpu, csd);
 
 out:
        preempt_enable();
@@ -500,7 +516,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 
                csd_lock(csd);
                if (wait)
-                       csd->flags |= CSD_FLAG_SYNCHRONOUS;
+                       csd->flags |= CSD_TYPE_SYNC;
                csd->func = func;
                csd->info = info;
                if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
@@ -632,6 +648,17 @@ void __init smp_init(void)
 {
        int num_nodes, num_cpus;
 
+       /*
+        * Ensure struct irq_work layout matches so that
+        * flush_smp_call_function_queue() can do horrible things.
+        */
+       BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
+                    offsetof(struct __call_single_data, llist));
+       BUILD_BUG_ON(offsetof(struct irq_work, func) !=
+                    offsetof(struct __call_single_data, func));
+       BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
+                    offsetof(struct __call_single_data, flags));
+
        idle_threads_init();
        cpuhp_threads_init();
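
The layout requirement that the new BUILD_BUG_ON()s in smp_init() enforce can
also be checked in a free-standing compile. A minimal sketch with mirrored
(hypothetical mock_) definitions of the two structs:

  #include <assert.h>
  #include <stddef.h>

  struct mock_llist_node { struct mock_llist_node *next; };

  struct mock_irq_work {                  /* mirrors struct irq_work           */
          struct mock_llist_node llnode;
          unsigned int flags;             /* atomic_t in the kernel            */
          void (*func)(struct mock_irq_work *);
  };

  struct mock_csd {                       /* mirrors struct __call_single_data */
          struct mock_llist_node llist;
          unsigned int flags;
          void (*func)(void *);
          void *info;
  };

  /* flush_smp_call_function_queue() reads flags and func through either view,
   * so the shared leading members must sit at identical offsets. */
  static_assert(offsetof(struct mock_irq_work, llnode) ==
                offsetof(struct mock_csd, llist), "llist offset");
  static_assert(offsetof(struct mock_irq_work, flags) ==
                offsetof(struct mock_csd, flags), "flags offset");
  static_assert(offsetof(struct mock_irq_work, func) ==
                offsetof(struct mock_csd, func), "func offset");

  int main(void) { return 0; }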