Merge tag 'x86-mm-2021-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 29 Apr 2021 18:41:43 +0000 (11:41 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 29 Apr 2021 18:41:43 +0000 (11:41 -0700)
Pull x86 tlb updates from Ingo Molnar:
 "The x86 MM changes in this cycle were:

   - Implement concurrent TLB flushes, which overlap the local TLB
     flush with the remote TLB flushes.

     In testing, this measurably improved sysbench performance by a
     couple of percentage points, especially when TLB-heavy security
     mitigations are active.

   - Further micro-optimizations to improve the performance of TLB
     flushes"

* tag 'x86-mm-2021-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  smp: Micro-optimize smp_call_function_many_cond()
  smp: Inline on_each_cpu_cond() and on_each_cpu()
  x86/mm/tlb: Remove unnecessary uses of the inline keyword
  cpumask: Mark functions as pure
  x86/mm/tlb: Do not make is_lazy dirty for no reason
  x86/mm/tlb: Privatize cpu_tlbstate
  x86/mm/tlb: Flush remote and local TLBs concurrently
  x86/mm/tlb: Open-code on_each_cpu_cond_mask() for tlb_is_not_lazy()
  x86/mm/tlb: Unify flush_tlb_func_local() and flush_tlb_func_remote()
  smp: Run functions concurrently in smp_call_function_many_cond()

16 files changed:
arch/x86/hyperv/mmu.c
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/trace/hyperv.h
arch/x86/kernel/alternative.c
arch/x86/kernel/kvm.c
arch/x86/kernel/paravirt.c
arch/x86/mm/init.c
arch/x86/mm/tlb.c
arch/x86/xen/mmu_pv.c
include/linux/cpumask.h
include/linux/smp.h
include/trace/events/xen.h
kernel/smp.c
kernel/up.c

diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index c0ba887..bd13736 100644
@@ -52,8 +52,8 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
        return gva_n - offset;
 }
 
-static void hyperv_flush_tlb_others(const struct cpumask *cpus,
-                                   const struct flush_tlb_info *info)
+static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
+                                  const struct flush_tlb_info *info)
 {
        int cpu, vcpu, gva_n, max_gvas;
        struct hv_tlb_flush **flush_pcpu;
@@ -61,7 +61,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
        u64 status;
        unsigned long flags;
 
-       trace_hyperv_mmu_flush_tlb_others(cpus, info);
+       trace_hyperv_mmu_flush_tlb_multi(cpus, info);
 
        if (!hv_hypercall_pg)
                goto do_native;
@@ -164,7 +164,7 @@ check_status:
        if (hv_result_success(status))
                return;
 do_native:
-       native_flush_tlb_others(cpus, info);
+       native_flush_tlb_multi(cpus, info);
 }
 
 static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
@@ -239,6 +239,6 @@ void hyperv_setup_mmu_ops(void)
                return;
 
        pr_info("Using hypercall for remote TLB flush\n");
-       pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others;
+       pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
        pv_ops.mmu.tlb_remove_table = tlb_remove_table;
 }
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 43992e5..da3a1ac 100644
@@ -63,7 +63,7 @@ static inline void slow_down_io(void)
 void native_flush_tlb_local(void);
 void native_flush_tlb_global(void);
 void native_flush_tlb_one_user(unsigned long addr);
-void native_flush_tlb_others(const struct cpumask *cpumask,
+void native_flush_tlb_multi(const struct cpumask *cpumask,
                             const struct flush_tlb_info *info);
 
 static inline void __flush_tlb_local(void)
@@ -81,10 +81,10 @@ static inline void __flush_tlb_one_user(unsigned long addr)
        PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
 }
 
-static inline void __flush_tlb_others(const struct cpumask *cpumask,
+static inline void __flush_tlb_multi(const struct cpumask *cpumask,
                                      const struct flush_tlb_info *info)
 {
-       PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info);
+       PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info);
 }
 
 static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index ae692c3..d9d6b02 100644
@@ -161,8 +161,8 @@ struct pv_mmu_ops {
        void (*flush_tlb_user)(void);
        void (*flush_tlb_kernel)(void);
        void (*flush_tlb_one_user)(unsigned long addr);
-       void (*flush_tlb_others)(const struct cpumask *cpus,
-                                const struct flush_tlb_info *info);
+       void (*flush_tlb_multi)(const struct cpumask *cpus,
+                               const struct flush_tlb_info *info);
 
        void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 8c87a2e..fa952ea 100644
@@ -90,23 +90,6 @@ struct tlb_state {
        u16 next_asid;
 
        /*
-        * We can be in one of several states:
-        *
-        *  - Actively using an mm.  Our CPU's bit will be set in
-        *    mm_cpumask(loaded_mm) and is_lazy == false;
-        *
-        *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
-        *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
-        *
-        *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
-        *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
-        *    We're heuristically guessing that the CR3 load we
-        *    skipped more than makes up for the overhead added by
-        *    lazy mode.
-        */
-       bool is_lazy;
-
-       /*
         * If set we changed the page tables in such a way that we
         * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
         * This tells us to go invalidate all the non-loaded ctxs[]
@@ -151,7 +134,27 @@ struct tlb_state {
         */
        struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
 };
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
+DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate);
+
+struct tlb_state_shared {
+       /*
+        * We can be in one of several states:
+        *
+        *  - Actively using an mm.  Our CPU's bit will be set in
+        *    mm_cpumask(loaded_mm) and is_lazy == false;
+        *
+        *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
+        *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
+        *
+        *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
+        *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
+        *    We're heuristically guessing that the CR3 load we
+        *    skipped more than makes up for the overhead added by
+        *    lazy mode.
+        */
+       bool is_lazy;
+};
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
 
 bool nmi_uaccess_okay(void);
 #define nmi_uaccess_okay nmi_uaccess_okay
@@ -175,7 +178,7 @@ extern void initialize_tlbstate_and_flush(void);
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(vma, start, end) flushes a range of pages
  *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *  - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
+ *  - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
  *
  * ..but the i386 has somewhat limited tlb flushing capabilities,
  * and page-granular flushes are available only on i486 and up.
@@ -201,14 +204,15 @@ struct flush_tlb_info {
        unsigned long           start;
        unsigned long           end;
        u64                     new_tlb_gen;
-       unsigned int            stride_shift;
-       bool                    freed_tables;
+       unsigned int            initiating_cpu;
+       u8                      stride_shift;
+       u8                      freed_tables;
 };
 
 void flush_tlb_local(void);
 void flush_tlb_one_user(unsigned long addr);
 void flush_tlb_one_kernel(unsigned long addr);
-void flush_tlb_others(const struct cpumask *cpumask,
+void flush_tlb_multi(const struct cpumask *cpumask,
                      const struct flush_tlb_info *info);
 
 #ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index 4d705cb..a8e5a7a 100644
@@ -8,7 +8,7 @@
 
 #if IS_ENABLED(CONFIG_HYPERV)
 
-TRACE_EVENT(hyperv_mmu_flush_tlb_others,
+TRACE_EVENT(hyperv_mmu_flush_tlb_multi,
            TP_PROTO(const struct cpumask *cpus,
                     const struct flush_tlb_info *info),
            TP_ARGS(cpus, info),
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 84ec0ba..6974b51 100644
@@ -706,7 +706,7 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
         * with a stale address space WITHOUT being in lazy mode after
         * restoring the previous mm.
         */
-       if (this_cpu_read(cpu_tlbstate.is_lazy))
+       if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
                leave_mm(smp_processor_id());
 
        temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 172c947..5d32fa4 100644
@@ -613,7 +613,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu)
 }
 #endif
 
-static void kvm_flush_tlb_others(const struct cpumask *cpumask,
+static void kvm_flush_tlb_multi(const struct cpumask *cpumask,
                        const struct flush_tlb_info *info)
 {
        u8 state;
@@ -627,6 +627,11 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
         * queue flush_on_enter for pre-empted vCPUs
         */
        for_each_cpu(cpu, flushmask) {
+               /*
+                * The local vCPU is never preempted, so we do not explicitly
+                * skip check for local vCPU - it will never be cleared from
+                * flushmask.
+                */
                src = &per_cpu(steal_time, cpu);
                state = READ_ONCE(src->preempted);
                if ((state & KVM_VCPU_PREEMPTED)) {
@@ -636,7 +641,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask,
                }
        }
 
-       native_flush_tlb_others(flushmask, info);
+       native_flush_tlb_multi(flushmask, info);
 }
 
 static void __init kvm_guest_init(void)
@@ -654,7 +659,7 @@ static void __init kvm_guest_init(void)
        }
 
        if (pv_tlb_flush_supported()) {
-               pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
+               pv_ops.mmu.flush_tlb_multi = kvm_flush_tlb_multi;
                pv_ops.mmu.tlb_remove_table = tlb_remove_table;
                pr_info("KVM setup pv remote TLB flush\n");
        }
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index d073026..04cafc0 100644
@@ -291,7 +291,7 @@ struct paravirt_patch_template pv_ops = {
        .mmu.flush_tlb_user     = native_flush_tlb_local,
        .mmu.flush_tlb_kernel   = native_flush_tlb_global,
        .mmu.flush_tlb_one_user = native_flush_tlb_one_user,
-       .mmu.flush_tlb_others   = native_flush_tlb_others,
+       .mmu.flush_tlb_multi    = native_flush_tlb_multi,
        .mmu.tlb_remove_table   =
                        (void (*)(struct mmu_gather *, void *))tlb_remove_page,
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fbf41dd..75ef19a 100644
@@ -1017,7 +1017,7 @@ void __init zone_sizes_init(void)
        free_area_init(max_zone_pfns);
 }
 
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+__visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = {
        .loaded_mm = &init_mm,
        .next_asid = 1,
        .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 98f2695..7880468 100644
@@ -24,7 +24,7 @@
 # define __flush_tlb_local             native_flush_tlb_local
 # define __flush_tlb_global            native_flush_tlb_global
 # define __flush_tlb_one_user(addr)    native_flush_tlb_one_user(addr)
-# define __flush_tlb_others(msk, info) native_flush_tlb_others(msk, info)
+# define __flush_tlb_multi(msk, info)  native_flush_tlb_multi(msk, info)
 #endif
 
 /*
@@ -300,7 +300,7 @@ void leave_mm(int cpu)
                return;
 
        /* Warn if we're not lazy. */
-       WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
+       WARN_ON(!this_cpu_read(cpu_tlbstate_shared.is_lazy));
 
        switch_mm(NULL, &init_mm, NULL);
 }
@@ -316,7 +316,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
        local_irq_restore(flags);
 }
 
-static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
+static unsigned long mm_mangle_tif_spec_ib(struct task_struct *next)
 {
        unsigned long next_tif = task_thread_info(next)->flags;
        unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB;
@@ -424,7 +424,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 {
        struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
        u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
-       bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
+       bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
        unsigned cpu = smp_processor_id();
        u64 next_tlb_gen;
        bool need_flush;
@@ -439,7 +439,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
         */
 
-       /* We don't want flush_tlb_func_* to run concurrently with us. */
+       /* We don't want flush_tlb_func() to run concurrently with us. */
        if (IS_ENABLED(CONFIG_PROVE_LOCKING))
                WARN_ON_ONCE(!irqs_disabled());
 
@@ -469,7 +469,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                __flush_tlb_all();
        }
 #endif
-       this_cpu_write(cpu_tlbstate.is_lazy, false);
+       if (was_lazy)
+               this_cpu_write(cpu_tlbstate_shared.is_lazy, false);
 
        /*
         * The membarrier system call requires a full memory barrier and
@@ -490,7 +491,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                /*
                 * Even in lazy TLB mode, the CPU should stay set in the
                 * mm_cpumask. The TLB shootdown code can figure out from
-                * from cpu_tlbstate.is_lazy whether or not to send an IPI.
+                * cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
                 */
                if (WARN_ON_ONCE(real_prev != &init_mm &&
                                 !cpumask_test_cpu(cpu, mm_cpumask(next))))
@@ -598,7 +599,7 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
        if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
                return;
 
-       this_cpu_write(cpu_tlbstate.is_lazy, true);
+       this_cpu_write(cpu_tlbstate_shared.is_lazy, true);
 }
 
 /*
@@ -647,14 +648,13 @@ void initialize_tlbstate_and_flush(void)
 }
 
 /*
- * flush_tlb_func_common()'s memory ordering requirement is that any
+ * flush_tlb_func()'s memory ordering requirement is that any
  * TLB fills that happen after we flush the TLB are ordered after we
  * read active_mm's tlb_gen.  We don't need any explicit barriers
  * because all x86 flush operations are serializing and the
  * atomic64_read operation won't be reordered by the compiler.
  */
-static void flush_tlb_func_common(const struct flush_tlb_info *f,
-                                 bool local, enum tlb_flush_reason reason)
+static void flush_tlb_func(void *info)
 {
        /*
         * We have three different tlb_gen values in here.  They are:
@@ -665,28 +665,40 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
         * - f->new_tlb_gen: the generation that the requester of the flush
         *                   wants us to catch up to.
         */
+       const struct flush_tlb_info *f = info;
        struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
        u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
        u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
        u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+       bool local = smp_processor_id() == f->initiating_cpu;
+       unsigned long nr_invalidate = 0;
 
        /* This code cannot presently handle being reentered. */
        VM_WARN_ON(!irqs_disabled());
 
+       if (!local) {
+               inc_irq_stat(irq_tlb_count);
+               count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+
+               /* Can only happen on remote CPUs */
+               if (f->mm && f->mm != loaded_mm)
+                       return;
+       }
+
        if (unlikely(loaded_mm == &init_mm))
                return;
 
        VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
                   loaded_mm->context.ctx_id);
 
-       if (this_cpu_read(cpu_tlbstate.is_lazy)) {
+       if (this_cpu_read(cpu_tlbstate_shared.is_lazy)) {
                /*
                 * We're in lazy mode.  We need to at least flush our
                 * paging-structure cache to avoid speculatively reading
                 * garbage into our TLB.  Since switching to init_mm is barely
                 * slower than a minimal flush, just switch to init_mm.
                 *
-                * This should be rare, with native_flush_tlb_others skipping
+                * This should be rare, with native_flush_tlb_multi() skipping
                 * IPIs to lazy TLB mode CPUs.
                 */
                switch_mm_irqs_off(NULL, &init_mm, NULL);
@@ -700,8 +712,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
                 * be handled can catch us all the way up, leaving no work for
                 * the second flush.
                 */
-               trace_tlb_flush(reason, 0);
-               return;
+               goto done;
        }
 
        WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
@@ -748,56 +759,54 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
            f->new_tlb_gen == local_tlb_gen + 1 &&
            f->new_tlb_gen == mm_tlb_gen) {
                /* Partial flush */
-               unsigned long nr_invalidate = (f->end - f->start) >> f->stride_shift;
                unsigned long addr = f->start;
 
+               nr_invalidate = (f->end - f->start) >> f->stride_shift;
+
                while (addr < f->end) {
                        flush_tlb_one_user(addr);
                        addr += 1UL << f->stride_shift;
                }
                if (local)
                        count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
-               trace_tlb_flush(reason, nr_invalidate);
        } else {
                /* Full flush. */
+               nr_invalidate = TLB_FLUSH_ALL;
+
                flush_tlb_local();
                if (local)
                        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-               trace_tlb_flush(reason, TLB_FLUSH_ALL);
        }
 
        /* Both paths above update our state to mm_tlb_gen. */
        this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
-}
-
-static void flush_tlb_func_local(const void *info, enum tlb_flush_reason reason)
-{
-       const struct flush_tlb_info *f = info;
 
-       flush_tlb_func_common(f, true, reason);
+       /* Tracing is done in a unified manner to reduce the code size */
+done:
+       trace_tlb_flush(!local ? TLB_REMOTE_SHOOTDOWN :
+                               (f->mm == NULL) ? TLB_LOCAL_SHOOTDOWN :
+                                                 TLB_LOCAL_MM_SHOOTDOWN,
+                       nr_invalidate);
 }
 
-static void flush_tlb_func_remote(void *info)
+static bool tlb_is_not_lazy(int cpu)
 {
-       const struct flush_tlb_info *f = info;
-
-       inc_irq_stat(irq_tlb_count);
-
-       if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
-               return;
-
-       count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-       flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
+       return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
 }
 
-static bool tlb_is_not_lazy(int cpu, void *data)
-{
-       return !per_cpu(cpu_tlbstate.is_lazy, cpu);
-}
+static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
+
+DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
+EXPORT_PER_CPU_SYMBOL(cpu_tlbstate_shared);
 
-STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask,
+STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
                                         const struct flush_tlb_info *info)
 {
+       /*
+        * Do accounting and tracing. Note that there are (and have always been)
+        * cases in which a remote TLB flush will be traced, but eventually
+        * would not happen.
+        */
        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
        if (info->end == TLB_FLUSH_ALL)
                trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
@@ -815,18 +824,42 @@ STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask,
         * up on the new contents of what used to be page tables, while
         * doing a speculative memory access.
         */
-       if (info->freed_tables)
-               smp_call_function_many(cpumask, flush_tlb_func_remote,
-                              (void *)info, 1);
-       else
-               on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
-                               (void *)info, 1, cpumask);
+       if (info->freed_tables) {
+               on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
+       } else {
+               /*
+                * Although we could have used on_each_cpu_cond_mask(),
+                * open-coding it has performance advantages, as it eliminates
+                * the need for indirect calls or retpolines. In addition, it
+                * allows to use a designated cpumask for evaluating the
+                * condition, instead of allocating one.
+                *
+                * This code works under the assumption that there are no nested
+                * TLB flushes, an assumption that is already made in
+                * flush_tlb_mm_range().
+                *
+                * cond_cpumask is logically a stack-local variable, but it is
+                * more efficient to have it off the stack and not to allocate
+                * it on demand. Preemption is disabled and this code is
+                * non-reentrant.
+                */
+               struct cpumask *cond_cpumask = this_cpu_ptr(&flush_tlb_mask);
+               int cpu;
+
+               cpumask_clear(cond_cpumask);
+
+               for_each_cpu(cpu, cpumask) {
+                       if (tlb_is_not_lazy(cpu))
+                               __cpumask_set_cpu(cpu, cond_cpumask);
+               }
+               on_each_cpu_mask(cond_cpumask, flush_tlb_func, (void *)info, true);
+       }
 }
 
-void flush_tlb_others(const struct cpumask *cpumask,
+void flush_tlb_multi(const struct cpumask *cpumask,
                      const struct flush_tlb_info *info)
 {
-       __flush_tlb_others(cpumask, info);
+       __flush_tlb_multi(cpumask, info);
 }
 
 /*
@@ -847,7 +880,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);
 static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
 #endif
 
-static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
+static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
                        unsigned long start, unsigned long end,
                        unsigned int stride_shift, bool freed_tables,
                        u64 new_tlb_gen)
@@ -869,11 +902,12 @@ static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
        info->stride_shift      = stride_shift;
        info->freed_tables      = freed_tables;
        info->new_tlb_gen       = new_tlb_gen;
+       info->initiating_cpu    = smp_processor_id();
 
        return info;
 }
 
-static inline void put_flush_tlb_info(void)
+static void put_flush_tlb_info(void)
 {
 #ifdef CONFIG_DEBUG_VM
        /* Complete reentrancy prevention checks */
@@ -905,16 +939,20 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
        info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
                                  new_tlb_gen);
 
-       if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
+       /*
+        * flush_tlb_multi() is not optimized for the common case in which only
+        * a local TLB flush is needed. Optimize this use-case by calling
+        * flush_tlb_func_local() directly in this case.
+        */
+       if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
+               flush_tlb_multi(mm_cpumask(mm), info);
+       } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
                lockdep_assert_irqs_enabled();
                local_irq_disable();
-               flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
+               flush_tlb_func(info);
                local_irq_enable();
        }
 
-       if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
-               flush_tlb_others(mm_cpumask(mm), info);
-
        put_flush_tlb_info();
        put_cpu();
 }
@@ -1119,34 +1157,30 @@ void __flush_tlb_all(void)
 }
 EXPORT_SYMBOL_GPL(__flush_tlb_all);
 
-/*
- * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm.
- * This means that the 'struct flush_tlb_info' that describes which mappings to
- * flush is actually fixed. We therefore set a single fixed struct and use it in
- * arch_tlbbatch_flush().
- */
-static const struct flush_tlb_info full_flush_tlb_info = {
-       .mm = NULL,
-       .start = 0,
-       .end = TLB_FLUSH_ALL,
-};
-
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
+       struct flush_tlb_info *info;
+
        int cpu = get_cpu();
 
-       if (cpumask_test_cpu(cpu, &batch->cpumask)) {
+       info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
+       /*
+        * flush_tlb_multi() is not optimized for the common case in which only
+        * a local TLB flush is needed. Optimize this use-case by calling
+        * flush_tlb_func_local() directly in this case.
+        */
+       if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids) {
+               flush_tlb_multi(&batch->cpumask, info);
+       } else if (cpumask_test_cpu(cpu, &batch->cpumask)) {
                lockdep_assert_irqs_enabled();
                local_irq_disable();
-               flush_tlb_func_local(&full_flush_tlb_info, TLB_LOCAL_SHOOTDOWN);
+               flush_tlb_func(info);
                local_irq_enable();
        }
 
-       if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
-               flush_tlb_others(&batch->cpumask, &full_flush_tlb_info);
-
        cpumask_clear(&batch->cpumask);
 
+       put_flush_tlb_info();
        put_cpu();
 }
 
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 1e28c88..ade789e 100644
@@ -1247,8 +1247,8 @@ static void xen_flush_tlb_one_user(unsigned long addr)
        preempt_enable();
 }
 
-static void xen_flush_tlb_others(const struct cpumask *cpus,
-                                const struct flush_tlb_info *info)
+static void xen_flush_tlb_multi(const struct cpumask *cpus,
+                               const struct flush_tlb_info *info)
 {
        struct {
                struct mmuext_op op;
@@ -1258,7 +1258,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
        const size_t mc_entry_size = sizeof(args->op) +
                sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
 
-       trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
+       trace_xen_mmu_flush_tlb_multi(cpus, info->mm, info->start, info->end);
 
        if (cpumask_empty(cpus))
                return;         /* nothing to do */
@@ -1267,9 +1267,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
        args = mcs.args;
        args->op.arg2.vcpumask = to_cpumask(args->mask);
 
-       /* Remove us, and any offline CPUS. */
+       /* Remove any offline CPUs */
        cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
-       cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
 
        args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
        if (info->end != TLB_FLUSH_ALL &&
@@ -2086,7 +2085,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
        .flush_tlb_user = xen_flush_tlb,
        .flush_tlb_kernel = xen_flush_tlb,
        .flush_tlb_one_user = xen_flush_tlb_one_user,
-       .flush_tlb_others = xen_flush_tlb_others,
+       .flush_tlb_multi = xen_flush_tlb_multi,
        .tlb_remove_table = tlb_remove_table,
 
        .pgd_alloc = xen_pgd_alloc,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index e6b948a..bfc4690 100644
@@ -206,7 +206,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp)
        return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits);
 }
 
-unsigned int cpumask_next(int n, const struct cpumask *srcp);
+unsigned int __pure cpumask_next(int n, const struct cpumask *srcp);
 
 /**
  * cpumask_next_zero - get the next unset cpu in a cpumask
@@ -223,8 +223,8 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
        return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
 }
 
-int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
-int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
+int __pure cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
+int __pure cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
 unsigned int cpumask_local_spread(unsigned int i, int node);
 int cpumask_any_and_distribute(const struct cpumask *src1p,
                               const struct cpumask *src2p);
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 70c6f62..84a0b48 100644
@@ -50,30 +50,52 @@ extern unsigned int total_cpus;
 int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
                             int wait);
 
+void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
+                          void *info, bool wait, const struct cpumask *mask);
+
+int smp_call_function_single_async(int cpu, call_single_data_t *csd);
+
 /*
  * Call a function on all processors
  */
-void on_each_cpu(smp_call_func_t func, void *info, int wait);
+static inline void on_each_cpu(smp_call_func_t func, void *info, int wait)
+{
+       on_each_cpu_cond_mask(NULL, func, info, wait, cpu_online_mask);
+}
 
-/*
- * Call a function on processors specified by mask, which might include
- * the local one.
+/**
+ * on_each_cpu_mask(): Run a function on processors specified by
+ * cpumask, which may include the local processor.
+ * @mask: The set of cpus to run on (only runs on online subset).
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed
+ *        on other CPUs.
+ *
+ * If @wait is true, then returns once @func has returned.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.  The
+ * exception is that it may be used during early boot while
+ * early_boot_irqs_disabled is set.
  */
-void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
-               void *info, bool wait);
+static inline void on_each_cpu_mask(const struct cpumask *mask,
+                                   smp_call_func_t func, void *info, bool wait)
+{
+       on_each_cpu_cond_mask(NULL, func, info, wait, mask);
+}
 
 /*
  * Call a function on each processor for which the supplied function
  * cond_func returns a positive value. This may include the local
- * processor.
+ * processor.  May be used during early boot while early_boot_irqs_disabled is
+ * set. Use local_irq_save/restore() instead of local_irq_disable/enable().
  */
-void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
-                     void *info, bool wait);
-
-void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
-                          void *info, bool wait, const struct cpumask *mask);
-
-int smp_call_function_single_async(int cpu, call_single_data_t *csd);
+static inline void on_each_cpu_cond(smp_cond_func_t cond_func,
+                                   smp_call_func_t func, void *info, bool wait)
+{
+       on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
+}
 
 #ifdef CONFIG_SMP
 
diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h
index 3b61b58..44a3f56 100644
@@ -346,7 +346,7 @@ TRACE_EVENT(xen_mmu_flush_tlb_one_user,
            TP_printk("addr %lx", __entry->addr)
        );
 
-TRACE_EVENT(xen_mmu_flush_tlb_others,
+TRACE_EVENT(xen_mmu_flush_tlb_multi,
            TP_PROTO(const struct cpumask *cpus, struct mm_struct *mm,
                     unsigned long addr, unsigned long end),
            TP_ARGS(cpus, mm, addr, end),
diff --git a/kernel/smp.c b/kernel/smp.c
index f472ef6..e210749 100644
@@ -850,12 +850,28 @@ call:
 }
 EXPORT_SYMBOL_GPL(smp_call_function_any);
 
+/*
+ * Flags to be used as scf_flags argument of smp_call_function_many_cond().
+ *
+ * %SCF_WAIT:          Wait until function execution is completed
+ * %SCF_RUN_LOCAL:     Run also locally if local cpu is set in cpumask
+ */
+#define SCF_WAIT       (1U << 0)
+#define SCF_RUN_LOCAL  (1U << 1)
+
 static void smp_call_function_many_cond(const struct cpumask *mask,
                                        smp_call_func_t func, void *info,
-                                       bool wait, smp_cond_func_t cond_func)
+                                       unsigned int scf_flags,
+                                       smp_cond_func_t cond_func)
 {
+       int cpu, last_cpu, this_cpu = smp_processor_id();
        struct call_function_data *cfd;
-       int cpu, next_cpu, this_cpu = smp_processor_id();
+       bool wait = scf_flags & SCF_WAIT;
+       bool run_remote = false;
+       bool run_local = false;
+       int nr_cpus = 0;
+
+       lockdep_assert_preemption_disabled();
 
        /*
         * Can deadlock when called with interrupts disabled.
@@ -863,8 +879,9 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
         * send smp call function interrupt to this cpu and as such deadlocks
         * can't happen.
         */
-       WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
-                    && !oops_in_progress && !early_boot_irqs_disabled);
+       if (cpu_online(this_cpu) && !oops_in_progress &&
+           !early_boot_irqs_disabled)
+               lockdep_assert_irqs_enabled();
 
        /*
         * When @wait we can deadlock when we interrupt between llist_add() and
@@ -874,70 +891,75 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
         */
        WARN_ON_ONCE(!in_task());
 
-       /* Try to fastpath.  So, what's a CPU they want? Ignoring this one. */
+       /* Check if we need local execution. */
+       if ((scf_flags & SCF_RUN_LOCAL) && cpumask_test_cpu(this_cpu, mask))
+               run_local = true;
+
+       /* Check if we need remote execution, i.e., any CPU excluding this one. */
        cpu = cpumask_first_and(mask, cpu_online_mask);
        if (cpu == this_cpu)
                cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
+       if (cpu < nr_cpu_ids)
+               run_remote = true;
 
-       /* No online cpus?  We're done. */
-       if (cpu >= nr_cpu_ids)
-               return;
+       if (run_remote) {
+               cfd = this_cpu_ptr(&cfd_data);
+               cpumask_and(cfd->cpumask, mask, cpu_online_mask);
+               __cpumask_clear_cpu(this_cpu, cfd->cpumask);
 
-       /* Do we have another CPU which isn't us? */
-       next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
-       if (next_cpu == this_cpu)
-               next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
-
-       /* Fastpath: do that cpu by itself. */
-       if (next_cpu >= nr_cpu_ids) {
-               if (!cond_func || cond_func(cpu, info))
-                       smp_call_function_single(cpu, func, info, wait);
-               return;
-       }
+               cpumask_clear(cfd->cpumask_ipi);
+               for_each_cpu(cpu, cfd->cpumask) {
+                       struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
+                       call_single_data_t *csd = &pcpu->csd;
 
-       cfd = this_cpu_ptr(&cfd_data);
+                       if (cond_func && !cond_func(cpu, info))
+                               continue;
 
-       cpumask_and(cfd->cpumask, mask, cpu_online_mask);
-       __cpumask_clear_cpu(this_cpu, cfd->cpumask);
+                       csd_lock(csd);
+                       if (wait)
+                               csd->node.u_flags |= CSD_TYPE_SYNC;
+                       csd->func = func;
+                       csd->info = info;
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+                       csd->node.src = smp_processor_id();
+                       csd->node.dst = cpu;
+#endif
+                       cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
+                       if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
+                               __cpumask_set_cpu(cpu, cfd->cpumask_ipi);
+                               nr_cpus++;
+                               last_cpu = cpu;
 
-       /* Some callers race with other cpus changing the passed mask */
-       if (unlikely(!cpumask_weight(cfd->cpumask)))
-               return;
+                               cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
+                       } else {
+                               cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
+                       }
+               }
 
-       cpumask_clear(cfd->cpumask_ipi);
-       for_each_cpu(cpu, cfd->cpumask) {
-               struct cfd_percpu *pcpu = per_cpu_ptr(cfd->pcpu, cpu);
-               call_single_data_t *csd = &pcpu->csd;
+               cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PING);
 
-               if (cond_func && !cond_func(cpu, info))
-                       continue;
+               /*
+                * Choose the most efficient way to send an IPI. Note that the
+                * number of CPUs might be zero due to concurrent changes to the
+                * provided mask.
+                */
+               if (nr_cpus == 1)
+                       send_call_function_single_ipi(last_cpu);
+               else if (likely(nr_cpus > 1))
+                       arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
 
-               csd_lock(csd);
-               if (wait)
-                       csd->node.u_flags |= CSD_TYPE_SYNC;
-               csd->func = func;
-               csd->info = info;
-#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
-               csd->node.src = smp_processor_id();
-               csd->node.dst = cpu;
-#endif
-               cfd_seq_store(pcpu->seq_queue, this_cpu, cpu, CFD_SEQ_QUEUE);
-               if (llist_add(&csd->node.llist, &per_cpu(call_single_queue, cpu))) {
-                       __cpumask_set_cpu(cpu, cfd->cpumask_ipi);
-                       cfd_seq_store(pcpu->seq_ipi, this_cpu, cpu, CFD_SEQ_IPI);
-               } else {
-                       cfd_seq_store(pcpu->seq_noipi, this_cpu, cpu, CFD_SEQ_NOIPI);
-               }
+               cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu, CFD_SEQ_NOCPU, CFD_SEQ_PINGED);
        }
 
-       /* Send a message to all CPUs in the map */
-       cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->ping, this_cpu,
-                     CFD_SEQ_NOCPU, CFD_SEQ_PING);
-       arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
-       cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->pinged, this_cpu,
-                     CFD_SEQ_NOCPU, CFD_SEQ_PINGED);
+       if (run_local && (!cond_func || cond_func(this_cpu, info))) {
+               unsigned long flags;
 
-       if (wait) {
+               local_irq_save(flags);
+               func(info);
+               local_irq_restore(flags);
+       }
+
+       if (run_remote && wait) {
                for_each_cpu(cpu, cfd->cpumask) {
                        call_single_data_t *csd;
 
@@ -948,12 +970,14 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 }
 
 /**
- * smp_call_function_many(): Run a function on a set of other CPUs.
+ * smp_call_function_many(): Run a function on a set of CPUs.
  * @mask: The set of cpus to run on (only runs on online subset).
  * @func: The function to run. This must be fast and non-blocking.
  * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed
- *        on other CPUs.
+ * @flags: Bitmask that controls the operation. If %SCF_WAIT is set, wait
+ *        (atomically) until function has completed on other CPUs. If
+ *        %SCF_RUN_LOCAL is set, the function will also be run locally
+ *        if the local CPU is set in the @cpumask.
  *
  * If @wait is true, then returns once @func has returned.
  *
@@ -964,7 +988,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 void smp_call_function_many(const struct cpumask *mask,
                            smp_call_func_t func, void *info, bool wait)
 {
-       smp_call_function_many_cond(mask, func, info, wait, NULL);
+       smp_call_function_many_cond(mask, func, info, wait * SCF_WAIT, NULL);
 }
 EXPORT_SYMBOL(smp_call_function_many);
 
@@ -1076,56 +1100,6 @@ void __init smp_init(void)
 }
 
 /*
- * Call a function on all processors.  May be used during early boot while
- * early_boot_irqs_disabled is set.  Use local_irq_save/restore() instead
- * of local_irq_disable/enable().
- */
-void on_each_cpu(smp_call_func_t func, void *info, int wait)
-{
-       unsigned long flags;
-
-       preempt_disable();
-       smp_call_function(func, info, wait);
-       local_irq_save(flags);
-       func(info);
-       local_irq_restore(flags);
-       preempt_enable();
-}
-EXPORT_SYMBOL(on_each_cpu);
-
-/**
- * on_each_cpu_mask(): Run a function on processors specified by
- * cpumask, which may include the local processor.
- * @mask: The set of cpus to run on (only runs on online subset).
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed
- *        on other CPUs.
- *
- * If @wait is true, then returns once @func has returned.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.  The
- * exception is that it may be used during early boot while
- * early_boot_irqs_disabled is set.
- */
-void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
-                       void *info, bool wait)
-{
-       int cpu = get_cpu();
-
-       smp_call_function_many(mask, func, info, wait);
-       if (cpumask_test_cpu(cpu, mask)) {
-               unsigned long flags;
-               local_irq_save(flags);
-               func(info);
-               local_irq_restore(flags);
-       }
-       put_cpu();
-}
-EXPORT_SYMBOL(on_each_cpu_mask);
-
-/*
  * on_each_cpu_cond(): Call a function on each processor for which
  * the supplied function cond_func returns true, optionally waiting
  * for all the required CPUs to finish. This may include the local
@@ -1150,27 +1124,17 @@ EXPORT_SYMBOL(on_each_cpu_mask);
 void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
                           void *info, bool wait, const struct cpumask *mask)
 {
-       int cpu = get_cpu();
+       unsigned int scf_flags = SCF_RUN_LOCAL;
 
-       smp_call_function_many_cond(mask, func, info, wait, cond_func);
-       if (cpumask_test_cpu(cpu, mask) && cond_func(cpu, info)) {
-               unsigned long flags;
+       if (wait)
+               scf_flags |= SCF_WAIT;
 
-               local_irq_save(flags);
-               func(info);
-               local_irq_restore(flags);
-       }
-       put_cpu();
+       preempt_disable();
+       smp_call_function_many_cond(mask, func, info, scf_flags, cond_func);
+       preempt_enable();
 }
 EXPORT_SYMBOL(on_each_cpu_cond_mask);
 
-void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
-                     void *info, bool wait)
-{
-       on_each_cpu_cond_mask(cond_func, func, info, wait, cpu_online_mask);
-}
-EXPORT_SYMBOL(on_each_cpu_cond);
-
 static void do_nothing(void *unused)
 {
 }
diff --git a/kernel/up.c b/kernel/up.c
index c6f323d..bf20b4a 100644
@@ -36,35 +36,6 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
 }
 EXPORT_SYMBOL(smp_call_function_single_async);
 
-void on_each_cpu(smp_call_func_t func, void *info, int wait)
-{
-       unsigned long flags;
-
-       local_irq_save(flags);
-       func(info);
-       local_irq_restore(flags);
-}
-EXPORT_SYMBOL(on_each_cpu);
-
-/*
- * Note we still need to test the mask even for UP
- * because we actually can get an empty mask from
- * code that on SMP might call us without the local
- * CPU in the mask.
- */
-void on_each_cpu_mask(const struct cpumask *mask,
-                     smp_call_func_t func, void *info, bool wait)
-{
-       unsigned long flags;
-
-       if (cpumask_test_cpu(0, mask)) {
-               local_irq_save(flags);
-               func(info);
-               local_irq_restore(flags);
-       }
-}
-EXPORT_SYMBOL(on_each_cpu_mask);
-
 /*
  * Preemption is disabled here to make sure the cond_func is called under the
  * same condtions in UP and SMP.
@@ -75,7 +46,7 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
        unsigned long flags;
 
        preempt_disable();
-       if (cond_func(0, info)) {
+       if ((!cond_func || cond_func(0, info)) && cpumask_test_cpu(0, mask)) {
                local_irq_save(flags);
                func(info);
                local_irq_restore(flags);
@@ -84,13 +55,6 @@ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
 }
 EXPORT_SYMBOL(on_each_cpu_cond_mask);
 
-void on_each_cpu_cond(smp_cond_func_t cond_func, smp_call_func_t func,
-                     void *info, bool wait)
-{
-       on_each_cpu_cond_mask(cond_func, func, info, wait, NULL);
-}
-EXPORT_SYMBOL(on_each_cpu_cond);
-
 int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
 {
        int ret;