powerpc/64s/radix: serialize_against_pte_lookup IPIs trim mm_cpumask
author Nicholas Piggin <npiggin@gmail.com>
Thu, 17 Dec 2020 13:47:30 +0000 (23:47 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Mon, 8 Feb 2021 14:09:45 +0000 (01:09 +1100)
serialize_against_pte_lookup() performs IPIs to all CPUs in mm_cpumask.
Take this opportunity to try to trim the CPU out of mm_cpumask. This can
reduce the cost of future serialize_against_pte_lookup() calls and/or the
cost of future TLB flushes.
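
The lookup being serialized against is find_current_mm_pte(), which walks
page tables locklessly with local interrupts disabled, so a CPU cannot take
the serializing IPI in the middle of a walk. A minimal sketch of that reader
side (illustrative only, not part of this patch):

	unsigned long flags;
	pte_t *ptep;

	local_irq_save(flags);		/* the serializing IPI cannot be taken */
	ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
	if (ptep) {
		/* page tables cannot be freed while interrupts stay off */
	}
	local_irq_restore(flags);	/* IPI, and now the mask trim, may run */

Once smp_call_function_many() returns, any such walk that began before the
caller's page table update has completed, and each IPI'd CPU has had a
chance to drop itself from mm_cpumask.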

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201217134731.488135-7-npiggin@gmail.com
arch/powerpc/mm/book3s64/pgtable.c
arch/powerpc/mm/book3s64/radix_tlb.c

diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 5b3a3ba..78c492e 100644
@@ -79,10 +79,17 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
        return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
 }
 
-static void do_nothing(void *unused)
-{
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
 
+static void do_serialize(void *arg)
+{
+       /* We've taken the IPI, so try to trim the mask while here */
+       if (radix_enabled()) {
+               struct mm_struct *mm = arg;
+               exit_lazy_flush_tlb(mm, false);
+       }
 }
+
 /*
  * Serialize against find_current_mm_pte which does lock-less
  * lookup in page tables with local interrupts disabled. For huge pages
@@ -96,7 +103,7 @@ static void do_nothing(void *unused)
 void serialize_against_pte_lookup(struct mm_struct *mm)
 {
        smp_mb();
-       smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1);
+       smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1);
 }
 
 /*
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 1ef5d4e..d7f1a6b 100644
@@ -639,7 +639,11 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm)
        return false;
 }
 
-static void exit_lazy_flush_tlb(struct mm_struct *mm)
+/*
+ * If always_flush is true, then flush even if this CPU can't be removed
+ * from mm_cpumask.
+ */
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
 {
        unsigned long pid = mm->context.id;
        int cpu = smp_processor_id();
@@ -652,7 +656,7 @@ static void exit_lazy_flush_tlb(struct mm_struct *mm)
         * done with interrupts off.
         */
        if (current->mm == mm)
-               goto out_flush;
+               goto out;
 
        if (current->active_mm == mm) {
                WARN_ON_ONCE(current->mm != NULL);
@@ -674,17 +678,19 @@ static void exit_lazy_flush_tlb(struct mm_struct *mm)
        if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
                atomic_dec(&mm->context.active_cpus);
                cpumask_clear_cpu(cpu, mm_cpumask(mm));
+               always_flush = true;
        }
 
-out_flush:
-       _tlbiel_pid(pid, RIC_FLUSH_ALL);
+out:
+       if (always_flush)
+               _tlbiel_pid(pid, RIC_FLUSH_ALL);
 }
 
 #ifdef CONFIG_SMP
 static void do_exit_flush_lazy_tlb(void *arg)
 {
        struct mm_struct *mm = arg;
-       exit_lazy_flush_tlb(mm);
+       exit_lazy_flush_tlb(mm, true);
 }
 
 static void exit_flush_lazy_tlbs(struct mm_struct *mm)
@@ -746,7 +752,7 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
                         * to trim.
                         */
                        if (tick_and_test_trim_clock()) {
-                               exit_lazy_flush_tlb(mm);
+                               exit_lazy_flush_tlb(mm, true);
                                return FLUSH_TYPE_NONE;
                        }
                }
@@ -792,7 +798,7 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
                if (current->mm == mm)
                        return FLUSH_TYPE_LOCAL;
                if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
-                       exit_lazy_flush_tlb(mm);
+                       exit_lazy_flush_tlb(mm, true);
                return FLUSH_TYPE_NONE;
        }
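
Taken together, the radix_tlb.c hunks leave exit_lazy_flush_tlb() with
roughly this shape (a simplified consolidation of the diff above; the lazy
active_mm handling is elided):

	void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
	{
		unsigned long pid = mm->context.id;
		int cpu = smp_processor_id();

		if (current->mm == mm)
			goto out;	/* mm is in use here, cannot trim */

		/* ... switch a lazy active_mm reference over to init_mm ... */

		if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
			atomic_dec(&mm->context.active_cpus);
			cpumask_clear_cpu(cpu, mm_cpumask(mm));
			always_flush = true;	/* trimmed: flush stale PID */
		}
	out:
		if (always_flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}

do_serialize() passes always_flush=false, so the IPI only pays for a flush
when it actually removes the CPU from mm_cpumask; the pre-existing exit and
trim paths pass true and keep their unconditional flush behaviour.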