Merge tag 'powerpc-6.6-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 3020a8b..39acc2c 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -127,21 +127,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
-static __always_inline void __tlbie_pid_lpid(unsigned long pid,
-                                            unsigned long lpid,
-                                            unsigned long ric)
-{
-       unsigned long rb, rs, prs, r;
-
-       rb = PPC_BIT(53); /* IS = 1 */
-       rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
-       prs = 1; /* process scoped */
-       r = 1;   /* radix format */
-
-       asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
-                    : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-       trace_tlbie(0, 0, rb, rs, ric, prs, r);
-}
 static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
 {
        unsigned long rb,rs,prs,r;
@@ -202,23 +187,6 @@ static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
        trace_tlbie(0, 0, rb, rs, ric, prs, r);
 }
 
-static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
-                                           unsigned long lpid,
-                                           unsigned long ap, unsigned long ric)
-{
-       unsigned long rb, rs, prs, r;
-
-       rb = va & ~(PPC_BITMASK(52, 63));
-       rb |= ap << PPC_BITLSHIFT(58);
-       rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
-       prs = 1; /* process scoped */
-       r = 1;   /* radix format */
-
-       asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
-                    : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
-       trace_tlbie(0, 0, rb, rs, ric, prs, r);
-}
-
 static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                            unsigned long ap, unsigned long ric)
 {
@@ -264,22 +232,6 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
        }
 }
 
-static inline void fixup_tlbie_va_range_lpid(unsigned long va,
-                                            unsigned long pid,
-                                            unsigned long lpid,
-                                            unsigned long ap)
-{
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
-               asm volatile("ptesync" : : : "memory");
-               __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
-       }
-
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
-               asm volatile("ptesync" : : : "memory");
-               __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
-       }
-}
-
 static inline void fixup_tlbie_pid(unsigned long pid)
 {
        /*
@@ -299,26 +251,6 @@ static inline void fixup_tlbie_pid(unsigned long pid)
        }
 }
 
-static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
-{
-       /*
-        * We can use any address for the invalidation, pick one which is
-        * probably unused as an optimisation.
-        */
-       unsigned long va = ((1UL << 52) - 1);
-
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
-               asm volatile("ptesync" : : : "memory");
-               __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
-       }
-
-       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
-               asm volatile("ptesync" : : : "memory");
-               __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
-                               RIC_FLUSH_TLB);
-       }
-}
-
 static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
                                       unsigned long ap)
 {
@@ -416,31 +348,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
-static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
-                                  unsigned long ric)
-{
-       asm volatile("ptesync" : : : "memory");
-
-       /*
-        * Workaround the fact that the "ric" argument to __tlbie_pid
-        * must be a compile-time contraint to match the "i" constraint
-        * in the asm statement.
-        */
-       switch (ric) {
-       case RIC_FLUSH_TLB:
-               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
-               fixup_tlbie_pid_lpid(pid, lpid);
-               break;
-       case RIC_FLUSH_PWC:
-               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
-               break;
-       case RIC_FLUSH_ALL:
-       default:
-               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
-               fixup_tlbie_pid_lpid(pid, lpid);
-       }
-       asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-}
 struct tlbiel_pid {
        unsigned long pid;
        unsigned long ric;
@@ -566,20 +473,6 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end,
        fixup_tlbie_va_range(addr - page_size, pid, ap);
 }
 
-static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
-                                        unsigned long pid, unsigned long lpid,
-                                        unsigned long page_size,
-                                        unsigned long psize)
-{
-       unsigned long addr;
-       unsigned long ap = mmu_get_ap(psize);
-
-       for (addr = start; addr < end; addr += page_size)
-               __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
-
-       fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
-}
-
 static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
                                      unsigned long psize, unsigned long ric)
 {
@@ -660,18 +553,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end,
        asm volatile("eieio; tlbsync; ptesync": : :"memory");
 }
 
-static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
-                                       unsigned long pid, unsigned long lpid,
-                                       unsigned long page_size,
-                                       unsigned long psize, bool also_pwc)
-{
-       asm volatile("ptesync" : : : "memory");
-       if (also_pwc)
-               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
-       __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
-       asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-}
-
 static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
                                unsigned long start, unsigned long end,
                                unsigned long pid, unsigned long page_size,
@@ -820,7 +701,7 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
         * that's what the caller expects.
         */
        if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
-               atomic_dec(&mm->context.active_cpus);
+               dec_mm_active_cpus(mm);
                cpumask_clear_cpu(cpu, mm_cpumask(mm));
                always_flush = true;
        }
@@ -1316,7 +1197,35 @@ void radix__tlb_flush(struct mmu_gather *tlb)
         * See the comment for radix in arch_exit_mmap().
         */
        if (tlb->fullmm) {
-               __flush_all_mm(mm, true);
+               if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+                       /*
+                        * Shootdown based lazy tlb mm refcounting means we
+                        * have to IPI everyone in the mm_cpumask anyway soon
+                        * when the mm goes away, so might as well do it as
+                        * part of the final flush now.
+                        *
+                        * If lazy shootdown was improved to reduce IPIs (e.g.,
+                        * by batching), then it may end up being better to use
+                        * tlbies here instead.
+                        */
+                       preempt_disable();
+
+                       smp_mb(); /* see radix__flush_tlb_mm */
+                       exit_flush_lazy_tlbs(mm);
+                       _tlbiel_pid(mm->context.id, RIC_FLUSH_ALL);
+
+                       /*
+                        * It should not be possible to have coprocessors still
+                        * attached here.
+                        */
+                       if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0))
+                               __flush_all_mm(mm, true);
+
+                       preempt_enable();
+               } else {
+                       __flush_all_mm(mm, true);
+               }
+
        } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
                if (!tlb->freed_tables)
                        radix__flush_tlb_mm(mm);
@@ -1497,6 +1406,127 @@ void radix__flush_tlb_all(void)
 }
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
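+/*
+ * The *_lpid tlbie variants below take an explicit LPID and are only used
+ * for the process-scoped invalidations performed as part of the
+ * H_RPT_INVALIDATE hcall, which is why they live under
+ * CONFIG_KVM_BOOK3S_HV_POSSIBLE.
+ */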
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+                                            unsigned long lpid,
+                                            unsigned long ric)
+{
+       unsigned long rb, rs, prs, r;
+
+       rb = PPC_BIT(53); /* IS = 1 */
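+       /* RS[0:31] = PID, RS[32:63] = LPID (IBM bit numbering) */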
+       rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+       prs = 1; /* process scoped */
+       r = 1;   /* radix format */
+
+       asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+                    : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+       trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+                                           unsigned long lpid,
+                                           unsigned long ap, unsigned long ric)
+{
+       unsigned long rb, rs, prs, r;
+
+       rb = va & ~(PPC_BITMASK(52, 63));
+       rb |= ap << PPC_BITLSHIFT(58);
+       rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+       prs = 1; /* process scoped */
+       r = 1;   /* radix format */
+
+       asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+                    : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+       trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+       /*
+        * We can use any address for the invalidation, pick one which is
+        * probably unused as an optimisation.
+        */
+       unsigned long va = ((1UL << 52) - 1);
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               asm volatile("ptesync" : : : "memory");
+               __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+       }
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync" : : : "memory");
+               __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+                               RIC_FLUSH_TLB);
+       }
+}
+
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+                                  unsigned long ric)
+{
+       asm volatile("ptesync" : : : "memory");
+
+       /*
+        * Work around the fact that the "ric" argument to __tlbie_pid_lpid
+        * must be a compile-time constant to match the "i" constraint
+        * in the asm statement.
+        */
+       switch (ric) {
+       case RIC_FLUSH_TLB:
+               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+               fixup_tlbie_pid_lpid(pid, lpid);
+               break;
+       case RIC_FLUSH_PWC:
+               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+               break;
+       case RIC_FLUSH_ALL:
+       default:
+               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+               fixup_tlbie_pid_lpid(pid, lpid);
+       }
+       asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+                                            unsigned long pid,
+                                            unsigned long lpid,
+                                            unsigned long ap)
+{
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+               asm volatile("ptesync" : : : "memory");
+               __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+       }
+
+       if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+               asm volatile("ptesync" : : : "memory");
+               __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+       }
+}
+
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+                                        unsigned long pid, unsigned long lpid,
+                                        unsigned long page_size,
+                                        unsigned long psize)
+{
+       unsigned long addr;
+       unsigned long ap = mmu_get_ap(psize);
+
+       for (addr = start; addr < end; addr += page_size)
+               __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+       fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+                                       unsigned long pid, unsigned long lpid,
+                                       unsigned long page_size,
+                                       unsigned long psize, bool also_pwc)
+{
+       asm volatile("ptesync" : : : "memory");
+       if (also_pwc)
+               __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+       __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+       asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
 /*
  * Performs process-scoped invalidations for a given LPID
  * as part of H_RPT_INVALIDATE hcall.