Merge tag 'x86_mm_for_6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
[platform/kernel/linux-starfive.git] arch/x86/mm/tlb.c
index 16c5292..267acf2 100644
@@ -154,26 +154,30 @@ static inline u16 user_pcid(u16 asid)
        return ret;
 }
 
-static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
 {
+       unsigned long cr3 = __sme_pa(pgd) | lam;
+
        if (static_cpu_has(X86_FEATURE_PCID)) {
-               return __sme_pa(pgd) | kern_pcid(asid);
+               VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+               cr3 |= kern_pcid(asid);
        } else {
                VM_WARN_ON_ONCE(asid != 0);
-               return __sme_pa(pgd);
        }
+
+       return cr3;
 }
 
-static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
+                                             unsigned long lam)
 {
-       VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
        /*
         * Use boot_cpu_has() instead of this_cpu_has() as this function
         * might be called during early boot. This should work even after
         * boot because all CPUs have the same capabilities:
         */
        VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
-       return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
+       return build_cr3(pgd, asid, lam) | CR3_NOFLUSH;
 }
 
 /*
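For reference, the new composition is easy to model outside the kernel. A
minimal user-space sketch, assuming only that 'lam' arrives as a ready-to-OR
mask of CR3 bits and that the PCID occupies CR3 bits 11:0 (the names below are
local to the sketch, not kernel identifiers):

#include <stdint.h>

#define PCID_MASK       0xfffULL        /* PCID lives in CR3 bits 11:0 */

/* Mirror of the new build_cr3(): base address, LAM bits and PCID OR'd in. */
static uint64_t sketch_build_cr3(uint64_t pgd_pa, uint16_t pcid, uint64_t lam)
{
        return pgd_pa | lam | (pcid & PCID_MASK);
}
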
@@ -274,15 +278,16 @@ static inline void invalidate_user_asid(u16 asid)
                  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
 }
 
-static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
+                           bool need_flush)
 {
        unsigned long new_mm_cr3;
 
        if (need_flush) {
                invalidate_user_asid(new_asid);
-               new_mm_cr3 = build_cr3(pgdir, new_asid);
+               new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
        } else {
-               new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+               new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
        }
 
        /*
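The only difference between the two paths is CR3 bit 63 (CR3_NOFLUSH), which
tells the CPU to keep the TLB entries tagged with the new PCID. Continuing the
sketch above, under the same assumptions:

#define NOFLUSH_BIT     (1ULL << 63)    /* CR3 bit 63: skip the implicit flush */

static uint64_t sketch_new_mm_cr3(uint64_t pgd_pa, uint16_t pcid,
                                  uint64_t lam, int need_flush)
{
        uint64_t cr3 = sketch_build_cr3(pgd_pa, pcid, lam);

        /* Without a flush request, bit 63 preserves cached translations. */
        return need_flush ? cr3 : (cr3 | NOFLUSH_BIT);
}
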
@@ -491,6 +496,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 {
        struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
        u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+       unsigned long new_lam = mm_lam_cr3_mask(next);
        bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
        unsigned cpu = smp_processor_id();
        u64 next_tlb_gen;
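mm_lam_cr3_mask() is introduced elsewhere in this series. A plausible shape,
assuming the enabled LAM mode is precomputed as CR3-ready bits in the mm
context (the field name is an assumption, not taken from this diff):

/* Sketch: racy readers sample the mask once, hence the READ_ONCE(). */
static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
        return READ_ONCE(mm->context.lam_cr3_mask);
}
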
@@ -520,7 +526,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         * isn't free.
         */
 #ifdef CONFIG_DEBUG_VM
-       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
+       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid,
+                                                  tlbstate_lam_cr3_mask()))) {
                /*
                 * If we were to BUG here, we'd be very likely to kill
                 * the system so hard that we don't see the call trace.
@@ -552,10 +559,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         * instruction.
         */
        if (real_prev == next) {
+               /* Not actually switching mm's */
                VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
                           next->context.ctx_id);
 
                /*
+                * If this races with another thread that enables LAM, 'new_lam'
+                * might not match tlbstate_lam_cr3_mask().
+                */
+
+               /*
                 * Even in lazy TLB mode, the CPU should stay set in the
                 * mm_cpumask. The TLB shootdown code can figure out from
                 * cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
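The race noted in the comment above can only occur while LAM is being switched
on at runtime; user space requests that through arch_prctl(). A hedged sketch
of the enabling side (the constant value is taken from the merged x86 uAPI,
but verify it against your headers):

#include <sys/syscall.h>
#include <unistd.h>

#define ARCH_ENABLE_TAGGED_ADDR 0x4002  /* arch/x86/include/uapi/asm/prctl.h */

static int enable_lam_u57(void)
{
        /* Ask for 6 tag bits; the kernel then untags user bits 62:57. */
        return syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, 6);
}
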
@@ -622,15 +635,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
                barrier();
        }
 
+       set_tlbstate_lam_mode(next);
        if (need_flush) {
                this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
                this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-               load_new_mm_cr3(next->pgd, new_asid, true);
+               load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
 
                trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
        } else {
                /* The new ASID is already up to date. */
-               load_new_mm_cr3(next->pgd, new_asid, false);
+               load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
 
                trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
        }
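set_tlbstate_lam_mode() is the other new helper: it caches the mm's LAM mask
per-CPU so that tlbstate_lam_cr3_mask() (used by the CR3 consistency check
above and by __get_current_cr3_fast() below) never has to dereference the mm.
A plausible sketch, with the cpu_tlbstate field name assumed:

static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
{
        this_cpu_write(cpu_tlbstate.lam, mm_lam_cr3_mask(mm));
}

static inline unsigned long tlbstate_lam_cr3_mask(void)
{
        return this_cpu_read(cpu_tlbstate.lam);
}

Note that the call sits before load_new_mm_cr3() on both branches, so the
cached mask already matches CR3 by the time the new value is loaded.
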
@@ -691,6 +705,10 @@ void initialize_tlbstate_and_flush(void)
        /* Assert that CR3 already references the right mm. */
        WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
 
+       /* LAM expected to be disabled */
+       WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
+       WARN_ON(mm_lam_cr3_mask(mm));
+
        /*
         * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
         * doesn't work like other CR4 bits because it can only be set from
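The two new warnings check the hardware register and the software state
separately. The CR3 bits being tested are architectural; their positions, per
the LAM specification (shown for reference; the real definitions live in the
x86 uAPI headers):

#define X86_CR3_LAM_U57         (1UL << 61)     /* untag user bits 62:57 */
#define X86_CR3_LAM_U48         (1UL << 62)     /* untag user bits 62:48 */
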
@@ -699,8 +717,8 @@ void initialize_tlbstate_and_flush(void)
        WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
                !(cr4_read_shadow() & X86_CR4_PCIDE));
 
-       /* Force ASID 0 and force a TLB flush. */
-       write_cr3(build_cr3(mm->pgd, 0));
+       /* Disable LAM, force ASID 0 and force a TLB flush. */
+       write_cr3(build_cr3(mm->pgd, 0, 0));
 
        /* Reinitialize tlbstate. */
        this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
@@ -708,6 +726,7 @@ void initialize_tlbstate_and_flush(void)
        this_cpu_write(cpu_tlbstate.next_asid, 1);
        this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
        this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+       set_tlbstate_lam_mode(mm);
 
        for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
                this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
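After this function returns, the per-CPU cache and the live register must
agree: write_cr3() above loaded a LAM-free value, and set_tlbstate_lam_mode()
cached the mask of an mm that was just asserted to be LAM-free. A hedged debug
sketch of that invariant (illustration only, not part of the patch):

static void check_boot_lam_invariant(void)
{
        unsigned long cr3_lam = __read_cr3() &
                                (X86_CR3_LAM_U48 | X86_CR3_LAM_U57);

        /* Both sides were just initialized to "LAM off". */
        WARN_ON(cr3_lam != tlbstate_lam_cr3_mask());
}
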
@@ -1071,8 +1090,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
  */
 unsigned long __get_current_cr3_fast(void)
 {
-       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
-               this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+       unsigned long cr3 =
+               build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
+                         this_cpu_read(cpu_tlbstate.loaded_mm_asid),
+                         tlbstate_lam_cr3_mask());
 
        /* For now, be very restrictive about when this can be called. */
        VM_WARN_ON(in_nmi() || preemptible());
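Folding the LAM bits in here matters for callers that copy the live CR3 value
somewhere else, e.g. KVM's VMX code mirroring the host CR3 into the VMCS. A
sketch of that pattern (the shadow variable stands in for the real consumer):

static void sync_host_cr3(unsigned long *shadow_cr3)
{
        unsigned long cr3 = __get_current_cr3_fast();

        /* Rewrite the consumer's copy when CR3 (PGD, PCID or LAM) changed. */
        if (cr3 != *shadow_cr3)
                *shadow_cr3 = cr3;
}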