Merge tag 'x86_mm_for_6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 30 Aug 2023 16:54:00 +0000 (09:54 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 30 Aug 2023 16:54:00 +0000 (09:54 -0700)
Pull x86 mm updates from Dave Hansen:
 "A pair of small x86/mm updates. The INVPCID one is purely a cleanup.
  The PAT one fixes a real issue, albeit a relatively obscure one
  (graphics device passthrough under Xen). The fix also makes the code
  much more readable.

  Summary:

   - Remove unnecessary "INVPCID single" feature tracking

   - Include PAT in page protection modify mask"

* tag 'x86_mm_for_6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Remove "INVPCID single" feature tracking
  x86/mm: Fix PAT bit missing from page protection modify mask

arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/pgtable_types.h
arch/x86/mm/init.c
arch/x86/mm/tlb.c

index b69b0d7..7b4ecbf 100644 (file)
 #define X86_FEATURE_CAT_L3             ( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2             ( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3             ( 7*32+ 6) /* Code and Data Prioritization L3 */
-#define X86_FEATURE_INVPCID_SINGLE     ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_HW_PSTATE          ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_XCOMPACTED         ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
index ba3e255..a6deb67 100644 (file)
  * instance, and is *not* included in this mask since
  * pte_modify() does modify it.
  */
-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |         \
-                        _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \
-                        _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC |  \
-                        _PAGE_UFFD_WP)
-#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+#define _COMMON_PAGE_CHG_MASK  (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |        \
+                                _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |\
+                                _PAGE_SOFT_DIRTY | _PAGE_DEVMAP | _PAGE_ENC | \
+                                _PAGE_UFFD_WP)
+#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
+#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
 
 /*
  * The cache modes defined here are used to translate between pure SW usage
index ffa25e9..679893e 100644 (file)
@@ -306,15 +306,6 @@ static void setup_pcid(void)
                 * start_secondary().
                 */
                cr4_set_bits(X86_CR4_PCIDE);
-
-               /*
-                * INVPCID's single-context modes (2/3) only work if we set
-                * X86_CR4_PCIDE, *and* we INVPCID support.  It's unusable
-                * on systems that have X86_CR4_PCIDE clear, or that have
-                * no INVPCID support at all.
-                */
-               if (boot_cpu_has(X86_FEATURE_INVPCID))
-                       setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
        } else {
                /*
                 * flush_tlb_all(), as currently implemented, won't work if
index 2d25391..453ea95 100644 (file)
@@ -1142,21 +1142,28 @@ void flush_tlb_one_kernel(unsigned long addr)
  */
 STATIC_NOPV void native_flush_tlb_one_user(unsigned long addr)
 {
-       u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+       u32 loaded_mm_asid;
+       bool cpu_pcide;
 
+       /* Flush 'addr' from the kernel PCID: */
        asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
 
+       /* If PTI is off there is no user PCID and nothing to flush. */
        if (!static_cpu_has(X86_FEATURE_PTI))
                return;
 
+       loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+       cpu_pcide      = this_cpu_read(cpu_tlbstate.cr4) & X86_CR4_PCIDE;
+
        /*
-        * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
-        * Just use invalidate_user_asid() in case we are called early.
+        * invpcid_flush_one(pcid>0) will #GP if CR4.PCIDE==0.  Check
+        * 'cpu_pcide' to ensure that *this* CPU will not trigger those
+        * #GP's even if called before CR4.PCIDE has been initialized.
         */
-       if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
-               invalidate_user_asid(loaded_mm_asid);
-       else
+       if (boot_cpu_has(X86_FEATURE_INVPCID) && cpu_pcide)
                invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
+       else
+               invalidate_user_asid(loaded_mm_asid);
 }
 
 void flush_tlb_one_user(unsigned long addr)