x86/speculation/l1tf: Disallow non privileged high MMIO PROT_NONE mappings

author Andi Kleen <ak@linux.intel.com>

Wed, 13 Jun 2018 22:48:27 +0000 (15:48 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Wed, 15 Aug 2018 16:14:45 +0000 (18:14 +0200)
author Andi Kleen <ak@linux.intel.com>
Wed, 13 Jun 2018 22:48:27 +0000 (15:48 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 15 Aug 2018 16:14:45 +0000 (18:14 +0200)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h

index 441e19d..38b679d 100644 (file)
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1026,6 +1026,15 @@ static inline u16 pte_flags_pkey(unsigned long pte_flags)
  #endif
  }
  
+
+#define __HAVE_ARCH_PFN_MODIFY_ALLOWED 1
+extern bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot);
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+       return boot_cpu_has_bug(X86_BUG_L1TF);
+}
+
  #include <asm-generic/pgtable.h>
  #endif /* __ASSEMBLY__ */
  
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c

index d2dc043..5aad869 100644 (file)
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -121,3 +121,24 @@ const char *arch_vma_name(struct vm_area_struct *vma)
                 return "[mpx]";
         return NULL;
  }
+
+/*
+ * Only allow root to set high MMIO mappings to PROT_NONE.
+ * This prevents an unprivileged user from setting them to PROT_NONE and
+ * having the inverted PTE point to valid memory for L1TF speculation.
+ *
+ * Note: locked-down kernels may want to disable the root override.
+ */
+bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+{
+       if (!boot_cpu_has_bug(X86_BUG_L1TF))
+               return true;
+       if (!__pte_needs_invert(pgprot_val(prot)))
+               return true;
+       /* If it's real memory always allow */
+       if (pfn_valid(pfn))
+               return true;
+       if (pfn > l1tf_pfn_limit() && !capable(CAP_SYS_ADMIN))
+               return false;
+       return true;
+}
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h

index 4e8551c..0ffe405 100644 (file)
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -842,4 +842,16 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
  #endif
  #endif
  
+#ifndef __HAVE_ARCH_PFN_MODIFY_ALLOWED
+static inline bool pfn_modify_allowed(unsigned long pfn, pgprot_t prot)
+{
+       return true;
+}
+
+static inline bool arch_has_pfn_modify_check(void)
+{
+       return false;
+}
+#endif
+
  #endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/mm/memory.c b/mm/memory.c

index d2db2c4..88f8d6a 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1641,6 +1641,9 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
         if (track_pfn_insert(vma, &pgprot, __pfn_to_pfn_t(pfn, PFN_DEV)))
                 return -EINVAL;
  
+       if (!pfn_modify_allowed(pfn, pgprot))
+               return -EACCES;
+
         ret = insert_pfn(vma, addr, __pfn_to_pfn_t(pfn, PFN_DEV), pgprot);
  
         return ret;
@@ -1659,6 +1662,9 @@ int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
         if (track_pfn_insert(vma, &pgprot, pfn))
                 return -EINVAL;
  
+       if (!pfn_modify_allowed(pfn_t_to_pfn(pfn), pgprot))
+               return -EACCES;
+
         /*
          * If we don't have pte special, then we have to use the pfn_valid()
          * based VM_MIXEDMAP scheme (see vm_normal_page), and thus we *must*
@@ -1692,6 +1698,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
  {
         pte_t *pte;
         spinlock_t *ptl;
+       int err = 0;
  
         pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
         if (!pte)
@@ -1699,12 +1706,16 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
         arch_enter_lazy_mmu_mode();
         do {
                 BUG_ON(!pte_none(*pte));
+               if (!pfn_modify_allowed(pfn, prot)) {
+                       err = -EACCES;
+                       break;
+               }
                 set_pte_at(mm, addr, pte, pte_mkspecial(pfn_pte(pfn, prot)));
                 pfn++;
         } while (pte++, addr += PAGE_SIZE, addr != end);
         arch_leave_lazy_mmu_mode();
         pte_unmap_unlock(pte - 1, ptl);
-       return 0;
+       return err;
  }
  
  static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
@@ -1713,6 +1724,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
  {
         pmd_t *pmd;
         unsigned long next;
+       int err;
  
         pfn -= addr >> PAGE_SHIFT;
         pmd = pmd_alloc(mm, pud, addr);
@@ -1721,9 +1733,10 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
         VM_BUG_ON(pmd_trans_huge(*pmd));
         do {
                 next = pmd_addr_end(addr, end);
-               if (remap_pte_range(mm, pmd, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot))
-                       return -ENOMEM;
+               err = remap_pte_range(mm, pmd, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
+                       return err;
         } while (pmd++, addr = next, addr != end);
         return 0;
  }
@@ -1734,6 +1747,7 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
  {
         pud_t *pud;
         unsigned long next;
+       int err;
  
         pfn -= addr >> PAGE_SHIFT;
         pud = pud_alloc(mm, pgd, addr);
@@ -1741,9 +1755,10 @@ static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
                 return -ENOMEM;
         do {
                 next = pud_addr_end(addr, end);
-               if (remap_pmd_range(mm, pud, addr, next,
-                               pfn + (addr >> PAGE_SHIFT), prot))
-                       return -ENOMEM;
+               err = remap_pmd_range(mm, pud, addr, next,
+                               pfn + (addr >> PAGE_SHIFT), prot);
+               if (err)
+                       return err;
         } while (pud++, addr = next, addr != end);
         return 0;
  }
diff --git a/mm/mprotect.c b/mm/mprotect.c

index ae740c9..6896f77 100644 (file)
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -260,6 +260,42 @@ unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
         return pages;
  }
  
+static int prot_none_pte_entry(pte_t *pte, unsigned long addr,
+                              unsigned long next, struct mm_walk *walk)
+{
+       return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+               0 : -EACCES;
+}
+
+static int prot_none_hugetlb_entry(pte_t *pte, unsigned long hmask,
+                                  unsigned long addr, unsigned long next,
+                                  struct mm_walk *walk)
+{
+       return pfn_modify_allowed(pte_pfn(*pte), *(pgprot_t *)(walk->private)) ?
+               0 : -EACCES;
+}
+
+static int prot_none_test(unsigned long addr, unsigned long next,
+                         struct mm_walk *walk)
+{
+       return 0;
+}
+
+static int prot_none_walk(struct vm_area_struct *vma, unsigned long start,
+                          unsigned long end, unsigned long newflags)
+{
+       pgprot_t new_pgprot = vm_get_page_prot(newflags);
+       struct mm_walk prot_none_walk = {
+               .pte_entry = prot_none_pte_entry,
+               .hugetlb_entry = prot_none_hugetlb_entry,
+               .test_walk = prot_none_test,
+               .mm = current->mm,
+               .private = &new_pgprot,
+       };
+
+       return walk_page_range(start, end, &prot_none_walk);
+}
+
  int
  mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
         unsigned long start, unsigned long end, unsigned long newflags)
@@ -278,6 +314,19 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
         }
  
         /*
+        * Do PROT_NONE PFN permission checks here when we can still
+        * bail out without undoing a lot of state. This is a rather
+        * uncommon case, so doesn't need to be very optimized.
+        */
+       if (arch_has_pfn_modify_check() &&
+           (vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
+           (newflags & (VM_READ|VM_WRITE|VM_EXEC)) == 0) {
+               error = prot_none_walk(vma, start, end, newflags);
+               if (error)
+                       return error;
+       }
+
+       /*
          * If we make a private mapping writable we increase our commit;
          * but (without finer accounting) cannot reduce our commit if we
          * make it unwritable again. hugetlb mapping were accounted for
author	Andi Kleen <ak@linux.intel.com>
	Wed, 13 Jun 2018 22:48:27 +0000 (15:48 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Wed, 15 Aug 2018 16:14:45 +0000 (18:14 +0200)
arch/x86/include/asm/pgtable.h		patch \| blob \| history
arch/x86/mm/mmap.c		patch \| blob \| history
include/asm-generic/pgtable.h		patch \| blob \| history
mm/memory.c		patch \| blob \| history
mm/mprotect.c		patch \| blob \| history