mm/gup: fix FOLL_FORCE COW security issue and remove FOLL_COW

author David Hildenbrand <david@redhat.com>
Tue, 9 Aug 2022 20:56:40 +0000 (22:56 +0200)
committer Andrew Morton <akpm@linux-foundation.org>
Sat, 20 Aug 2022 22:17:44 +0000 (15:17 -0700)
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 3bedc449c14d8f80530dd7ce6ece6c7c57d6eaa7..982f2607180b86bad07daaeb42b7a840b0c45e09 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2885,7 +2885,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
  #define FOLL_MIGRATION 0x400   /* wait for page to replace migration entry */
  #define FOLL_TRIED     0x800   /* a retry, previous pass started an IO */
  #define FOLL_REMOTE    0x2000  /* we are working on non-current tsk/mm */
-#define FOLL_COW       0x4000  /* internal GUP flag */
  #define FOLL_ANON      0x8000  /* don't do file mappings */
  #define FOLL_LONGTERM  0x10000 /* mapping lifetime is indefinite: see below */
  #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
diff --git a/mm/gup.c b/mm/gup.c

index 7328251574307b17423d410c554e2879ffff85f2..5abdaf487460567800542482757f577dccad19ac 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -478,14 +478,42 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
         return -EEXIST;
  }
  
-/*
- * FOLL_FORCE can write to even unwritable pte's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PTEs in COW mappings. */
+static inline bool can_follow_write_pte(pte_t pte, struct page *page,
+                                       struct vm_area_struct *vma,
+                                       unsigned int flags)
  {
-       return pte_write(pte) ||
-               ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+       /* If the pte is writable, we can write to the page. */
+       if (pte_write(pte))
+               return true;
+
+       /* Maybe FOLL_FORCE is set to override it? */
+       if (!(flags & FOLL_FORCE))
+               return false;
+
+       /* But FOLL_FORCE has no effect on shared mappings */
+       if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+               return false;
+
+       /* ... or read-only private ones */
+       if (!(vma->vm_flags & VM_MAYWRITE))
+               return false;
+
+       /* ... or already writable ones that just need to take a write fault */
+       if (vma->vm_flags & VM_WRITE)
+               return false;
+
+       /*
+        * See can_change_pte_writable(): we broke COW and could map the page
+        * writable if we have an exclusive anonymous page ...
+        */
+       if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+               return false;
+
+       /* ... and a write-fault isn't required for other reasons. */
+       if (vma_soft_dirty_enabled(vma) && !pte_soft_dirty(pte))
+               return false;
+       return !userfaultfd_pte_wp(vma, pte);
  }
  
  static struct page *follow_page_pte(struct vm_area_struct *vma,
@@ -528,12 +556,19 @@ retry:
         }
         if ((flags & FOLL_NUMA) && pte_protnone(pte))
                 goto no_page;
-       if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
-               pte_unmap_unlock(ptep, ptl);
-               return NULL;
-       }
  
         page = vm_normal_page(vma, address, pte);
+
+       /*
+        * We only care about anon pages in can_follow_write_pte() and don't
+        * have to worry about pte_devmap() pages because they are never anon.
+        */
+       if ((flags & FOLL_WRITE) &&
+           !can_follow_write_pte(pte, page, vma, flags)) {
+               page = NULL;
+               goto out;
+       }
+
         if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
                 /*
                  * Only return device mapping pages in the FOLL_GET or FOLL_PIN
@@ -986,17 +1021,6 @@ static int faultin_page(struct vm_area_struct *vma,
                 return -EBUSY;
         }
  
-       /*
-        * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
-        * necessary, even if maybe_mkwrite decided not to set pte_write. We
-        * can thus safely do subsequent page lookups as if they were reads.
-        * But only do so when looping for pte_write is futile: in some cases
-        * userspace may also be wanting to write to the gotten user page,
-        * which a read fault here might prevent (a readonly page might get
-        * reCOWed by userspace write).
-        */
-       if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
-               *flags |= FOLL_COW;
         return 0;
  }
  
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index 8a7c1b344abefb4b1b3903ecc61d143d98d6a9bc..e9414ee57c5b149ac4d6434ba2ba3385630a03e9 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1040,12 +1040,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
  
         assert_spin_locked(pmd_lockptr(mm, pmd));
  
-       /*
-        * When we COW a devmap PMD entry, we split it into PTEs, so we should
-        * not be in this function with `flags & FOLL_COW` set.
-        */
-       WARN_ONCE(flags & FOLL_COW, "mm: In follow_devmap_pmd with FOLL_COW set");
-
         /* FOLL_GET and FOLL_PIN are mutually exclusive. */
         if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
                          (FOLL_PIN | FOLL_GET)))
@@ -1395,14 +1389,42 @@ fallback:
         return VM_FAULT_FALLBACK;
  }
  
-/*
- * FOLL_FORCE can write to even unwritable pmd's, but only
- * after we've gone through a COW cycle and they are dirty.
- */
-static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
+/* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
+static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
+                                       struct vm_area_struct *vma,
+                                       unsigned int flags)
  {
-       return pmd_write(pmd) ||
-              ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
+       /* If the pmd is writable, we can write to the page. */
+       if (pmd_write(pmd))
+               return true;
+
+       /* Maybe FOLL_FORCE is set to override it? */
+       if (!(flags & FOLL_FORCE))
+               return false;
+
+       /* But FOLL_FORCE has no effect on shared mappings */
+       if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+               return false;
+
+       /* ... or read-only private ones */
+       if (!(vma->vm_flags & VM_MAYWRITE))
+               return false;
+
+       /* ... or already writable ones that just need to take a write fault */
+       if (vma->vm_flags & VM_WRITE)
+               return false;
+
+       /*
+        * See can_change_pte_writable(): we broke COW and could map the page
+        * writable if we have an exclusive anonymous page ...
+        */
+       if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+               return false;
+
+       /* ... and a write-fault isn't required for other reasons. */
+       if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd))
+               return false;
+       return !userfaultfd_huge_pmd_wp(vma, pmd);
  }
  
  struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
@@ -1411,12 +1433,16 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
                                    unsigned int flags)
  {
         struct mm_struct *mm = vma->vm_mm;
-       struct page *page = NULL;
+       struct page *page;
  
         assert_spin_locked(pmd_lockptr(mm, pmd));
  
-       if (flags & FOLL_WRITE && !can_follow_write_pmd(*pmd, flags))
-               goto out;
+       page = pmd_page(*pmd);
+       VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+
+       if ((flags & FOLL_WRITE) &&
+           !can_follow_write_pmd(*pmd, page, vma, flags))
+               return NULL;
  
         /* Avoid dumping huge zero page */
         if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
@@ -1424,10 +1450,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
  
         /* Full NUMA hinting faults to serialise migration in fault paths */
         if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
-               goto out;
-
-       page = pmd_page(*pmd);
-       VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
+               return NULL;
  
         if (!pmd_write(*pmd) && gup_must_unshare(flags, page))
                 return ERR_PTR(-EMLINK);
@@ -1444,7 +1467,6 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
         page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
         VM_BUG_ON_PAGE(!PageCompound(page) && !is_zone_device_page(page), page);
  
-out:
         return page;
  }
author	David Hildenbrand <david@redhat.com>
	Tue, 9 Aug 2022 20:56:40 +0000 (22:56 +0200)
committer	Andrew Morton <akpm@linux-foundation.org>
	Sat, 20 Aug 2022 22:17:44 +0000 (15:17 -0700)
include/linux/mm.h		patch \| blob \| history
mm/gup.c		patch \| blob \| history
mm/huge_memory.c		patch \| blob \| history