mm/hugetlb: fix race condition of uffd missing/minor handling

author Peter Xu <peterx@redhat.com>

Tue, 4 Oct 2022 19:33:58 +0000 (15:33 -0400)

committer Andrew Morton <akpm@linux-foundation.org>

Thu, 13 Oct 2022 01:51:50 +0000 (18:51 -0700)
author Peter Xu <peterx@redhat.com>
Tue, 4 Oct 2022 19:33:58 +0000 (15:33 -0400)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 13 Oct 2022 01:51:50 +0000 (18:51 -0700)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 9a910612336dad400ac604432b0979d5ce40ff52..bf9d8d04bf4f82760173765fe6d2da856de154a5 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5535,6 +5535,23 @@ static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
         return handle_userfault(&vmf, reason);
  }
  
+/*
+ * Re-read the pte with the pgtable lock held and compare it to old_pte.
+ * Returns true if the pte is unchanged, false if it changed or is changing.
+ */
+static bool hugetlb_pte_stable(struct hstate *h, struct mm_struct *mm,
+                              pte_t *ptep, pte_t old_pte)
+{
+       spinlock_t *ptl = huge_pte_lock(h, mm, ptep);
+       bool unchanged;
+
+       /* Sample the pte only while the pgtable lock is held. */
+       unchanged = pte_same(huge_ptep_get(ptep), old_pte);
+       spin_unlock(ptl);
+
+       return unchanged;
+}
+
  static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
                         struct vm_area_struct *vma,
                         struct address_space *mapping, pgoff_t idx,
@@ -5575,10 +5592,33 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
                 if (idx >= size)
                         goto out;
                 /* Check for page in userfault range */
-               if (userfaultfd_missing(vma))
-                       return hugetlb_handle_userfault(vma, mapping, idx,
-                                                      flags, haddr, address,
-                                                      VM_UFFD_MISSING);
+               if (userfaultfd_missing(vma)) {
+                       /*
+                        * Since hugetlb_no_page() was examining pte
+                        * without pgtable lock, we need to re-test under
+                        * lock because the pte may not be stable and could
+                        * have changed from under us.  Try to detect
+                        * ptes that changed or are still changing, and
+                        * retry properly when needed.
+                        *
+                        * Note that userfaultfd is actually fine with
+                        * false positives (e.g. caused by a changed pte),
+                        * but not wrong logical events (e.g. caused by
+                        * reading a pte while it changes).  The latter can
+                        * confuse userspace, so the strictness is very
+                        * much preferred.  E.g., MISSING event should
+                        * never happen on the page after UFFDIO_COPY has
+                        * correctly installed the page and returned.
+                        */
+                       if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) {
+                               ret = 0;
+                               goto out;
+                       }
+
+                       return hugetlb_handle_userfault(vma, mapping, idx, flags,
+                                                       haddr, address,
+                                                       VM_UFFD_MISSING);
+               }
  
                 page = alloc_huge_page(vma, haddr, 0);
                 if (IS_ERR(page)) {
@@ -5644,9 +5684,14 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
                 if (userfaultfd_minor(vma)) {
                         unlock_page(page);
                         put_page(page);
-                       return hugetlb_handle_userfault(vma, mapping, idx,
-                                                      flags, haddr, address,
-                                                      VM_UFFD_MINOR);
+                       /* See comment in userfaultfd_missing() block above */
+                       if (!hugetlb_pte_stable(h, mm, ptep, old_pte)) {
+                               ret = 0;
+                               goto out;
+                       }
+                       return hugetlb_handle_userfault(vma, mapping, idx, flags,
+                                                       haddr, address,
+                                                       VM_UFFD_MINOR);
                 }
         }
author	Peter Xu <peterx@redhat.com>
	Tue, 4 Oct 2022 19:33:58 +0000 (15:33 -0400)
committer	Andrew Morton <akpm@linux-foundation.org>
	Thu, 13 Oct 2022 01:51:50 +0000 (18:51 -0700)