hugetlb: unshare some PMDs when splitting VMAs
authorJames Houghton <jthoughton@google.com>
Wed, 4 Jan 2023 23:19:10 +0000 (23:19 +0000)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 24 Jan 2023 06:22:43 +0000 (07:22 +0100)
[ Upstream commit b30c14cd61025eeea2f2e8569606cd167ba9ad2d ]

PMD sharing can only be done in PUD_SIZE-aligned pieces of VMAs; however,
it is possible that HugeTLB VMAs are split without unsharing the PMDs
first.

Without this fix, it is possible to hit the uffd-wp-related WARN_ON_ONCE
in hugetlb_change_protection [1].  The key there is that
hugetlb_unshare_all_pmds will not attempt to unshare PMDs in
non-PUD_SIZE-aligned sections of the VMA.

It might seem ideal to unshare in hugetlb_vm_op_open, but we need to
unshare in both the new and old VMAs, so unsharing in hugetlb_vm_op_split
seems natural.

[1]: https://lore.kernel.org/linux-mm/CADrL8HVeOkj0QH5VZZbRzybNE8CG-tEGFshnA+bG9nMgcWtBSg@mail.gmail.com/

Link: https://lkml.kernel.org/r/20230104231910.1464197-1-jthoughton@google.com
Fixes: 6dfeaff93be1 ("hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp")
Signed-off-by: James Houghton <jthoughton@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
mm/hugetlb.c

index e7bd42f..8599f16 100644 (file)
@@ -82,6 +82,8 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
 
 /* Forward declaration */
 static int hugetlb_acct_memory(struct hstate *h, long delta);
+static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+               unsigned long start, unsigned long end);
 
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
@@ -4164,6 +4166,25 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
 {
        if (addr & ~(huge_page_mask(hstate_vma(vma))))
                return -EINVAL;
+
+       /*
+        * PMD sharing is only possible for PUD_SIZE-aligned address ranges
+        * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
+        * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
+        */
+       if (addr & ~PUD_MASK) {
+               /*
+                * hugetlb_vm_op_split is called right before we attempt to
+                * split the VMA. We will need to unshare PMDs in the old and
+                * new VMAs, so let's unshare before we split.
+                */
+               unsigned long floor = addr & PUD_MASK;
+               unsigned long ceil = floor + PUD_SIZE;
+
+               if (floor >= vma->vm_start && ceil <= vma->vm_end)
+                       hugetlb_unshare_pmds(vma, floor, ceil);
+       }
+
        return 0;
 }
 
@@ -6349,26 +6370,21 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
        }
 }
 
-/*
- * This function will unconditionally remove all the shared pmd pgtable entries
- * within the specific vma for a hugetlbfs memory range.
- */
-void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
+                                  unsigned long start,
+                                  unsigned long end)
 {
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
        struct mm_struct *mm = vma->vm_mm;
        struct mmu_notifier_range range;
-       unsigned long address, start, end;
+       unsigned long address;
        spinlock_t *ptl;
        pte_t *ptep;
 
        if (!(vma->vm_flags & VM_MAYSHARE))
                return;
 
-       start = ALIGN(vma->vm_start, PUD_SIZE);
-       end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
-
        if (start >= end)
                return;
 
@@ -6400,6 +6416,16 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
        mmu_notifier_invalidate_range_end(&range);
 }
 
+/*
+ * This function will unconditionally remove all the shared pmd pgtable entries
+ * within the specific vma for a hugetlbfs memory range.
+ */
+void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+{
+       hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
+                       ALIGN_DOWN(vma->vm_end, PUD_SIZE));
+}
+
 #ifdef CONFIG_CMA
 static bool cma_reserve_called __initdata;