mm: handle some PMD faults under the VMA lock
Author:     Matthew Wilcox (Oracle) <willy@infradead.org>
AuthorDate: Mon, 24 Jul 2023 18:54:05 +0000 (19:54 +0100)
Commit:     Andrew Morton <akpm@linux-foundation.org>
CommitDate: Fri, 18 Aug 2023 17:12:51 +0000 (10:12 -0700)
Push the VMA_LOCK check down from __handle_mm_fault() to
handle_pte_fault().  Once again, we refuse to call ->huge_fault() with the
VMA lock held, but we will now wait for a PMD migration entry, handle NUMA
migration, and set the accessed bit while holding the VMA lock.  We were
already doing this for anonymous VMAs, so it should be safe.
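
For context, the PMD-level work that now runs under the VMA lock lives in
__handle_mm_fault().  A simplified paraphrase of those paths (eliding the
FAULT_FLAG_UNSHARE handling and some assertions; not the exact upstream
code):

	vmf.orig_pmd = pmdp_get_lockless(vmf.pmd);
	if (unlikely(is_swap_pmd(vmf.orig_pmd))) {
		/* Waiting for a PMD migration entry is safe under the VMA lock. */
		if (is_pmd_migration_entry(vmf.orig_pmd))
			pmd_migration_entry_wait(mm, vmf.pmd);
		return 0;
	}
	if (pmd_trans_huge(vmf.orig_pmd) || pmd_devmap(vmf.orig_pmd)) {
		/* NUMA hinting fault: migration also runs under the VMA lock. */
		if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
			return do_huge_pmd_numa_page(&vmf);

		if ((flags & FAULT_FLAG_WRITE) && !pmd_write(vmf.orig_pmd)) {
			/* wp_huge_pmd() may still bail out with VM_FAULT_RETRY. */
			ret = wp_huge_pmd(&vmf);
			if (!(ret & VM_FAULT_FALLBACK))
				return ret;
		} else {
			/* Setting the accessed bit needs no bail-out either. */
			huge_pmd_set_accessed(&vmf);
			return 0;
		}
	}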

Link: https://lkml.kernel.org/r/20230724185410.1124082-6-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Arjun Roy <arjunroy@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Punit Agrawal <punit.agrawal@bytedance.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
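
When any of these paths bails out with vma_end_read() followed by
VM_FAULT_RETRY, the architecture's page-fault handler retries the fault
under the full mmap_lock.  A condensed sketch of that caller-side
contract, modelled on the x86 do_user_addr_fault() of this era (details
vary by architecture):

	vma = lock_vma_under_rcu(mm, address);
	if (!vma)
		goto lock_mmap;		/* no per-VMA lock; take the mmap_lock path */

	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
		vma_end_read(vma);	/* on RETRY the callee already dropped it */

	if (!(fault & VM_FAULT_RETRY))
		goto done;		/* handled entirely under the VMA lock */

	/* Fall back: retry the fault with the mmap_lock held. */
lock_mmap:
	...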
mm/memory.c

index 29353d5..932fc62 100644
@@ -4821,36 +4821,47 @@ out_map:
 
 static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
 {
-       if (vma_is_anonymous(vmf->vma))
+       struct vm_area_struct *vma = vmf->vma;
+       if (vma_is_anonymous(vma))
                return do_huge_pmd_anonymous_page(vmf);
-       if (vmf->vma->vm_ops->huge_fault)
-               return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+       if (vma->vm_ops->huge_fault) {
+               if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+                       vma_end_read(vma);
+                       return VM_FAULT_RETRY;
+               }
+               return vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+       }
        return VM_FAULT_FALLBACK;
 }
 
 /* `inline' is required to avoid gcc 4.1.2 build error */
 static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
 {
+       struct vm_area_struct *vma = vmf->vma;
        const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
        vm_fault_t ret;
 
-       if (vma_is_anonymous(vmf->vma)) {
+       if (vma_is_anonymous(vma)) {
                if (likely(!unshare) &&
-                   userfaultfd_huge_pmd_wp(vmf->vma, vmf->orig_pmd))
+                   userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd))
                        return handle_userfault(vmf, VM_UFFD_WP);
                return do_huge_pmd_wp_page(vmf);
        }
 
-       if (vmf->vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
-               if (vmf->vma->vm_ops->huge_fault) {
-                       ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+       if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
+               if (vma->vm_ops->huge_fault) {
+                       if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+                               vma_end_read(vma);
+                               return VM_FAULT_RETRY;
+                       }
+                       ret = vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
                        if (!(ret & VM_FAULT_FALLBACK))
                                return ret;
                }
        }
 
        /* COW or write-notify handled on pte level: split pmd. */
-       __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL);
+       __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL);
 
        return VM_FAULT_FALLBACK;
 }
@@ -4921,6 +4932,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
        pte_t entry;
 
+       if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vmf->vma)) {
+               vma_end_read(vmf->vma);
+               return VM_FAULT_RETRY;
+       }
+
        if (unlikely(pmd_none(*vmf->pmd))) {
                /*
                 * Leave __pte_alloc() until later: because vm_ops->fault may
@@ -5060,11 +5076,6 @@ retry_pud:
        if (pud_trans_unstable(vmf.pud))
                goto retry_pud;
 
-       if ((flags & FAULT_FLAG_VMA_LOCK) && !vma_is_anonymous(vma)) {
-               vma_end_read(vma);
-               return VM_FAULT_RETRY;
-       }
-
        if (pmd_none(*vmf.pmd) &&
            hugepage_vma_check(vma, vm_flags, false, true, true)) {
                ret = create_huge_pmd(&vmf);