mm/hmm: make full use of walk_page_range()
author Ralph Campbell <rcampbell@nvidia.com>
Mon, 4 Nov 2019 22:21:40 +0000 (14:21 -0800)
committer Jason Gunthorpe <jgg@mellanox.com>
Sat, 23 Nov 2019 23:56:45 +0000 (19:56 -0400)
hmm_range_fault() calls find_vma() and walk_page_range() in a loop.  This
is unnecessary duplication since walk_page_range() already calls
find_vma() in a loop.

Simplify hmm_range_fault() by defining a test_walk callback function to
filter unhandled vmas.
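
For reference, walk_page_range() consults the test_walk hook before
visiting a vma's page tables.  A minimal sketch of the return-value
convention (the function name here is hypothetical; the convention is
the one implemented in mm/pagewalk.c):

	/*
	 * test_walk contract used by walk_page_range():
	 *   < 0 - abort the walk and return the error to the caller
	 *     0 - walk this vma with the other mm_walk_ops callbacks
	 *   > 0 - skip this vma and continue with the next one
	 */
	static int example_test_walk(unsigned long start, unsigned long end,
				     struct mm_walk *walk)
	{
		if (walk->vma->vm_flags & VM_PFNMAP)
			return 1;	/* skip this vma, keep walking */
		return 0;		/* descend into the page tables */
	}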

This also fixes a bug where hmm_range_fault() was not checking start >=
vma->vm_start before checking vma->vm_flags, so hmm_range_fault() could
return an error based on the wrong vma for the requested range.
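
To illustrate (layout hypothetical, not from this patch): find_vma()
returns the first vma whose vm_end lies above the given address, and
that vma may begin well above the address when it falls in an unmapped
hole:

	/*
	 *      start     vma->vm_start            vma->vm_end
	 *        |--hole--|======= VM_IO vma ========|
	 *
	 * find_vma(mm, start) returns the VM_IO vma, the first one with
	 * vm_end > start.  Without a start >= vma->vm_start check, the
	 * old loop judged the hole by that vma's vm_flags and failed
	 * the whole request with -EFAULT, even though start does not
	 * lie inside the vma at all.
	 */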

It also fixes a bug where an error was returned if the vma has no read
access and the caller did not request a fault; there shouldn't be any
error return code in that case.
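
A sketch of the caller-visible difference, assuming a range that
overlaps a PROT_NONE vma and a snapshot-only call (HMM_FAULT_SNAPSHOT
requests no faults; names as in include/linux/hmm.h at this point):

	ret = hmm_range_fault(&range, HMM_FAULT_SNAPSHOT);
	/*
	 * Before: ret == -EPERM and the entire pfns array was cleared
	 *         to range->values[HMM_PFN_NONE].
	 * After:  only the PROT_NONE pages read back as
	 *         range->values[HMM_PFN_NONE]; the walk skips that vma
	 *         and continues with the next one.
	 */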

Link: https://lore.kernel.org/r/20191104222141.5173-2-rcampbell@nvidia.com
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
mm/hmm.c

index aed2f39..d903b15 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -65,18 +65,15 @@ err:
        return -EFAULT;
 }
 
-static int hmm_pfns_bad(unsigned long addr,
-                       unsigned long end,
-                       struct mm_walk *walk)
+static int hmm_pfns_fill(unsigned long addr, unsigned long end,
+               struct hmm_range *range, enum hmm_pfn_value_e value)
 {
-       struct hmm_vma_walk *hmm_vma_walk = walk->private;
-       struct hmm_range *range = hmm_vma_walk->range;
        uint64_t *pfns = range->pfns;
        unsigned long i;
 
        i = (addr - range->start) >> PAGE_SHIFT;
        for (; addr < end; addr += PAGE_SIZE, i++)
-               pfns[i] = range->values[HMM_PFN_ERROR];
+               pfns[i] = range->values[value];
 
        return 0;
 }
@@ -403,7 +400,7 @@ again:
                }
                return 0;
        } else if (!pmd_present(pmd))
-               return hmm_pfns_bad(start, end, walk);
+               return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 
        if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
                /*
@@ -431,7 +428,7 @@ again:
         * recover.
         */
        if (pmd_bad(pmd))
-               return hmm_pfns_bad(start, end, walk);
+               return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 
        ptep = pte_offset_map(pmdp, addr);
        i = (addr - range->start) >> PAGE_SHIFT;
@@ -589,13 +586,47 @@ unlock:
 #define hmm_vma_walk_hugetlb_entry NULL
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static void hmm_pfns_clear(struct hmm_range *range,
-                          uint64_t *pfns,
-                          unsigned long addr,
-                          unsigned long end)
+static int hmm_vma_walk_test(unsigned long start, unsigned long end,
+                            struct mm_walk *walk)
 {
-       for (; addr < end; addr += PAGE_SIZE, pfns++)
-               *pfns = range->values[HMM_PFN_NONE];
+       struct hmm_vma_walk *hmm_vma_walk = walk->private;
+       struct hmm_range *range = hmm_vma_walk->range;
+       struct vm_area_struct *vma = walk->vma;
+
+       /*
+        * Skip vma ranges that don't have struct page backing them or
+        * map I/O devices directly.
+        */
+       if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP))
+               return -EFAULT;
+
+       /*
+        * If the vma does not allow read access, then assume that it does not
+        * allow write access either. HMM does not support architectures
+        * that allow write without read.
+        */
+       if (!(vma->vm_flags & VM_READ)) {
+               bool fault, write_fault;
+
+               /*
+                * Check to see if a fault is requested for any page in the
+                * range.
+                */
+               hmm_range_need_fault(hmm_vma_walk, range->pfns +
+                                       ((start - range->start) >> PAGE_SHIFT),
+                                       (end - start) >> PAGE_SHIFT,
+                                       0, &fault, &write_fault);
+               if (fault || write_fault)
+                       return -EFAULT;
+
+               hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
+               hmm_vma_walk->last = end;
+
+               /* Skip this vma and continue processing the next vma. */
+               return 1;
+       }
+
+       return 0;
 }
 
 static const struct mm_walk_ops hmm_walk_ops = {
@@ -603,6 +634,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
        .pmd_entry      = hmm_vma_walk_pmd,
        .pte_hole       = hmm_vma_walk_hole,
        .hugetlb_entry  = hmm_vma_walk_hugetlb_entry,
+       .test_walk      = hmm_vma_walk_test,
 };
 
 /**
@@ -635,11 +667,12 @@ static const struct mm_walk_ops hmm_walk_ops = {
  */
 long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 {
-       const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
-       unsigned long start = range->start, end;
-       struct hmm_vma_walk hmm_vma_walk;
+       struct hmm_vma_walk hmm_vma_walk = {
+               .range = range,
+               .last = range->start,
+               .flags = flags,
+       };
        struct mm_struct *mm = range->notifier->mm;
-       struct vm_area_struct *vma;
        int ret;
 
        lockdep_assert_held(&mm->mmap_sem);
@@ -649,53 +682,12 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
                if (mmu_interval_check_retry(range->notifier,
                                             range->notifier_seq))
                        return -EBUSY;
+               ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
+                                     &hmm_walk_ops, &hmm_vma_walk);
+       } while (ret == -EBUSY);
 
-               vma = find_vma(mm, start);
-               if (vma == NULL || (vma->vm_flags & device_vma))
-                       return -EFAULT;
-
-               if (!(vma->vm_flags & VM_READ)) {
-                       /*
-                        * If vma do not allow read access, then assume that it
-                        * does not allow write access, either. HMM does not
-                        * support architecture that allow write without read.
-                        */
-                       hmm_pfns_clear(range, range->pfns,
-                               range->start, range->end);
-                       return -EPERM;
-               }
-
-               hmm_vma_walk.pgmap = NULL;
-               hmm_vma_walk.last = start;
-               hmm_vma_walk.flags = flags;
-               hmm_vma_walk.range = range;
-               end = min(range->end, vma->vm_end);
-
-               walk_page_range(vma->vm_mm, start, end, &hmm_walk_ops,
-                               &hmm_vma_walk);
-
-               do {
-                       ret = walk_page_range(vma->vm_mm, start, end,
-                                       &hmm_walk_ops, &hmm_vma_walk);
-                       start = hmm_vma_walk.last;
-
-                       /* Keep trying while the range is valid. */
-               } while (ret == -EBUSY &&
-                        !mmu_interval_check_retry(range->notifier,
-                                                  range->notifier_seq));
-
-               if (ret) {
-                       unsigned long i;
-
-                       i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
-                       hmm_pfns_clear(range, &range->pfns[i],
-                               hmm_vma_walk.last, range->end);
-                       return ret;
-               }
-               start = end;
-
-       } while (start < range->end);
-
+       if (ret)
+               return ret;
        return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
 }
 EXPORT_SYMBOL(hmm_range_fault);
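
For context, hmm_range_fault() still runs under mmap_sem inside an
mmu_interval notifier retry loop.  A minimal caller sketch, assuming a
populated struct hmm_range and a hypothetical driver_lock that
serializes against invalidation callbacks:

	again:
		range.notifier_seq = mmu_interval_read_begin(range.notifier);
		down_read(&mm->mmap_sem);	/* mm == range.notifier->mm */
		ret = hmm_range_fault(&range, 0);
		up_read(&mm->mmap_sem);
		if (ret < 0) {
			if (ret == -EBUSY)
				goto again;	/* raced with an invalidation */
			return ret;
		}

		mutex_lock(&driver_lock);	/* hypothetical driver lock */
		if (mmu_interval_read_retry(range.notifier, range.notifier_seq)) {
			mutex_unlock(&driver_lock);
			goto again;
		}
		/* range.pfns[] is stable until driver_lock is released */

On -EBUSY the caller must restart with a fresh
mmu_interval_read_begin(); the sequence recheck under driver_lock then
guarantees the snapshot was not invalidated before the driver consumed
it.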