nouveau/hmm: fault one page at a time

author Ralph Campbell <rcampbell@nvidia.com>
Wed, 1 Jul 2020 22:53:48 +0000 (15:53 -0700)
committer Jason Gunthorpe <jgg@nvidia.com>
Fri, 10 Jul 2020 19:24:28 +0000 (16:24 -0300)
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c

index ba9f935..665dede 100644 (file)
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -516,7 +516,7 @@ static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
  static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
                                     struct hmm_range *range, u64 *ioctl_addr)
  {
-       unsigned long i, npages;
+       struct page *page;
  
         /*
          * The ioctl_addr prepared here is passed through nvif_object_ioctl()
@@ -525,42 +525,38 @@ static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
          * This is all just encoding the internal hmm representation into a
          * different nouveau internal representation.
          */
-       npages = (range->end - range->start) >> PAGE_SHIFT;
-       for (i = 0; i < npages; ++i) {
-               struct page *page;
-
-               if (!(range->hmm_pfns[i] & HMM_PFN_VALID)) {
-                       ioctl_addr[i] = 0;
-                       continue;
-               }
-
-               page = hmm_pfn_to_page(range->hmm_pfns[i]);
-               if (is_device_private_page(page))
-                       ioctl_addr[i] = nouveau_dmem_page_addr(page) |
-                                       NVIF_VMM_PFNMAP_V0_V |
-                                       NVIF_VMM_PFNMAP_V0_VRAM;
-               else
-                       ioctl_addr[i] = page_to_phys(page) |
-                                       NVIF_VMM_PFNMAP_V0_V |
-                                       NVIF_VMM_PFNMAP_V0_HOST;
-               if (range->hmm_pfns[i] & HMM_PFN_WRITE)
-                       ioctl_addr[i] |= NVIF_VMM_PFNMAP_V0_W;
+       if (!(range->hmm_pfns[0] & HMM_PFN_VALID)) {
+               ioctl_addr[0] = 0;
+               return;
         }
+
+       page = hmm_pfn_to_page(range->hmm_pfns[0]);
+       if (is_device_private_page(page))
+               ioctl_addr[0] = nouveau_dmem_page_addr(page) |
+                               NVIF_VMM_PFNMAP_V0_V |
+                               NVIF_VMM_PFNMAP_V0_VRAM;
+       else
+               ioctl_addr[0] = page_to_phys(page) |
+                               NVIF_VMM_PFNMAP_V0_V |
+                               NVIF_VMM_PFNMAP_V0_HOST;
+       if (range->hmm_pfns[0] & HMM_PFN_WRITE)
+               ioctl_addr[0] |= NVIF_VMM_PFNMAP_V0_W;
  }
  
  static int nouveau_range_fault(struct nouveau_svmm *svmm,
                                struct nouveau_drm *drm, void *data, u32 size,
-                              unsigned long hmm_pfns[], u64 *ioctl_addr,
+                              u64 *ioctl_addr, unsigned long hmm_flags,
                                struct svm_notifier *notifier)
  {
         unsigned long timeout =
                 jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
         /* Have HMM fault pages within the fault window to the GPU. */
+       unsigned long hmm_pfns[1];
         struct hmm_range range = {
                 .notifier = &notifier->notifier,
                 .start = notifier->notifier.interval_tree.start,
                 .end = notifier->notifier.interval_tree.last + 1,
-               .pfn_flags_mask = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE,
+               .default_flags = hmm_flags,
                 .hmm_pfns = hmm_pfns,
         };
         struct mm_struct *mm = notifier->notifier.mm;
@@ -575,11 +571,6 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
                 ret = hmm_range_fault(&range);
                 mmap_read_unlock(mm);
                 if (ret) {
-                       /*
-                        * FIXME: the input PFN_REQ flags are destroyed on
-                        * -EBUSY, we need to regenerate them, also for the
-                        * other continue below
-                        */
                         if (ret == -EBUSY)
                                 continue;
                         return ret;
@@ -614,17 +605,12 @@ nouveau_svm_fault(struct nvif_notify *notify)
         struct nvif_object *device = &svm->drm->client.device.object;
         struct nouveau_svmm *svmm;
         struct {
-               struct {
-                       struct nvif_ioctl_v0 i;
-                       struct nvif_ioctl_mthd_v0 m;
-                       struct nvif_vmm_pfnmap_v0 p;
-               } i;
-               u64 phys[16];
+               struct nouveau_pfnmap_args i;
+               u64 phys[1];
         } args;
-       unsigned long hmm_pfns[ARRAY_SIZE(args.phys)];
-       struct vm_area_struct *vma;
+       unsigned long hmm_flags;
         u64 inst, start, limit;
-       int fi, fn, pi, fill;
+       int fi, fn;
         int replay = 0, ret;
  
         /* Parse available fault buffer entries into a cache, and update
@@ -691,66 +677,53 @@ nouveau_svm_fault(struct nvif_notify *notify)
                  * window into a single update.
                  */
                 start = buffer->fault[fi]->addr;
-               limit = start + (ARRAY_SIZE(args.phys) << PAGE_SHIFT);
+               limit = start + PAGE_SIZE;
                 if (start < svmm->unmanaged.limit)
                         limit = min_t(u64, limit, svmm->unmanaged.start);
-               SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
  
-               mm = svmm->notifier.mm;
-               if (!mmget_not_zero(mm)) {
-                       nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
-                       continue;
-               }
-
-               /* Intersect fault window with the CPU VMA, cancelling
-                * the fault if the address is invalid.
+               /*
+                * Prepare the GPU-side update of all pages within the
+                * fault window, determining required pages and access
+                * permissions based on pending faults.
                  */
-               mmap_read_lock(mm);
-               vma = find_vma_intersection(mm, start, limit);
-               if (!vma) {
-                       SVMM_ERR(svmm, "wndw %016llx-%016llx", start, limit);
-                       mmap_read_unlock(mm);
-                       mmput(mm);
-                       nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
-                       continue;
+               args.i.p.addr = start;
+               args.i.p.page = PAGE_SHIFT;
+               args.i.p.size = PAGE_SIZE;
+               /*
+                * Determine required permissions based on GPU fault
+                * access flags.
+                * XXX: atomic?
+                */
+               switch (buffer->fault[fi]->access) {
+               case 0: /* READ. */
+                       hmm_flags = HMM_PFN_REQ_FAULT;
+                       break;
+               case 3: /* PREFETCH. */
+                       hmm_flags = 0;
+                       break;
+               default:
+                       hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE;
+                       break;
                 }
-               start = max_t(u64, start, vma->vm_start);
-               limit = min_t(u64, limit, vma->vm_end);
-               mmap_read_unlock(mm);
-               SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
  
-               if (buffer->fault[fi]->addr != start) {
-                       SVMM_ERR(svmm, "addr %016llx", buffer->fault[fi]->addr);
-                       mmput(mm);
+               mm = svmm->notifier.mm;
+               if (!mmget_not_zero(mm)) {
                         nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
                         continue;
                 }
  
-               /* Prepare the GPU-side update of all pages within the
-                * fault window, determining required pages and access
-                * permissions based on pending faults.
-                */
-               args.i.p.page = PAGE_SHIFT;
-               args.i.p.addr = start;
-               for (fn = fi, pi = 0;;) {
-                       /* Determine required permissions based on GPU fault
-                        * access flags.
-                        *XXX: atomic?
-                        */
-                       switch (buffer->fault[fn]->access) {
-                       case 0: /* READ. */
-                               hmm_pfns[pi++] = HMM_PFN_REQ_FAULT;
-                               break;
-                       case 3: /* PREFETCH. */
-                               hmm_pfns[pi++] = 0;
-                               break;
-                       default:
-                               hmm_pfns[pi++] = HMM_PFN_REQ_FAULT |
-                                                HMM_PFN_REQ_WRITE;
-                               break;
-                       }
-                       args.i.p.size = pi << PAGE_SHIFT;
+               notifier.svmm = svmm;
+               ret = mmu_interval_notifier_insert(&notifier.notifier, mm,
+                                                  args.i.p.addr, args.i.p.size,
+                                                  &nouveau_svm_mni_ops);
+               if (!ret) {
+                       ret = nouveau_range_fault(svmm, svm->drm, &args,
+                               sizeof(args), args.phys, hmm_flags, &notifier);
+                       mmu_interval_notifier_remove(&notifier.notifier);
+               }
+               mmput(mm);
  
+               for (fn = fi; ++fn < buffer->fault_nr; ) {
                         /* It's okay to skip over duplicate addresses from the
                          * same SVMM as faults are ordered by access type such
                          * that only the first one needs to be handled.
@@ -758,61 +731,21 @@ nouveau_svm_fault(struct nvif_notify *notify)
                          * ie. WRITE faults appear first, thus any handling of
                          * pending READ faults will already be satisfied.
                          */
-                       while (++fn < buffer->fault_nr &&
-                              buffer->fault[fn]->svmm == svmm &&
-                              buffer->fault[fn    ]->addr ==
-                              buffer->fault[fn - 1]->addr);
-
-                       /* If the next fault is outside the window, or all GPU
-                        * faults have been dealt with, we're done here.
-                        */
-                       if (fn >= buffer->fault_nr ||
-                           buffer->fault[fn]->svmm != svmm ||
+                       if (buffer->fault[fn]->svmm != svmm ||
                             buffer->fault[fn]->addr >= limit)
                                 break;
-
-                       /* Fill in the gap between this fault and the next. */
-                       fill = (buffer->fault[fn    ]->addr -
-                               buffer->fault[fn - 1]->addr) >> PAGE_SHIFT;
-                       while (--fill)
-                               hmm_pfns[pi++] = 0;
                 }
  
-               SVMM_DBG(svmm, "wndw %016llx-%016llx covering %d fault(s)",
-                        args.i.p.addr,
-                        args.i.p.addr + args.i.p.size, fn - fi);
-
-               notifier.svmm = svmm;
-               ret = mmu_interval_notifier_insert(&notifier.notifier,
-                                                  svmm->notifier.mm,
-                                                  args.i.p.addr, args.i.p.size,
-                                                  &nouveau_svm_mni_ops);
-               if (!ret) {
-                       ret = nouveau_range_fault(
-                               svmm, svm->drm, &args,
-                               sizeof(args.i) + pi * sizeof(args.phys[0]),
-                               hmm_pfns, args.phys, &notifier);
-                       mmu_interval_notifier_remove(&notifier.notifier);
-               }
-               mmput(mm);
+               /* If handling failed completely, cancel all faults. */
+               if (ret) {
+                       while (fi < fn) {
+                               struct nouveau_svm_fault *fault =
+                                       buffer->fault[fi++];
  
-               /* Cancel any faults in the window whose pages didn't manage
-                * to keep their valid bit, or stay writeable when required.
-                *
-                * If handling failed completely, cancel all faults.
-                */
-               while (fi < fn) {
-                       struct nouveau_svm_fault *fault = buffer->fault[fi++];
-                       pi = (fault->addr - args.i.p.addr) >> PAGE_SHIFT;
-                       if (ret ||
-                            !(args.phys[pi] & NVIF_VMM_PFNMAP_V0_V) ||
-                           (!(args.phys[pi] & NVIF_VMM_PFNMAP_V0_W) &&
-                            fault->access != 0 && fault->access != 3)) {
                                 nouveau_svm_fault_cancel_fault(svm, fault);
-                               continue;
                         }
+               } else
                         replay++;
-               }
         }
  
         /* Issue fault replay to the GPU. */
author	Ralph Campbell <rcampbell@nvidia.com>
	Wed, 1 Jul 2020 22:53:48 +0000 (15:53 -0700)
committer	Jason Gunthorpe <jgg@nvidia.com>
	Fri, 10 Jul 2020 19:24:28 +0000 (16:24 -0300)