e963f7d819534df2684ee2206a8c451ed36a0659
[platform/kernel/linux-rpi.git] / drivers / gpu / drm / nouveau / nouveau_svm.c
1 /*
2  * Copyright 2018 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 #include "nouveau_svm.h"
23 #include "nouveau_drv.h"
24 #include "nouveau_chan.h"
25 #include "nouveau_dmem.h"
26
27 #include <nvif/notify.h>
28 #include <nvif/object.h>
29 #include <nvif/vmm.h>
30
31 #include <nvif/class.h>
32 #include <nvif/clb069.h>
33 #include <nvif/ifc00d.h>
34
35 #include <linux/sched/mm.h>
36 #include <linux/sort.h>
37 #include <linux/hmm.h>
38
/*
 * Device-wide SVM state: the list of channel instances that have joined an
 * SVMM, and the fault buffer(s) serviced by nouveau_svm_fault().
 */
struct nouveau_svm {
        struct nouveau_drm *drm;
        struct mutex mutex; /* taken around every access to the inst list */
        struct list_head inst; /* struct nouveau_ivmm entries, linked via ->head */

        struct nouveau_svm_fault_buffer {
                int id; /* index of this buffer within nouveau_svm::buffer[] */
                struct nvif_object object; /* fault entries are read through this */
                u32 entries; /* GET wraps back to 0 when it reaches this value */
                u32 getaddr; /* device offset the GET pointer is read/written at */
                u32 putaddr; /* device offset the PUT pointer is read at */
                u32 get; /* cached GET pointer */
                u32 put; /* cached PUT pointer */
                struct nvif_notify notify;

                /* Parsed copy of one fault buffer entry. */
                struct nouveau_svm_fault {
                        u64 inst; /* channel instance, key for nouveau_ivmm_find() */
                        u64 addr; /* faulting address */
                        u64 time;
                        u32 engine;
                        u8  gpc;
                        u8  hub;
                        u8  access; /* 0 = READ, 3 = PREFETCH, other values handled as writes */
                        u8  client;
                        u8  fault;
                        struct nouveau_svmm *svmm; /* resolved from inst; NULL if not an SVM channel */
                } **fault; /* cache of parsed faults; elements are allocated on demand */
                int fault_nr; /* number of cache entries filled by the current pass */
        } buffer[1];
};
69
/*
 * Device-level SVM logging helpers.  Both prefix the message with "svm: "
 * and append a newline; note that SVM_ERR is emitted through NV_WARN.
 */
#define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a)
#define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a)
72
/*
 * Argument block handed to nvif_object_ioctl() for an NVIF_VMM_V0_PFNMAP
 * method call: ioctl header, method header, then the pfnmap payload whose
 * trailing phys[] array carries one entry per page.
 */
struct nouveau_pfnmap_args {
        struct nvif_ioctl_v0 i;
        struct nvif_ioctl_mthd_v0 m;
        struct nvif_vmm_pfnmap_v0 p;
};
78
/* Links one channel instance address to the SVMM it has joined. */
struct nouveau_ivmm {
        struct nouveau_svmm *svmm;
        u64 inst; /* channel instance address, the lookup key */
        struct list_head head; /* entry in nouveau_svm::inst */
};
84
85 static struct nouveau_ivmm *
86 nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst)
87 {
88         struct nouveau_ivmm *ivmm;
89         list_for_each_entry(ivmm, &svm->inst, head) {
90                 if (ivmm->inst == inst)
91                         return ivmm;
92         }
93         return NULL;
94 }
95
/*
 * Per-SVMM logging helpers: the message is tagged with the SVMM pointer.
 * Both expand to an expression that dereferences (s)->vmm, so they must not
 * be used on an SVMM whose vmm pointer may be NULL.
 */
#define SVMM_DBG(s,f,a...)                                                     \
        NV_DEBUG((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a)
#define SVMM_ERR(s,f,a...)                                                     \
        NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a)
100
101 int
102 nouveau_svmm_bind(struct drm_device *dev, void *data,
103                   struct drm_file *file_priv)
104 {
105         struct nouveau_cli *cli = nouveau_cli(file_priv);
106         struct drm_nouveau_svm_bind *args = data;
107         unsigned target, cmd, priority;
108         unsigned long addr, end;
109         struct mm_struct *mm;
110
111         args->va_start &= PAGE_MASK;
112         args->va_end = ALIGN(args->va_end, PAGE_SIZE);
113
114         /* Sanity check arguments */
115         if (args->reserved0 || args->reserved1)
116                 return -EINVAL;
117         if (args->header & (~NOUVEAU_SVM_BIND_VALID_MASK))
118                 return -EINVAL;
119         if (args->va_start >= args->va_end)
120                 return -EINVAL;
121
122         cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT;
123         cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK;
124         switch (cmd) {
125         case NOUVEAU_SVM_BIND_COMMAND__MIGRATE:
126                 break;
127         default:
128                 return -EINVAL;
129         }
130
131         priority = args->header >> NOUVEAU_SVM_BIND_PRIORITY_SHIFT;
132         priority &= NOUVEAU_SVM_BIND_PRIORITY_MASK;
133
134         /* FIXME support CPU target ie all target value < GPU_VRAM */
135         target = args->header >> NOUVEAU_SVM_BIND_TARGET_SHIFT;
136         target &= NOUVEAU_SVM_BIND_TARGET_MASK;
137         switch (target) {
138         case NOUVEAU_SVM_BIND_TARGET__GPU_VRAM:
139                 break;
140         default:
141                 return -EINVAL;
142         }
143
144         /*
145          * FIXME: For now refuse non 0 stride, we need to change the migrate
146          * kernel function to handle stride to avoid to create a mess within
147          * each device driver.
148          */
149         if (args->stride)
150                 return -EINVAL;
151
152         /*
153          * Ok we are ask to do something sane, for now we only support migrate
154          * commands but we will add things like memory policy (what to do on
155          * page fault) and maybe some other commands.
156          */
157
158         mm = get_task_mm(current);
159         mmap_read_lock(mm);
160
161         if (!cli->svm.svmm) {
162                 mmap_read_unlock(mm);
163                 return -EINVAL;
164         }
165
166         for (addr = args->va_start, end = args->va_end; addr < end;) {
167                 struct vm_area_struct *vma;
168                 unsigned long next;
169
170                 vma = find_vma_intersection(mm, addr, end);
171                 if (!vma)
172                         break;
173
174                 addr = max(addr, vma->vm_start);
175                 next = min(vma->vm_end, end);
176                 /* This is a best effort so we ignore errors */
177                 nouveau_dmem_migrate_vma(cli->drm, cli->svm.svmm, vma, addr,
178                                          next);
179                 addr = next;
180         }
181
182         /*
183          * FIXME Return the number of page we have migrated, again we need to
184          * update the migrate API to return that information so that we can
185          * report it to user space.
186          */
187         args->result = 0;
188
189         mmap_read_unlock(mm);
190         mmput(mm);
191
192         return 0;
193 }
194
195 /* Unlink channel instance from SVMM. */
196 void
197 nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst)
198 {
199         struct nouveau_ivmm *ivmm;
200         if (svmm) {
201                 mutex_lock(&svmm->vmm->cli->drm->svm->mutex);
202                 ivmm = nouveau_ivmm_find(svmm->vmm->cli->drm->svm, inst);
203                 if (ivmm) {
204                         list_del(&ivmm->head);
205                         kfree(ivmm);
206                 }
207                 mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
208         }
209 }
210
211 /* Link channel instance to SVMM. */
212 int
213 nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst)
214 {
215         struct nouveau_ivmm *ivmm;
216         if (svmm) {
217                 if (!(ivmm = kmalloc(sizeof(*ivmm), GFP_KERNEL)))
218                         return -ENOMEM;
219                 ivmm->svmm = svmm;
220                 ivmm->inst = inst;
221
222                 mutex_lock(&svmm->vmm->cli->drm->svm->mutex);
223                 list_add(&ivmm->head, &svmm->vmm->cli->drm->svm->inst);
224                 mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
225         }
226         return 0;
227 }
228
229 /* Invalidate SVMM address-range on GPU. */
230 void
231 nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
232 {
233         if (limit > start) {
234                 bool super = svmm->vmm->vmm.object.client->super;
235                 svmm->vmm->vmm.object.client->super = true;
236                 nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR,
237                                  &(struct nvif_vmm_pfnclr_v0) {
238                                         .addr = start,
239                                         .size = limit - start,
240                                  }, sizeof(struct nvif_vmm_pfnclr_v0));
241                 svmm->vmm->vmm.object.client->super = super;
242         }
243 }
244
245 static int
246 nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn,
247                                     const struct mmu_notifier_range *update)
248 {
249         struct nouveau_svmm *svmm =
250                 container_of(mn, struct nouveau_svmm, notifier);
251         unsigned long start = update->start;
252         unsigned long limit = update->end;
253
254         if (!mmu_notifier_range_blockable(update))
255                 return -EAGAIN;
256
257         SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
258
259         mutex_lock(&svmm->mutex);
260         if (unlikely(!svmm->vmm))
261                 goto out;
262
263         /*
264          * Ignore invalidation callbacks for device private pages since
265          * the invalidation is handled as part of the migration process.
266          */
267         if (update->event == MMU_NOTIFY_MIGRATE &&
268             update->owner == svmm->vmm->cli->drm->dev)
269                 goto out;
270
271         if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) {
272                 if (start < svmm->unmanaged.start) {
273                         nouveau_svmm_invalidate(svmm, start,
274                                                 svmm->unmanaged.limit);
275                 }
276                 start = svmm->unmanaged.limit;
277         }
278
279         nouveau_svmm_invalidate(svmm, start, limit);
280
281 out:
282         mutex_unlock(&svmm->mutex);
283         return 0;
284 }
285
286 static void nouveau_svmm_free_notifier(struct mmu_notifier *mn)
287 {
288         kfree(container_of(mn, struct nouveau_svmm, notifier));
289 }
290
/* mmu_notifier callbacks installed on the SVMM by nouveau_svmm_init(). */
static const struct mmu_notifier_ops nouveau_mn_ops = {
        .invalidate_range_start = nouveau_svmm_invalidate_range_start,
        .free_notifier = nouveau_svmm_free_notifier,
};
295
296 void
297 nouveau_svmm_fini(struct nouveau_svmm **psvmm)
298 {
299         struct nouveau_svmm *svmm = *psvmm;
300         if (svmm) {
301                 mutex_lock(&svmm->mutex);
302                 svmm->vmm = NULL;
303                 mutex_unlock(&svmm->mutex);
304                 mmu_notifier_put(&svmm->notifier);
305                 *psvmm = NULL;
306         }
307 }
308
309 int
310 nouveau_svmm_init(struct drm_device *dev, void *data,
311                   struct drm_file *file_priv)
312 {
313         struct nouveau_cli *cli = nouveau_cli(file_priv);
314         struct nouveau_svmm *svmm;
315         struct drm_nouveau_svm_init *args = data;
316         int ret;
317
318         /* We need to fail if svm is disabled */
319         if (!cli->drm->svm)
320                 return -ENOSYS;
321
322         /* Allocate tracking for SVM-enabled VMM. */
323         if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL)))
324                 return -ENOMEM;
325         svmm->vmm = &cli->svm;
326         svmm->unmanaged.start = args->unmanaged_addr;
327         svmm->unmanaged.limit = args->unmanaged_addr + args->unmanaged_size;
328         mutex_init(&svmm->mutex);
329
330         /* Check that SVM isn't already enabled for the client. */
331         mutex_lock(&cli->mutex);
332         if (cli->svm.cli) {
333                 ret = -EBUSY;
334                 goto out_free;
335         }
336
337         /* Allocate a new GPU VMM that can support SVM (managed by the
338          * client, with replayable faults enabled).
339          *
340          * All future channel/memory allocations will make use of this
341          * VMM instead of the standard one.
342          */
343         ret = nvif_vmm_ctor(&cli->mmu, "svmVmm",
344                             cli->vmm.vmm.object.oclass, true,
345                             args->unmanaged_addr, args->unmanaged_size,
346                             &(struct gp100_vmm_v0) {
347                                 .fault_replay = true,
348                             }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm);
349         if (ret)
350                 goto out_free;
351
352         mmap_write_lock(current->mm);
353         svmm->notifier.ops = &nouveau_mn_ops;
354         ret = __mmu_notifier_register(&svmm->notifier, current->mm);
355         if (ret)
356                 goto out_mm_unlock;
357         /* Note, ownership of svmm transfers to mmu_notifier */
358
359         cli->svm.svmm = svmm;
360         cli->svm.cli = cli;
361         mmap_write_unlock(current->mm);
362         mutex_unlock(&cli->mutex);
363         return 0;
364
365 out_mm_unlock:
366         mmap_write_unlock(current->mm);
367 out_free:
368         mutex_unlock(&cli->mutex);
369         kfree(svmm);
370         return ret;
371 }
372
373 /* Issue fault replay for GPU to retry accesses that faulted previously. */
374 static void
375 nouveau_svm_fault_replay(struct nouveau_svm *svm)
376 {
377         SVM_DBG(svm, "replay");
378         WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object,
379                                  GP100_VMM_VN_FAULT_REPLAY,
380                                  &(struct gp100_vmm_fault_replay_vn) {},
381                                  sizeof(struct gp100_vmm_fault_replay_vn)));
382 }
383
384 /* Cancel a replayable fault that could not be handled.
385  *
386  * Cancelling the fault will trigger recovery to reset the engine
387  * and kill the offending channel (ie. GPU SIGSEGV).
388  */
389 static void
390 nouveau_svm_fault_cancel(struct nouveau_svm *svm,
391                          u64 inst, u8 hub, u8 gpc, u8 client)
392 {
393         SVM_DBG(svm, "cancel %016llx %d %02x %02x", inst, hub, gpc, client);
394         WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object,
395                                  GP100_VMM_VN_FAULT_CANCEL,
396                                  &(struct gp100_vmm_fault_cancel_v0) {
397                                         .hub = hub,
398                                         .gpc = gpc,
399                                         .client = client,
400                                         .inst = inst,
401                                  }, sizeof(struct gp100_vmm_fault_cancel_v0)));
402 }
403
404 static void
405 nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm,
406                                struct nouveau_svm_fault *fault)
407 {
408         nouveau_svm_fault_cancel(svm, fault->inst,
409                                       fault->hub,
410                                       fault->gpc,
411                                       fault->client);
412 }
413
414 static int
415 nouveau_svm_fault_cmp(const void *a, const void *b)
416 {
417         const struct nouveau_svm_fault *fa = *(struct nouveau_svm_fault **)a;
418         const struct nouveau_svm_fault *fb = *(struct nouveau_svm_fault **)b;
419         int ret;
420         if ((ret = (s64)fa->inst - fb->inst))
421                 return ret;
422         if ((ret = (s64)fa->addr - fb->addr))
423                 return ret;
424         /*XXX: atomic? */
425         return (fa->access == 0 || fa->access == 3) -
426                (fb->access == 0 || fb->access == 3);
427 }
428
/*
 * Parse the fault buffer entry at byte @offset into the next slot of the
 * fault cache and advance buffer->fault_nr.
 *
 * The entry is read as eight 32-bit words.  Cache slots are allocated on
 * first use and kept for later passes; if a slot cannot be allocated the
 * fault is cancelled instead of being cached.
 */
static void
nouveau_svm_fault_cache(struct nouveau_svm *svm,
                        struct nouveau_svm_fault_buffer *buffer, u32 offset)
{
        struct nvif_object *memory = &buffer->object;
        const u32 instlo = nvif_rd32(memory, offset + 0x00);
        const u32 insthi = nvif_rd32(memory, offset + 0x04);
        const u32 addrlo = nvif_rd32(memory, offset + 0x08);
        const u32 addrhi = nvif_rd32(memory, offset + 0x0c);
        const u32 timelo = nvif_rd32(memory, offset + 0x10);
        const u32 timehi = nvif_rd32(memory, offset + 0x14);
        const u32 engine = nvif_rd32(memory, offset + 0x18);
        const u32   info = nvif_rd32(memory, offset + 0x1c);
        const u64   inst = (u64)insthi << 32 | instlo;
        const u8     gpc = (info & 0x1f000000) >> 24;
        const u8     hub = (info & 0x00100000) >> 20;
        const u8  client = (info & 0x00007f00) >> 8;
        struct nouveau_svm_fault *fault;

        /*
         * XXX: I think we're supposed to spin waiting.
         * Bit 31 of the info word presumably marks the entry as valid -
         * TODO confirm; an entry without it is skipped with a WARN.
         */
        if (WARN_ON(!(info & 0x80000000)))
                return;

        /* Clear bit 31 in the entry now that it has been read. */
        nvif_mask(memory, offset + 0x1c, 0x80000000, 0x00000000);

        /* Allocate the cache slot on first use. */
        if (!buffer->fault[buffer->fault_nr]) {
                fault = kmalloc(sizeof(*fault), GFP_KERNEL);
                if (WARN_ON(!fault)) {
                        nouveau_svm_fault_cancel(svm, inst, hub, gpc, client);
                        return;
                }
                buffer->fault[buffer->fault_nr] = fault;
        }

        fault = buffer->fault[buffer->fault_nr++];
        fault->inst   = inst;
        fault->addr   = (u64)addrhi << 32 | addrlo;
        fault->time   = (u64)timehi << 32 | timelo;
        fault->engine = engine;
        fault->gpc    = gpc;
        fault->hub    = hub;
        fault->access = (info & 0x000f0000) >> 16;
        fault->client = client;
        fault->fault  = (info & 0x0000001f);

        SVM_DBG(svm, "fault %016llx %016llx %02x",
                fault->inst, fault->addr, fault->access);
}
477
/*
 * Interval notifier used while servicing one fault, paired with the SVMM
 * whose mutex nouveau_svm_range_invalidate() takes.
 */
struct svm_notifier {
        struct mmu_interval_notifier notifier;
        struct nouveau_svmm *svmm;
};
482
483 static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
484                                          const struct mmu_notifier_range *range,
485                                          unsigned long cur_seq)
486 {
487         struct svm_notifier *sn =
488                 container_of(mni, struct svm_notifier, notifier);
489
490         /*
491          * serializes the update to mni->invalidate_seq done by caller and
492          * prevents invalidation of the PTE from progressing while HW is being
493          * programmed. This is very hacky and only works because the normal
494          * notifier that does invalidation is always called after the range
495          * notifier.
496          */
497         if (mmu_notifier_range_blockable(range))
498                 mutex_lock(&sn->svmm->mutex);
499         else if (!mutex_trylock(&sn->svmm->mutex))
500                 return false;
501         mmu_interval_set_seq(mni, cur_seq);
502         mutex_unlock(&sn->svmm->mutex);
503         return true;
504 }
505
/* Interval notifier callbacks used by nouveau_svm_fault(). */
static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
        .invalidate = nouveau_svm_range_invalidate,
};
509
510 static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
511                                     struct hmm_range *range,
512                                     struct nouveau_pfnmap_args *args)
513 {
514         struct page *page;
515
516         /*
517          * The address prepared here is passed through nvif_object_ioctl()
518          * to an eventual DMA map in something like gp100_vmm_pgt_pfn()
519          *
520          * This is all just encoding the internal hmm representation into a
521          * different nouveau internal representation.
522          */
523         if (!(range->hmm_pfns[0] & HMM_PFN_VALID)) {
524                 args->p.phys[0] = 0;
525                 return;
526         }
527
528         page = hmm_pfn_to_page(range->hmm_pfns[0]);
529         /*
530          * Only map compound pages to the GPU if the CPU is also mapping the
531          * page as a compound page. Otherwise, the PTE protections might not be
532          * consistent (e.g., CPU only maps part of a compound page).
533          * Note that the underlying page might still be larger than the
534          * CPU mapping (e.g., a PUD sized compound page partially mapped with
535          * a PMD sized page table entry).
536          */
537         if (hmm_pfn_to_map_order(range->hmm_pfns[0])) {
538                 unsigned long addr = args->p.addr;
539
540                 args->p.page = hmm_pfn_to_map_order(range->hmm_pfns[0]) +
541                                 PAGE_SHIFT;
542                 args->p.size = 1UL << args->p.page;
543                 args->p.addr &= ~(args->p.size - 1);
544                 page -= (addr - args->p.addr) >> PAGE_SHIFT;
545         }
546         if (is_device_private_page(page))
547                 args->p.phys[0] = nouveau_dmem_page_addr(page) |
548                                 NVIF_VMM_PFNMAP_V0_V |
549                                 NVIF_VMM_PFNMAP_V0_VRAM;
550         else
551                 args->p.phys[0] = page_to_phys(page) |
552                                 NVIF_VMM_PFNMAP_V0_V |
553                                 NVIF_VMM_PFNMAP_V0_HOST;
554         if (range->hmm_pfns[0] & HMM_PFN_WRITE)
555                 args->p.phys[0] |= NVIF_VMM_PFNMAP_V0_W;
556 }
557
/*
 * Fault in the page covered by @notifier through HMM and map it on the GPU.
 *
 * @svmm:      SVMM whose mutex guards the GPU update.
 * @drm:       device, used as the device-private page owner.
 * @args:      pfnmap ioctl arguments; filled in by nouveau_hmm_convert_pfn().
 * @size:      size in bytes of @args as passed to nvif_object_ioctl().
 * @hmm_flags: HMM_PFN_REQ_* flags used as the range's default flags.
 * @notifier:  interval notifier already inserted over the range.
 *
 * Returns 0 on success, -EBUSY once the retry window of
 * HMM_RANGE_DEFAULT_TIMEOUT ms has passed, another hmm_range_fault() error,
 * or the result of the pfnmap ioctl.
 */
static int nouveau_range_fault(struct nouveau_svmm *svmm,
                               struct nouveau_drm *drm,
                               struct nouveau_pfnmap_args *args, u32 size,
                               unsigned long hmm_flags,
                               struct svm_notifier *notifier)
{
        unsigned long timeout =
                jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
        /* Have HMM fault pages within the fault window to the GPU. */
        unsigned long hmm_pfns[1];
        struct hmm_range range = {
                .notifier = &notifier->notifier,
                .start = notifier->notifier.interval_tree.start,
                .end = notifier->notifier.interval_tree.last + 1,
                .default_flags = hmm_flags,
                .hmm_pfns = hmm_pfns,
                .dev_private_owner = drm->dev,
        };
        struct mm_struct *mm = notifier->notifier.mm;
        int ret;

        /*
         * Retry until a fault completes without the range having been
         * invalidated in the meantime, or until the timeout expires.
         */
        while (true) {
                if (time_after(jiffies, timeout))
                        return -EBUSY;

                range.notifier_seq = mmu_interval_read_begin(range.notifier);
                mmap_read_lock(mm);
                ret = hmm_range_fault(&range);
                mmap_read_unlock(mm);
                if (ret) {
                        if (ret == -EBUSY)
                                continue;
                        return ret;
                }

                /* On success the loop is left with svmm->mutex held. */
                mutex_lock(&svmm->mutex);
                if (mmu_interval_read_retry(range.notifier,
                                            range.notifier_seq)) {
                        mutex_unlock(&svmm->mutex);
                        continue;
                }
                break;
        }

        nouveau_hmm_convert_pfn(drm, &range, args);

        /* The client's "super" flag is forced on for the ioctl, then cleared. */
        svmm->vmm->vmm.object.client->super = true;
        ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL);
        svmm->vmm->vmm.object.client->super = false;
        mutex_unlock(&svmm->mutex);

        return ret;
}
611
/*
 * Notify handler for a replayable fault buffer.
 *
 * Caches the pending fault entries, sorts them, resolves each fault's SVMM
 * from its channel instance, then services the faults one window at a time
 * through HMM.  Faults that cannot be serviced are cancelled; if at least
 * one window was handled a replay is issued.
 *
 * Always returns NVIF_NOTIFY_KEEP.
 */
static int
nouveau_svm_fault(struct nvif_notify *notify)
{
        struct nouveau_svm_fault_buffer *buffer =
                container_of(notify, typeof(*buffer), notify);
        struct nouveau_svm *svm =
                container_of(buffer, typeof(*svm), buffer[buffer->id]);
        struct nvif_object *device = &svm->drm->client.device.object;
        struct nouveau_svmm *svmm;
        /* pfnmap arguments followed by storage for the single phys entry. */
        struct {
                struct nouveau_pfnmap_args i;
                u64 phys[1];
        } args;
        unsigned long hmm_flags;
        u64 inst, start, limit;
        int fi, fn;
        int replay = 0, ret;

        /* Parse available fault buffer entries into a cache, and update
         * the GET pointer so HW can reuse the entries.
         */
        SVM_DBG(svm, "fault handler");
        if (buffer->get == buffer->put) {
                buffer->put = nvif_rd32(device, buffer->putaddr);
                buffer->get = nvif_rd32(device, buffer->getaddr);
                if (buffer->get == buffer->put)
                        return NVIF_NOTIFY_KEEP;
        }
        buffer->fault_nr = 0;

        SVM_DBG(svm, "get %08x put %08x", buffer->get, buffer->put);
        /* Each entry is 0x20 bytes; GET wraps at buffer->entries. */
        while (buffer->get != buffer->put) {
                nouveau_svm_fault_cache(svm, buffer, buffer->get * 0x20);
                if (++buffer->get == buffer->entries)
                        buffer->get = 0;
        }
        nvif_wr32(device, buffer->getaddr, buffer->get);
        SVM_DBG(svm, "%d fault(s) pending", buffer->fault_nr);

        /* Sort parsed faults by instance pointer to prevent unnecessary
         * instance to SVMM translations, followed by address and access
         * type to reduce the amount of work when handling the faults.
         */
        sort(buffer->fault, buffer->fault_nr, sizeof(*buffer->fault),
             nouveau_svm_fault_cmp, NULL);

        /* Lookup SVMM structure for each unique instance pointer. */
        mutex_lock(&svm->mutex);
        for (fi = 0, svmm = NULL; fi < buffer->fault_nr; fi++) {
                /* inst is only read once svmm is non-NULL, i.e. after it was set. */
                if (!svmm || buffer->fault[fi]->inst != inst) {
                        struct nouveau_ivmm *ivmm =
                                nouveau_ivmm_find(svm, buffer->fault[fi]->inst);
                        svmm = ivmm ? ivmm->svmm : NULL;
                        inst = buffer->fault[fi]->inst;
                        SVM_DBG(svm, "inst %016llx -> svm-%p", inst, svmm);
                }
                buffer->fault[fi]->svmm = svmm;
        }
        mutex_unlock(&svm->mutex);

        /* Process list of faults. */
        args.i.i.version = 0;
        args.i.i.type = NVIF_IOCTL_V0_MTHD;
        args.i.m.version = 0;
        args.i.m.method = NVIF_VMM_V0_PFNMAP;
        args.i.p.version = 0;

        /*
         * fn is reset to fi + 1 before every condition check, so the
         * "continue" statements below advance by exactly one fault.
         */
        for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) {
                struct svm_notifier notifier;
                struct mm_struct *mm;

                /* Cancel any faults from non-SVM channels. */
                if (!(svmm = buffer->fault[fi]->svmm)) {
                        nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
                        continue;
                }
                SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr);

                /* We try and group handling of faults within a small
                 * window into a single update.
                 *
                 * NOTE(review): the limit computed here is overwritten
                 * further down before it is read.
                 */
                start = buffer->fault[fi]->addr;
                limit = start + PAGE_SIZE;
                if (start < svmm->unmanaged.limit)
                        limit = min_t(u64, limit, svmm->unmanaged.start);

                /*
                 * Prepare the GPU-side update of all pages within the
                 * fault window, determining required pages and access
                 * permissions based on pending faults.
                 */
                args.i.p.addr = start;
                args.i.p.page = PAGE_SHIFT;
                args.i.p.size = PAGE_SIZE;
                /*
                 * Determine required permissions based on GPU fault
                 * access flags.
                 * XXX: atomic?
                 */
                switch (buffer->fault[fi]->access) {
                case 0: /* READ. */
                        hmm_flags = HMM_PFN_REQ_FAULT;
                        break;
                case 3: /* PREFETCH. */
                        hmm_flags = 0;
                        break;
                default:
                        hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE;
                        break;
                }

                /* Hold a reference on the mm while the fault is serviced. */
                mm = svmm->notifier.mm;
                if (!mmget_not_zero(mm)) {
                        nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
                        continue;
                }

                notifier.svmm = svmm;
                ret = mmu_interval_notifier_insert(&notifier.notifier, mm,
                                                   args.i.p.addr, args.i.p.size,
                                                   &nouveau_svm_mni_ops);
                if (!ret) {
                        ret = nouveau_range_fault(svmm, svm->drm, &args.i,
                                sizeof(args), hmm_flags, &notifier);
                        mmu_interval_notifier_remove(&notifier.notifier);
                }
                mmput(mm);

                /* addr/size may have been widened by nouveau_hmm_convert_pfn(). */
                limit = args.i.p.addr + args.i.p.size;
                for (fn = fi; ++fn < buffer->fault_nr; ) {
                        /* It's okay to skip over duplicate addresses from the
                         * same SVMM as faults are ordered by access type such
                         * that only the first one needs to be handled.
                         *
                         * ie. WRITE faults appear first, thus any handling of
                         * pending READ faults will already be satisfied.
                         * But if a large page is mapped, make sure subsequent
                         * fault addresses have sufficient access permission.
                         */
                        if (buffer->fault[fn]->svmm != svmm ||
                            buffer->fault[fn]->addr >= limit ||
                            (buffer->fault[fi]->access == 0 /* READ. */ &&
                             !(args.phys[0] & NVIF_VMM_PFNMAP_V0_V)) ||
                            (buffer->fault[fi]->access != 0 /* READ. */ &&
                             buffer->fault[fi]->access != 3 /* PREFETCH. */ &&
                             !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)))
                                break;
                }

                /* If handling failed completely, cancel all faults. */
                if (ret) {
                        while (fi < fn) {
                                struct nouveau_svm_fault *fault =
                                        buffer->fault[fi++];

                                nouveau_svm_fault_cancel_fault(svm, fault);
                        }
                } else
                        replay++;
        }

        /* Issue fault replay to the GPU. */
        if (replay)
                nouveau_svm_fault_replay(svm);
        return NVIF_NOTIFY_KEEP;
}
778
779 static struct nouveau_pfnmap_args *
780 nouveau_pfns_to_args(void *pfns)
781 {
782         return container_of(pfns, struct nouveau_pfnmap_args, p.phys);
783 }
784
785 u64 *
786 nouveau_pfns_alloc(unsigned long npages)
787 {
788         struct nouveau_pfnmap_args *args;
789
790         args = kzalloc(struct_size(args, p.phys, npages), GFP_KERNEL);
791         if (!args)
792                 return NULL;
793
794         args->i.type = NVIF_IOCTL_V0_MTHD;
795         args->m.method = NVIF_VMM_V0_PFNMAP;
796         args->p.page = PAGE_SHIFT;
797
798         return args->p.phys;
799 }
800
801 void
802 nouveau_pfns_free(u64 *pfns)
803 {
804         struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);
805
806         kfree(args);
807 }
808
809 void
810 nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm,
811                  unsigned long addr, u64 *pfns, unsigned long npages)
812 {
813         struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);
814         int ret;
815
816         args->p.addr = addr;
817         args->p.size = npages << PAGE_SHIFT;
818
819         mutex_lock(&svmm->mutex);
820
821         svmm->vmm->vmm.object.client->super = true;
822         ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, sizeof(*args) +
823                                 npages * sizeof(args->p.phys[0]), NULL);
824         svmm->vmm->vmm.object.client->super = false;
825
826         mutex_unlock(&svmm->mutex);
827 }
828
829 static void
830 nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id)
831 {
832         struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
833         nvif_notify_put(&buffer->notify);
834 }
835
836 static int
837 nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id)
838 {
839         struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
840         struct nvif_object *device = &svm->drm->client.device.object;
841         buffer->get = nvif_rd32(device, buffer->getaddr);
842         buffer->put = nvif_rd32(device, buffer->putaddr);
843         SVM_DBG(svm, "get %08x put %08x (init)", buffer->get, buffer->put);
844         return nvif_notify_get(&buffer->notify);
845 }
846
847 static void
848 nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id)
849 {
850         struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
851         int i;
852
853         if (buffer->fault) {
854                 for (i = 0; buffer->fault[i] && i < buffer->entries; i++)
855                         kfree(buffer->fault[i]);
856                 kvfree(buffer->fault);
857         }
858
859         nouveau_svm_fault_buffer_fini(svm, id);
860
861         nvif_notify_dtor(&buffer->notify);
862         nvif_object_dtor(&buffer->object);
863 }
864
865 static int
866 nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
867 {
868         struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
869         struct nouveau_drm *drm = svm->drm;
870         struct nvif_object *device = &drm->client.device.object;
871         struct nvif_clb069_v0 args = {};
872         int ret;
873
874         buffer->id = id;
875
876         ret = nvif_object_ctor(device, "svmFaultBuffer", 0, oclass, &args,
877                                sizeof(args), &buffer->object);
878         if (ret < 0) {
879                 SVM_ERR(svm, "Fault buffer allocation failed: %d", ret);
880                 return ret;
881         }
882
883         nvif_object_map(&buffer->object, NULL, 0);
884         buffer->entries = args.entries;
885         buffer->getaddr = args.get;
886         buffer->putaddr = args.put;
887
888         ret = nvif_notify_ctor(&buffer->object, "svmFault", nouveau_svm_fault,
889                                true, NVB069_V0_NTFY_FAULT, NULL, 0, 0,
890                                &buffer->notify);
891         if (ret)
892                 return ret;
893
894         buffer->fault = kvzalloc(sizeof(*buffer->fault) * buffer->entries, GFP_KERNEL);
895         if (!buffer->fault)
896                 return -ENOMEM;
897
898         return nouveau_svm_fault_buffer_init(svm, id);
899 }
900
901 void
902 nouveau_svm_resume(struct nouveau_drm *drm)
903 {
904         struct nouveau_svm *svm = drm->svm;
905         if (svm)
906                 nouveau_svm_fault_buffer_init(svm, 0);
907 }
908
909 void
910 nouveau_svm_suspend(struct nouveau_drm *drm)
911 {
912         struct nouveau_svm *svm = drm->svm;
913         if (svm)
914                 nouveau_svm_fault_buffer_fini(svm, 0);
915 }
916
917 void
918 nouveau_svm_fini(struct nouveau_drm *drm)
919 {
920         struct nouveau_svm *svm = drm->svm;
921         if (svm) {
922                 nouveau_svm_fault_buffer_dtor(svm, 0);
923                 kfree(drm->svm);
924                 drm->svm = NULL;
925         }
926 }
927
928 void
929 nouveau_svm_init(struct nouveau_drm *drm)
930 {
931         static const struct nvif_mclass buffers[] = {
932                 {   VOLTA_FAULT_BUFFER_A, 0 },
933                 { MAXWELL_FAULT_BUFFER_A, 0 },
934                 {}
935         };
936         struct nouveau_svm *svm;
937         int ret;
938
939         /* Disable on Volta and newer until channel recovery is fixed,
940          * otherwise clients will have a trivial way to trash the GPU
941          * for everyone.
942          */
943         if (drm->client.device.info.family > NV_DEVICE_INFO_V0_PASCAL)
944                 return;
945
946         if (!(drm->svm = svm = kzalloc(sizeof(*drm->svm), GFP_KERNEL)))
947                 return;
948
949         drm->svm->drm = drm;
950         mutex_init(&drm->svm->mutex);
951         INIT_LIST_HEAD(&drm->svm->inst);
952
953         ret = nvif_mclass(&drm->client.device.object, buffers);
954         if (ret < 0) {
955                 SVM_DBG(svm, "No supported fault buffer class");
956                 nouveau_svm_fini(drm);
957                 return;
958         }
959
960         ret = nouveau_svm_fault_buffer_ctor(svm, buffers[ret].oclass, 0);
961         if (ret) {
962                 nouveau_svm_fini(drm);
963                 return;
964         }
965
966         SVM_DBG(svm, "Initialised");
967 }