dc774ddf34456461a0818c4cb0955efadfdc566c
[platform/kernel/linux-starfive.git] / drivers / gpu / drm / amd / amdkfd / kfd_chardev.c
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /*
3  * Copyright 2014-2022 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23
24 #include <linux/device.h>
25 #include <linux/export.h>
26 #include <linux/err.h>
27 #include <linux/fs.h>
28 #include <linux/file.h>
29 #include <linux/sched.h>
30 #include <linux/slab.h>
31 #include <linux/uaccess.h>
32 #include <linux/compat.h>
33 #include <uapi/linux/kfd_ioctl.h>
34 #include <linux/time.h>
35 #include <linux/mm.h>
36 #include <linux/mman.h>
37 #include <linux/ptrace.h>
38 #include <linux/dma-buf.h>
39 #include <linux/fdtable.h>
40 #include <linux/processor.h>
41 #include "kfd_priv.h"
42 #include "kfd_device_queue_manager.h"
43 #include "kfd_svm.h"
44 #include "amdgpu_amdkfd.h"
45 #include "kfd_smi_events.h"
46 #include "amdgpu_dma_buf.h"
47
48 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
49 static int kfd_open(struct inode *, struct file *);
50 static int kfd_release(struct inode *, struct file *);
51 static int kfd_mmap(struct file *, struct vm_area_struct *);
52
53 static const char kfd_dev_name[] = "kfd";
54
55 static const struct file_operations kfd_fops = {
56         .owner = THIS_MODULE,
57         .unlocked_ioctl = kfd_ioctl,
58         .compat_ioctl = compat_ptr_ioctl,
59         .open = kfd_open,
60         .release = kfd_release,
61         .mmap = kfd_mmap,
62 };
63
64 static int kfd_char_dev_major = -1;
65 static struct class *kfd_class;
66 struct device *kfd_device;
67
68 static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
69 {
70         struct kfd_process_device *pdd;
71
72         mutex_lock(&p->mutex);
73         pdd = kfd_process_device_data_by_id(p, gpu_id);
74
75         if (pdd)
76                 return pdd;
77
78         mutex_unlock(&p->mutex);
79         return NULL;
80 }
81
82 static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
83 {
84         mutex_unlock(&pdd->process->mutex);
85 }
86
87 int kfd_chardev_init(void)
88 {
89         int err = 0;
90
91         kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
92         err = kfd_char_dev_major;
93         if (err < 0)
94                 goto err_register_chrdev;
95
96         kfd_class = class_create(THIS_MODULE, kfd_dev_name);
97         err = PTR_ERR(kfd_class);
98         if (IS_ERR(kfd_class))
99                 goto err_class_create;
100
101         kfd_device = device_create(kfd_class, NULL,
102                                         MKDEV(kfd_char_dev_major, 0),
103                                         NULL, kfd_dev_name);
104         err = PTR_ERR(kfd_device);
105         if (IS_ERR(kfd_device))
106                 goto err_device_create;
107
108         return 0;
109
110 err_device_create:
111         class_destroy(kfd_class);
112 err_class_create:
113         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
114 err_register_chrdev:
115         return err;
116 }
117
118 void kfd_chardev_exit(void)
119 {
120         device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
121         class_destroy(kfd_class);
122         unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
123         kfd_device = NULL;
124 }
125
126
127 static int kfd_open(struct inode *inode, struct file *filep)
128 {
129         struct kfd_process *process;
130         bool is_32bit_user_mode;
131
132         if (iminor(inode) != 0)
133                 return -ENODEV;
134
135         is_32bit_user_mode = in_compat_syscall();
136
137         if (is_32bit_user_mode) {
138                 dev_warn(kfd_device,
139                         "Process %d (32-bit) failed to open /dev/kfd\n"
140                         "32-bit processes are not supported by amdkfd\n",
141                         current->pid);
142                 return -EPERM;
143         }
144
145         process = kfd_create_process(filep);
146         if (IS_ERR(process))
147                 return PTR_ERR(process);
148
149         if (kfd_is_locked()) {
150                 dev_dbg(kfd_device, "kfd is locked!\n"
151                                 "process %d unreferenced", process->pasid);
152                 kfd_unref_process(process);
153                 return -EAGAIN;
154         }
155
156         /* filep now owns the reference returned by kfd_create_process */
157         filep->private_data = process;
158
159         dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
160                 process->pasid, process->is_32bit_user_mode);
161
162         return 0;
163 }
164
165 static int kfd_release(struct inode *inode, struct file *filep)
166 {
167         struct kfd_process *process = filep->private_data;
168
169         if (process)
170                 kfd_unref_process(process);
171
172         return 0;
173 }
174
175 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
176                                         void *data)
177 {
178         struct kfd_ioctl_get_version_args *args = data;
179
180         args->major_version = KFD_IOCTL_MAJOR_VERSION;
181         args->minor_version = KFD_IOCTL_MINOR_VERSION;
182
183         return 0;
184 }
185
186 static int set_queue_properties_from_user(struct queue_properties *q_properties,
187                                 struct kfd_ioctl_create_queue_args *args)
188 {
189         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
190                 pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
191                 return -EINVAL;
192         }
193
194         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
195                 pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
196                 return -EINVAL;
197         }
198
199         if ((args->ring_base_address) &&
200                 (!access_ok((const void __user *) args->ring_base_address,
201                         sizeof(uint64_t)))) {
202                 pr_err("Can't access ring base address\n");
203                 return -EFAULT;
204         }
205
206         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
207                 pr_err("Ring size must be a power of 2 or 0\n");
208                 return -EINVAL;
209         }
210
211         if (!access_ok((const void __user *) args->read_pointer_address,
212                         sizeof(uint32_t))) {
213                 pr_err("Can't access read pointer\n");
214                 return -EFAULT;
215         }
216
217         if (!access_ok((const void __user *) args->write_pointer_address,
218                         sizeof(uint32_t))) {
219                 pr_err("Can't access write pointer\n");
220                 return -EFAULT;
221         }
222
223         if (args->eop_buffer_address &&
224                 !access_ok((const void __user *) args->eop_buffer_address,
225                         sizeof(uint32_t))) {
226                 pr_debug("Can't access eop buffer");
227                 return -EFAULT;
228         }
229
230         if (args->ctx_save_restore_address &&
231                 !access_ok((const void __user *) args->ctx_save_restore_address,
232                         sizeof(uint32_t))) {
233                 pr_debug("Can't access ctx save restore buffer");
234                 return -EFAULT;
235         }
236
237         q_properties->is_interop = false;
238         q_properties->is_gws = false;
239         q_properties->queue_percent = args->queue_percentage;
240         q_properties->priority = args->queue_priority;
241         q_properties->queue_address = args->ring_base_address;
242         q_properties->queue_size = args->ring_size;
243         q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
244         q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
245         q_properties->eop_ring_buffer_address = args->eop_buffer_address;
246         q_properties->eop_ring_buffer_size = args->eop_buffer_size;
247         q_properties->ctx_save_restore_area_address =
248                         args->ctx_save_restore_address;
249         q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
250         q_properties->ctl_stack_size = args->ctl_stack_size;
251         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
252                 args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
253                 q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
254         else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
255                 q_properties->type = KFD_QUEUE_TYPE_SDMA;
256         else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
257                 q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
258         else
259                 return -ENOTSUPP;
260
261         if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
262                 q_properties->format = KFD_QUEUE_FORMAT_AQL;
263         else
264                 q_properties->format = KFD_QUEUE_FORMAT_PM4;
265
266         pr_debug("Queue Percentage: %d, %d\n",
267                         q_properties->queue_percent, args->queue_percentage);
268
269         pr_debug("Queue Priority: %d, %d\n",
270                         q_properties->priority, args->queue_priority);
271
272         pr_debug("Queue Address: 0x%llX, 0x%llX\n",
273                         q_properties->queue_address, args->ring_base_address);
274
275         pr_debug("Queue Size: 0x%llX, %u\n",
276                         q_properties->queue_size, args->ring_size);
277
278         pr_debug("Queue r/w Pointers: %px, %px\n",
279                         q_properties->read_ptr,
280                         q_properties->write_ptr);
281
282         pr_debug("Queue Format: %d\n", q_properties->format);
283
284         pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
285
286         pr_debug("Queue CTX save area: 0x%llX\n",
287                         q_properties->ctx_save_restore_area_address);
288
289         return 0;
290 }
291
292 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
293                                         void *data)
294 {
295         struct kfd_ioctl_create_queue_args *args = data;
296         struct kfd_dev *dev;
297         int err = 0;
298         unsigned int queue_id;
299         struct kfd_process_device *pdd;
300         struct queue_properties q_properties;
301         uint32_t doorbell_offset_in_process = 0;
302         struct amdgpu_bo *wptr_bo = NULL;
303
304         memset(&q_properties, 0, sizeof(struct queue_properties));
305
306         pr_debug("Creating queue ioctl\n");
307
308         err = set_queue_properties_from_user(&q_properties, args);
309         if (err)
310                 return err;
311
312         pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
313
314         mutex_lock(&p->mutex);
315
316         pdd = kfd_process_device_data_by_id(p, args->gpu_id);
317         if (!pdd) {
318                 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
319                 err = -EINVAL;
320                 goto err_pdd;
321         }
322         dev = pdd->dev;
323
324         pdd = kfd_bind_process_to_device(dev, p);
325         if (IS_ERR(pdd)) {
326                 err = -ESRCH;
327                 goto err_bind_process;
328         }
329
330         /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
331          * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
332          */
333         if (dev->shared_resources.enable_mes &&
334                         ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
335                         >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
336                 struct amdgpu_bo_va_mapping *wptr_mapping;
337                 struct amdgpu_vm *wptr_vm;
338
339                 wptr_vm = drm_priv_to_vm(pdd->drm_priv);
340                 err = amdgpu_bo_reserve(wptr_vm->root.bo, false);
341                 if (err)
342                         goto err_wptr_map_gart;
343
344                 wptr_mapping = amdgpu_vm_bo_lookup_mapping(
345                                 wptr_vm, args->write_pointer_address >> PAGE_SHIFT);
346                 amdgpu_bo_unreserve(wptr_vm->root.bo);
347                 if (!wptr_mapping) {
348                         pr_err("Failed to lookup wptr bo\n");
349                         err = -EINVAL;
350                         goto err_wptr_map_gart;
351                 }
352
353                 wptr_bo = wptr_mapping->bo_va->base.bo;
354                 if (wptr_bo->tbo.base.size > PAGE_SIZE) {
355                         pr_err("Requested GART mapping for wptr bo larger than one page\n");
356                         err = -EINVAL;
357                         goto err_wptr_map_gart;
358                 }
359
360                 err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo);
361                 if (err) {
362                         pr_err("Failed to map wptr bo to GART\n");
363                         goto err_wptr_map_gart;
364                 }
365         }
366
367         pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
368                         p->pasid,
369                         dev->id);
370
371         err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo,
372                         NULL, NULL, NULL, &doorbell_offset_in_process);
373         if (err != 0)
374                 goto err_create_queue;
375
376         args->queue_id = queue_id;
377
378
379         /* Return gpu_id as doorbell offset for mmap usage */
380         args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
381         args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
382         if (KFD_IS_SOC15(dev))
383                 /* On SOC15 ASICs, include the doorbell offset within the
384                  * process doorbell frame, which is 2 pages.
385                  */
386                 args->doorbell_offset |= doorbell_offset_in_process;
387
388         mutex_unlock(&p->mutex);
389
390         pr_debug("Queue id %d was created successfully\n", args->queue_id);
391
392         pr_debug("Ring buffer address == 0x%016llX\n",
393                         args->ring_base_address);
394
395         pr_debug("Read ptr address    == 0x%016llX\n",
396                         args->read_pointer_address);
397
398         pr_debug("Write ptr address   == 0x%016llX\n",
399                         args->write_pointer_address);
400
401         return 0;
402
403 err_create_queue:
404         if (wptr_bo)
405                 amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo);
406 err_wptr_map_gart:
407 err_bind_process:
408 err_pdd:
409         mutex_unlock(&p->mutex);
410         return err;
411 }
412
413 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
414                                         void *data)
415 {
416         int retval;
417         struct kfd_ioctl_destroy_queue_args *args = data;
418
419         pr_debug("Destroying queue id %d for pasid 0x%x\n",
420                                 args->queue_id,
421                                 p->pasid);
422
423         mutex_lock(&p->mutex);
424
425         retval = pqm_destroy_queue(&p->pqm, args->queue_id);
426
427         mutex_unlock(&p->mutex);
428         return retval;
429 }
430
431 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
432                                         void *data)
433 {
434         int retval;
435         struct kfd_ioctl_update_queue_args *args = data;
436         struct queue_properties properties;
437
438         if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
439                 pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
440                 return -EINVAL;
441         }
442
443         if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
444                 pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
445                 return -EINVAL;
446         }
447
448         if ((args->ring_base_address) &&
449                 (!access_ok((const void __user *) args->ring_base_address,
450                         sizeof(uint64_t)))) {
451                 pr_err("Can't access ring base address\n");
452                 return -EFAULT;
453         }
454
455         if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
456                 pr_err("Ring size must be a power of 2 or 0\n");
457                 return -EINVAL;
458         }
459
460         properties.queue_address = args->ring_base_address;
461         properties.queue_size = args->ring_size;
462         properties.queue_percent = args->queue_percentage;
463         properties.priority = args->queue_priority;
464
465         pr_debug("Updating queue id %d for pasid 0x%x\n",
466                         args->queue_id, p->pasid);
467
468         mutex_lock(&p->mutex);
469
470         retval = pqm_update_queue_properties(&p->pqm, args->queue_id, &properties);
471
472         mutex_unlock(&p->mutex);
473
474         return retval;
475 }
476
477 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
478                                         void *data)
479 {
480         int retval;
481         const int max_num_cus = 1024;
482         struct kfd_ioctl_set_cu_mask_args *args = data;
483         struct mqd_update_info minfo = {0};
484         uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
485         size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
486
487         if ((args->num_cu_mask % 32) != 0) {
488                 pr_debug("num_cu_mask 0x%x must be a multiple of 32",
489                                 args->num_cu_mask);
490                 return -EINVAL;
491         }
492
493         minfo.cu_mask.count = args->num_cu_mask;
494         if (minfo.cu_mask.count == 0) {
495                 pr_debug("CU mask cannot be 0");
496                 return -EINVAL;
497         }
498
499         /* To prevent an unreasonably large CU mask size, set an arbitrary
500          * limit of max_num_cus bits.  We can then just drop any CU mask bits
501          * past max_num_cus bits and just use the first max_num_cus bits.
502          */
503         if (minfo.cu_mask.count > max_num_cus) {
504                 pr_debug("CU mask cannot be greater than 1024 bits");
505                 minfo.cu_mask.count = max_num_cus;
506                 cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
507         }
508
509         minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
510         if (!minfo.cu_mask.ptr)
511                 return -ENOMEM;
512
513         retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
514         if (retval) {
515                 pr_debug("Could not copy CU mask from userspace");
516                 retval = -EFAULT;
517                 goto out;
518         }
519
520         minfo.update_flag = UPDATE_FLAG_CU_MASK;
521
522         mutex_lock(&p->mutex);
523
524         retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);
525
526         mutex_unlock(&p->mutex);
527
528 out:
529         kfree(minfo.cu_mask.ptr);
530         return retval;
531 }
532
533 static int kfd_ioctl_get_queue_wave_state(struct file *filep,
534                                           struct kfd_process *p, void *data)
535 {
536         struct kfd_ioctl_get_queue_wave_state_args *args = data;
537         int r;
538
539         mutex_lock(&p->mutex);
540
541         r = pqm_get_wave_state(&p->pqm, args->queue_id,
542                                (void __user *)args->ctl_stack_address,
543                                &args->ctl_stack_used_size,
544                                &args->save_area_used_size);
545
546         mutex_unlock(&p->mutex);
547
548         return r;
549 }
550
551 static int kfd_ioctl_set_memory_policy(struct file *filep,
552                                         struct kfd_process *p, void *data)
553 {
554         struct kfd_ioctl_set_memory_policy_args *args = data;
555         int err = 0;
556         struct kfd_process_device *pdd;
557         enum cache_policy default_policy, alternate_policy;
558
559         if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
560             && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
561                 return -EINVAL;
562         }
563
564         if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
565             && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
566                 return -EINVAL;
567         }
568
569         mutex_lock(&p->mutex);
570         pdd = kfd_process_device_data_by_id(p, args->gpu_id);
571         if (!pdd) {
572                 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
573                 err = -EINVAL;
574                 goto err_pdd;
575         }
576
577         pdd = kfd_bind_process_to_device(pdd->dev, p);
578         if (IS_ERR(pdd)) {
579                 err = -ESRCH;
580                 goto out;
581         }
582
583         default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
584                          ? cache_policy_coherent : cache_policy_noncoherent;
585
586         alternate_policy =
587                 (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
588                    ? cache_policy_coherent : cache_policy_noncoherent;
589
590         if (!pdd->dev->dqm->ops.set_cache_memory_policy(pdd->dev->dqm,
591                                 &pdd->qpd,
592                                 default_policy,
593                                 alternate_policy,
594                                 (void __user *)args->alternate_aperture_base,
595                                 args->alternate_aperture_size))
596                 err = -EINVAL;
597
598 out:
599 err_pdd:
600         mutex_unlock(&p->mutex);
601
602         return err;
603 }
604
605 static int kfd_ioctl_set_trap_handler(struct file *filep,
606                                         struct kfd_process *p, void *data)
607 {
608         struct kfd_ioctl_set_trap_handler_args *args = data;
609         int err = 0;
610         struct kfd_process_device *pdd;
611
612         mutex_lock(&p->mutex);
613
614         pdd = kfd_process_device_data_by_id(p, args->gpu_id);
615         if (!pdd) {
616                 err = -EINVAL;
617                 goto err_pdd;
618         }
619
620         pdd = kfd_bind_process_to_device(pdd->dev, p);
621         if (IS_ERR(pdd)) {
622                 err = -ESRCH;
623                 goto out;
624         }
625
626         kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
627
628 out:
629 err_pdd:
630         mutex_unlock(&p->mutex);
631
632         return err;
633 }
634
635 static int kfd_ioctl_dbg_register(struct file *filep,
636                                 struct kfd_process *p, void *data)
637 {
638         return -EPERM;
639 }
640
641 static int kfd_ioctl_dbg_unregister(struct file *filep,
642                                 struct kfd_process *p, void *data)
643 {
644         return -EPERM;
645 }
646
647 static int kfd_ioctl_dbg_address_watch(struct file *filep,
648                                         struct kfd_process *p, void *data)
649 {
650         return -EPERM;
651 }
652
653 /* Parse and generate fixed size data structure for wave control */
654 static int kfd_ioctl_dbg_wave_control(struct file *filep,
655                                         struct kfd_process *p, void *data)
656 {
657         return -EPERM;
658 }
659
660 static int kfd_ioctl_get_clock_counters(struct file *filep,
661                                 struct kfd_process *p, void *data)
662 {
663         struct kfd_ioctl_get_clock_counters_args *args = data;
664         struct kfd_process_device *pdd;
665
666         mutex_lock(&p->mutex);
667         pdd = kfd_process_device_data_by_id(p, args->gpu_id);
668         mutex_unlock(&p->mutex);
669         if (pdd)
670                 /* Reading GPU clock counter from KGD */
671                 args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(pdd->dev->adev);
672         else
673                 /* Node without GPU resource */
674                 args->gpu_clock_counter = 0;
675
676         /* No access to rdtsc. Using raw monotonic time */
677         args->cpu_clock_counter = ktime_get_raw_ns();
678         args->system_clock_counter = ktime_get_boottime_ns();
679
680         /* Since the counter is in nano-seconds we use 1GHz frequency */
681         args->system_clock_freq = 1000000000;
682
683         return 0;
684 }
685
686
687 static int kfd_ioctl_get_process_apertures(struct file *filp,
688                                 struct kfd_process *p, void *data)
689 {
690         struct kfd_ioctl_get_process_apertures_args *args = data;
691         struct kfd_process_device_apertures *pAperture;
692         int i;
693
694         dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
695
696         args->num_of_nodes = 0;
697
698         mutex_lock(&p->mutex);
699         /* Run over all pdd of the process */
700         for (i = 0; i < p->n_pdds; i++) {
701                 struct kfd_process_device *pdd = p->pdds[i];
702
703                 pAperture =
704                         &args->process_apertures[args->num_of_nodes];
705                 pAperture->gpu_id = pdd->dev->id;
706                 pAperture->lds_base = pdd->lds_base;
707                 pAperture->lds_limit = pdd->lds_limit;
708                 pAperture->gpuvm_base = pdd->gpuvm_base;
709                 pAperture->gpuvm_limit = pdd->gpuvm_limit;
710                 pAperture->scratch_base = pdd->scratch_base;
711                 pAperture->scratch_limit = pdd->scratch_limit;
712
713                 dev_dbg(kfd_device,
714                         "node id %u\n", args->num_of_nodes);
715                 dev_dbg(kfd_device,
716                         "gpu id %u\n", pdd->dev->id);
717                 dev_dbg(kfd_device,
718                         "lds_base %llX\n", pdd->lds_base);
719                 dev_dbg(kfd_device,
720                         "lds_limit %llX\n", pdd->lds_limit);
721                 dev_dbg(kfd_device,
722                         "gpuvm_base %llX\n", pdd->gpuvm_base);
723                 dev_dbg(kfd_device,
724                         "gpuvm_limit %llX\n", pdd->gpuvm_limit);
725                 dev_dbg(kfd_device,
726                         "scratch_base %llX\n", pdd->scratch_base);
727                 dev_dbg(kfd_device,
728                         "scratch_limit %llX\n", pdd->scratch_limit);
729
730                 if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
731                         break;
732         }
733         mutex_unlock(&p->mutex);
734
735         return 0;
736 }
737
738 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
739                                 struct kfd_process *p, void *data)
740 {
741         struct kfd_ioctl_get_process_apertures_new_args *args = data;
742         struct kfd_process_device_apertures *pa;
743         int ret;
744         int i;
745
746         dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
747
748         if (args->num_of_nodes == 0) {
749                 /* Return number of nodes, so that user space can alloacate
750                  * sufficient memory
751                  */
752                 mutex_lock(&p->mutex);
753                 args->num_of_nodes = p->n_pdds;
754                 goto out_unlock;
755         }
756
757         /* Fill in process-aperture information for all available
758          * nodes, but not more than args->num_of_nodes as that is
759          * the amount of memory allocated by user
760          */
761         pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
762                                 args->num_of_nodes), GFP_KERNEL);
763         if (!pa)
764                 return -ENOMEM;
765
766         mutex_lock(&p->mutex);
767
768         if (!p->n_pdds) {
769                 args->num_of_nodes = 0;
770                 kfree(pa);
771                 goto out_unlock;
772         }
773
774         /* Run over all pdd of the process */
775         for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
776                 struct kfd_process_device *pdd = p->pdds[i];
777
778                 pa[i].gpu_id = pdd->dev->id;
779                 pa[i].lds_base = pdd->lds_base;
780                 pa[i].lds_limit = pdd->lds_limit;
781                 pa[i].gpuvm_base = pdd->gpuvm_base;
782                 pa[i].gpuvm_limit = pdd->gpuvm_limit;
783                 pa[i].scratch_base = pdd->scratch_base;
784                 pa[i].scratch_limit = pdd->scratch_limit;
785
786                 dev_dbg(kfd_device,
787                         "gpu id %u\n", pdd->dev->id);
788                 dev_dbg(kfd_device,
789                         "lds_base %llX\n", pdd->lds_base);
790                 dev_dbg(kfd_device,
791                         "lds_limit %llX\n", pdd->lds_limit);
792                 dev_dbg(kfd_device,
793                         "gpuvm_base %llX\n", pdd->gpuvm_base);
794                 dev_dbg(kfd_device,
795                         "gpuvm_limit %llX\n", pdd->gpuvm_limit);
796                 dev_dbg(kfd_device,
797                         "scratch_base %llX\n", pdd->scratch_base);
798                 dev_dbg(kfd_device,
799                         "scratch_limit %llX\n", pdd->scratch_limit);
800         }
801         mutex_unlock(&p->mutex);
802
803         args->num_of_nodes = i;
804         ret = copy_to_user(
805                         (void __user *)args->kfd_process_device_apertures_ptr,
806                         pa,
807                         (i * sizeof(struct kfd_process_device_apertures)));
808         kfree(pa);
809         return ret ? -EFAULT : 0;
810
811 out_unlock:
812         mutex_unlock(&p->mutex);
813         return 0;
814 }
815
816 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
817                                         void *data)
818 {
819         struct kfd_ioctl_create_event_args *args = data;
820         int err;
821
822         /* For dGPUs the event page is allocated in user mode. The
823          * handle is passed to KFD with the first call to this IOCTL
824          * through the event_page_offset field.
825          */
826         if (args->event_page_offset) {
827                 mutex_lock(&p->mutex);
828                 err = kfd_kmap_event_page(p, args->event_page_offset);
829                 mutex_unlock(&p->mutex);
830                 if (err)
831                         return err;
832         }
833
834         err = kfd_event_create(filp, p, args->event_type,
835                                 args->auto_reset != 0, args->node_id,
836                                 &args->event_id, &args->event_trigger_data,
837                                 &args->event_page_offset,
838                                 &args->event_slot_index);
839
840         pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
841         return err;
842 }
843
844 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
845                                         void *data)
846 {
847         struct kfd_ioctl_destroy_event_args *args = data;
848
849         return kfd_event_destroy(p, args->event_id);
850 }
851
852 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
853                                 void *data)
854 {
855         struct kfd_ioctl_set_event_args *args = data;
856
857         return kfd_set_event(p, args->event_id);
858 }
859
860 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
861                                 void *data)
862 {
863         struct kfd_ioctl_reset_event_args *args = data;
864
865         return kfd_reset_event(p, args->event_id);
866 }
867
868 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
869                                 void *data)
870 {
871         struct kfd_ioctl_wait_events_args *args = data;
872         int err;
873
874         err = kfd_wait_on_events(p, args->num_events,
875                         (void __user *)args->events_ptr,
876                         (args->wait_for_all != 0),
877                         &args->timeout, &args->wait_result);
878
879         return err;
880 }
881 static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
882                                         struct kfd_process *p, void *data)
883 {
884         struct kfd_ioctl_set_scratch_backing_va_args *args = data;
885         struct kfd_process_device *pdd;
886         struct kfd_dev *dev;
887         long err;
888
889         mutex_lock(&p->mutex);
890         pdd = kfd_process_device_data_by_id(p, args->gpu_id);
891         if (!pdd) {
892                 err = -EINVAL;
893                 goto err_pdd;
894         }
895         dev = pdd->dev;
896
897         pdd = kfd_bind_process_to_device(dev, p);
898         if (IS_ERR(pdd)) {
899                 err = PTR_ERR(pdd);
900                 goto bind_process_to_device_fail;
901         }
902
903         pdd->qpd.sh_hidden_private_base = args->va_addr;
904
905         mutex_unlock(&p->mutex);
906
907         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
908             pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
909                 dev->kfd2kgd->set_scratch_backing_va(
910                         dev->adev, args->va_addr, pdd->qpd.vmid);
911
912         return 0;
913
914 bind_process_to_device_fail:
915 err_pdd:
916         mutex_unlock(&p->mutex);
917         return err;
918 }
919
920 static int kfd_ioctl_get_tile_config(struct file *filep,
921                 struct kfd_process *p, void *data)
922 {
923         struct kfd_ioctl_get_tile_config_args *args = data;
924         struct kfd_process_device *pdd;
925         struct tile_config config;
926         int err = 0;
927
928         mutex_lock(&p->mutex);
929         pdd = kfd_process_device_data_by_id(p, args->gpu_id);
930         mutex_unlock(&p->mutex);
931         if (!pdd)
932                 return -EINVAL;
933
934         amdgpu_amdkfd_get_tile_config(pdd->dev->adev, &config);
935
936         args->gb_addr_config = config.gb_addr_config;
937         args->num_banks = config.num_banks;
938         args->num_ranks = config.num_ranks;
939
940         if (args->num_tile_configs > config.num_tile_configs)
941                 args->num_tile_configs = config.num_tile_configs;
942         err = copy_to_user((void __user *)args->tile_config_ptr,
943                         config.tile_config_ptr,
944                         args->num_tile_configs * sizeof(uint32_t));
945         if (err) {
946                 args->num_tile_configs = 0;
947                 return -EFAULT;
948         }
949
950         if (args->num_macro_tile_configs > config.num_macro_tile_configs)
951                 args->num_macro_tile_configs =
952                                 config.num_macro_tile_configs;
953         err = copy_to_user((void __user *)args->macro_tile_config_ptr,
954                         config.macro_tile_config_ptr,
955                         args->num_macro_tile_configs * sizeof(uint32_t));
956         if (err) {
957                 args->num_macro_tile_configs = 0;
958                 return -EFAULT;
959         }
960
961         return 0;
962 }
963
964 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
965                                 void *data)
966 {
967         struct kfd_ioctl_acquire_vm_args *args = data;
968         struct kfd_process_device *pdd;
969         struct file *drm_file;
970         int ret;
971
972         drm_file = fget(args->drm_fd);
973         if (!drm_file)
974                 return -EINVAL;
975
976         mutex_lock(&p->mutex);
977         pdd = kfd_process_device_data_by_id(p, args->gpu_id);
978         if (!pdd) {
979                 ret = -EINVAL;
980                 goto err_pdd;
981         }
982
983         if (pdd->drm_file) {
984                 ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
985                 goto err_drm_file;
986         }
987
988         ret = kfd_process_device_init_vm(pdd, drm_file);
989         if (ret)
990                 goto err_unlock;
991
992         /* On success, the PDD keeps the drm_file reference */
993         mutex_unlock(&p->mutex);
994
995         return 0;
996
997 err_unlock:
998 err_pdd:
999 err_drm_file:
1000         mutex_unlock(&p->mutex);
1001         fput(drm_file);
1002         return ret;
1003 }
1004
1005 bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1006 {
1007         if (debug_largebar) {
1008                 pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1009                 return true;
1010         }
1011
1012         if (dev->use_iommu_v2)
1013                 return false;
1014
1015         if (dev->local_mem_info.local_mem_size_private == 0 &&
1016                         dev->local_mem_info.local_mem_size_public > 0)
1017                 return true;
1018         return false;
1019 }
1020
1021 static int kfd_ioctl_get_available_memory(struct file *filep,
1022                                           struct kfd_process *p, void *data)
1023 {
1024         struct kfd_ioctl_get_available_memory_args *args = data;
1025         struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);
1026
1027         if (!pdd)
1028                 return -EINVAL;
1029         args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
1030         kfd_unlock_pdd(pdd);
1031         return 0;
1032 }
1033
1034 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1035                                         struct kfd_process *p, void *data)
1036 {
1037         struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1038         struct kfd_process_device *pdd;
1039         void *mem;
1040         struct kfd_dev *dev;
1041         int idr_handle;
1042         long err;
1043         uint64_t offset = args->mmap_offset;
1044         uint32_t flags = args->flags;
1045
1046         if (args->size == 0)
1047                 return -EINVAL;
1048
1049 #if IS_ENABLED(CONFIG_HSA_AMD_SVM)
1050         /* Flush pending deferred work to avoid racing with deferred actions
1051          * from previous memory map changes (e.g. munmap).
1052          */
1053         svm_range_list_lock_and_flush_work(&p->svms, current->mm);
1054         mutex_lock(&p->svms.lock);
1055         mmap_write_unlock(current->mm);
1056         if (interval_tree_iter_first(&p->svms.objects,
1057                                      args->va_addr >> PAGE_SHIFT,
1058                                      (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
1059                 pr_err("Address: 0x%llx already allocated by SVM\n",
1060                         args->va_addr);
1061                 mutex_unlock(&p->svms.lock);
1062                 return -EADDRINUSE;
1063         }
1064         mutex_unlock(&p->svms.lock);
1065 #endif
1066         mutex_lock(&p->mutex);
1067         pdd = kfd_process_device_data_by_id(p, args->gpu_id);
1068         if (!pdd) {
1069                 err = -EINVAL;
1070                 goto err_pdd;
1071         }
1072
1073         dev = pdd->dev;
1074
1075         if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1076                 (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1077                 !kfd_dev_is_large_bar(dev)) {
1078                 pr_err("Alloc host visible vram on small bar is not allowed\n");
1079                 err = -EINVAL;
1080                 goto err_large_bar;
1081         }
1082
1083         pdd = kfd_bind_process_to_device(dev, p);
1084         if (IS_ERR(pdd)) {
1085                 err = PTR_ERR(pdd);
1086                 goto err_unlock;
1087         }
1088
1089         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1090                 if (args->size != kfd_doorbell_process_slice(dev)) {
1091                         err = -EINVAL;
1092                         goto err_unlock;
1093                 }
1094                 offset = kfd_get_process_doorbells(pdd);
1095         } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1096                 if (args->size != PAGE_SIZE) {
1097                         err = -EINVAL;
1098                         goto err_unlock;
1099                 }
1100                 offset = dev->adev->rmmio_remap.bus_addr;
1101                 if (!offset) {
1102                         err = -ENOMEM;
1103                         goto err_unlock;
1104                 }
1105         }
1106
1107         err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1108                 dev->adev, args->va_addr, args->size,
1109                 pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
1110                 flags, false);
1111
1112         if (err)
1113                 goto err_unlock;
1114
1115         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1116         if (idr_handle < 0) {
1117                 err = -EFAULT;
1118                 goto err_free;
1119         }
1120
1121         /* Update the VRAM usage count */
1122         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
1123                 WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
1124
1125         mutex_unlock(&p->mutex);
1126
1127         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1128         args->mmap_offset = offset;
1129
1130         /* MMIO is mapped through kfd device
1131          * Generate a kfd mmap offset
1132          */
1133         if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1134                 args->mmap_offset = KFD_MMAP_TYPE_MMIO
1135                                         | KFD_MMAP_GPU_ID(args->gpu_id);
1136
1137         return 0;
1138
1139 err_free:
1140         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,
1141                                                pdd->drm_priv, NULL);
1142 err_unlock:
1143 err_pdd:
1144 err_large_bar:
1145         mutex_unlock(&p->mutex);
1146         return err;
1147 }
1148
1149 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1150                                         struct kfd_process *p, void *data)
1151 {
1152         struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1153         struct kfd_process_device *pdd;
1154         void *mem;
1155         int ret;
1156         uint64_t size = 0;
1157
1158         mutex_lock(&p->mutex);
1159         /*
1160          * Safeguard to prevent user space from freeing signal BO.
1161          * It will be freed at process termination.
1162          */
1163         if (p->signal_handle && (p->signal_handle == args->handle)) {
1164                 pr_err("Free signal BO is not allowed\n");
1165                 ret = -EPERM;
1166                 goto err_unlock;
1167         }
1168
1169         pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
1170         if (!pdd) {
1171                 pr_err("Process device data doesn't exist\n");
1172                 ret = -EINVAL;
1173                 goto err_pdd;
1174         }
1175
1176         mem = kfd_process_device_translate_handle(
1177                 pdd, GET_IDR_HANDLE(args->handle));
1178         if (!mem) {
1179                 ret = -EINVAL;
1180                 goto err_unlock;
1181         }
1182
1183         ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev,
1184                                 (struct kgd_mem *)mem, pdd->drm_priv, &size);
1185
1186         /* If freeing the buffer failed, leave the handle in place for
1187          * clean-up during process tear-down.
1188          */
1189         if (!ret)
1190                 kfd_process_device_remove_obj_handle(
1191                         pdd, GET_IDR_HANDLE(args->handle));
1192
1193         WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
1194
1195 err_unlock:
1196 err_pdd:
1197         mutex_unlock(&p->mutex);
1198         return ret;
1199 }
1200
1201 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1202                                         struct kfd_process *p, void *data)
1203 {
1204         struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1205         struct kfd_process_device *pdd, *peer_pdd;
1206         void *mem;
1207         struct kfd_dev *dev;
1208         long err = 0;
1209         int i;
1210         uint32_t *devices_arr = NULL;
1211
1212         if (!args->n_devices) {
1213                 pr_debug("Device IDs array empty\n");
1214                 return -EINVAL;
1215         }
1216         if (args->n_success > args->n_devices) {
1217                 pr_debug("n_success exceeds n_devices\n");
1218                 return -EINVAL;
1219         }
1220
1221         devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1222                                     GFP_KERNEL);
1223         if (!devices_arr)
1224                 return -ENOMEM;
1225
1226         err = copy_from_user(devices_arr,
1227                              (void __user *)args->device_ids_array_ptr,
1228                              args->n_devices * sizeof(*devices_arr));
1229         if (err != 0) {
1230                 err = -EFAULT;
1231                 goto copy_from_user_failed;
1232         }
1233
1234         mutex_lock(&p->mutex);
1235         pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
1236         if (!pdd) {
1237                 err = -EINVAL;
1238                 goto get_process_device_data_failed;
1239         }
1240         dev = pdd->dev;
1241
1242         pdd = kfd_bind_process_to_device(dev, p);
1243         if (IS_ERR(pdd)) {
1244                 err = PTR_ERR(pdd);
1245                 goto bind_process_to_device_failed;
1246         }
1247
1248         mem = kfd_process_device_translate_handle(pdd,
1249                                                 GET_IDR_HANDLE(args->handle));
1250         if (!mem) {
1251                 err = -ENOMEM;
1252                 goto get_mem_obj_from_handle_failed;
1253         }
1254
1255         for (i = args->n_success; i < args->n_devices; i++) {
1256                 peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
1257                 if (!peer_pdd) {
1258                         pr_debug("Getting device by id failed for 0x%x\n",
1259                                  devices_arr[i]);
1260                         err = -EINVAL;
1261                         goto get_mem_obj_from_handle_failed;
1262                 }
1263
1264                 peer_pdd = kfd_bind_process_to_device(peer_pdd->dev, p);
1265                 if (IS_ERR(peer_pdd)) {
1266                         err = PTR_ERR(peer_pdd);
1267                         goto get_mem_obj_from_handle_failed;
1268                 }
1269
1270                 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1271                         peer_pdd->dev->adev, (struct kgd_mem *)mem,
1272                         peer_pdd->drm_priv);
1273                 if (err) {
1274                         struct pci_dev *pdev = peer_pdd->dev->adev->pdev;
1275
1276                         dev_err(dev->adev->dev,
1277                                "Failed to map peer:%04x:%02x:%02x.%d mem_domain:%d\n",
1278                                pci_domain_nr(pdev->bus),
1279                                pdev->bus->number,
1280                                PCI_SLOT(pdev->devfn),
1281                                PCI_FUNC(pdev->devfn),
1282                                ((struct kgd_mem *)mem)->domain);
1283                         goto map_memory_to_gpu_failed;
1284                 }
1285                 args->n_success = i+1;
1286         }
1287
1288         mutex_unlock(&p->mutex);
1289
1290         err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
1291         if (err) {
1292                 pr_debug("Sync memory failed, wait interrupted by user signal\n");
1293                 goto sync_memory_failed;
1294         }
1295
1296         /* Flush TLBs after waiting for the page table updates to complete */
1297         for (i = 0; i < args->n_devices; i++) {
1298                 peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
1299                 if (WARN_ON_ONCE(!peer_pdd))
1300                         continue;
1301                 kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
1302         }
1303         kfree(devices_arr);
1304
1305         return err;
1306
1307 get_process_device_data_failed:
1308 bind_process_to_device_failed:
1309 get_mem_obj_from_handle_failed:
1310 map_memory_to_gpu_failed:
1311         mutex_unlock(&p->mutex);
1312 copy_from_user_failed:
1313 sync_memory_failed:
1314         kfree(devices_arr);
1315
1316         return err;
1317 }
1318
1319 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1320                                         struct kfd_process *p, void *data)
1321 {
1322         struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1323         struct kfd_process_device *pdd, *peer_pdd;
1324         void *mem;
1325         long err = 0;
1326         uint32_t *devices_arr = NULL, i;
1327
1328         if (!args->n_devices) {
1329                 pr_debug("Device IDs array empty\n");
1330                 return -EINVAL;
1331         }
1332         if (args->n_success > args->n_devices) {
1333                 pr_debug("n_success exceeds n_devices\n");
1334                 return -EINVAL;
1335         }
1336
1337         devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1338                                     GFP_KERNEL);
1339         if (!devices_arr)
1340                 return -ENOMEM;
1341
1342         err = copy_from_user(devices_arr,
1343                              (void __user *)args->device_ids_array_ptr,
1344                              args->n_devices * sizeof(*devices_arr));
1345         if (err != 0) {
1346                 err = -EFAULT;
1347                 goto copy_from_user_failed;
1348         }
1349
1350         mutex_lock(&p->mutex);
1351         pdd = kfd_process_device_data_by_id(p, GET_GPU_ID(args->handle));
1352         if (!pdd) {
1353                 err = -EINVAL;
1354                 goto bind_process_to_device_failed;
1355         }
1356
1357         mem = kfd_process_device_translate_handle(pdd,
1358                                                 GET_IDR_HANDLE(args->handle));
1359         if (!mem) {
1360                 err = -ENOMEM;
1361                 goto get_mem_obj_from_handle_failed;
1362         }
1363
1364         for (i = args->n_success; i < args->n_devices; i++) {
1365                 peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
1366                 if (!peer_pdd) {
1367                         err = -EINVAL;
1368                         goto get_mem_obj_from_handle_failed;
1369                 }
1370                 err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1371                         peer_pdd->dev->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);
1372                 if (err) {
1373                         pr_err("Failed to unmap from gpu %d/%d\n",
1374                                i, args->n_devices);
1375                         goto unmap_memory_from_gpu_failed;
1376                 }
1377                 args->n_success = i+1;
1378         }
1379         mutex_unlock(&p->mutex);
1380
1381         if (kfd_flush_tlb_after_unmap(pdd->dev)) {
1382                 err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
1383                                 (struct kgd_mem *) mem, true);
1384                 if (err) {
1385                         pr_debug("Sync memory failed, wait interrupted by user signal\n");
1386                         goto sync_memory_failed;
1387                 }
1388
1389                 /* Flush TLBs after waiting for the page table updates to complete */
1390                 for (i = 0; i < args->n_devices; i++) {
1391                         peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
1392                         if (WARN_ON_ONCE(!peer_pdd))
1393                                 continue;
1394                         kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
1395                 }
1396         }
1397         kfree(devices_arr);
1398
1399         return 0;
1400
1401 bind_process_to_device_failed:
1402 get_mem_obj_from_handle_failed:
1403 unmap_memory_from_gpu_failed:
1404         mutex_unlock(&p->mutex);
1405 copy_from_user_failed:
1406 sync_memory_failed:
1407         kfree(devices_arr);
1408         return err;
1409 }
1410
1411 static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1412                 struct kfd_process *p, void *data)
1413 {
1414         int retval;
1415         struct kfd_ioctl_alloc_queue_gws_args *args = data;
1416         struct queue *q;
1417         struct kfd_dev *dev;
1418
1419         mutex_lock(&p->mutex);
1420         q = pqm_get_user_queue(&p->pqm, args->queue_id);
1421
1422         if (q) {
1423                 dev = q->device;
1424         } else {
1425                 retval = -EINVAL;
1426                 goto out_unlock;
1427         }
1428
1429         if (!dev->gws) {
1430                 retval = -ENODEV;
1431                 goto out_unlock;
1432         }
1433
1434         if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1435                 retval = -ENODEV;
1436                 goto out_unlock;
1437         }
1438
1439         retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1440         mutex_unlock(&p->mutex);
1441
1442         args->first_gws = 0;
1443         return retval;
1444
1445 out_unlock:
1446         mutex_unlock(&p->mutex);
1447         return retval;
1448 }
1449
1450 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1451                 struct kfd_process *p, void *data)
1452 {
1453         struct kfd_ioctl_get_dmabuf_info_args *args = data;
1454         struct kfd_dev *dev = NULL;
1455         struct amdgpu_device *dmabuf_adev;
1456         void *metadata_buffer = NULL;
1457         uint32_t flags;
1458         unsigned int i;
1459         int r;
1460
1461         /* Find a KFD GPU device that supports the get_dmabuf_info query */
1462         for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1463                 if (dev)
1464                         break;
1465         if (!dev)
1466                 return -EINVAL;
1467
1468         if (args->metadata_ptr) {
1469                 metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1470                 if (!metadata_buffer)
1471                         return -ENOMEM;
1472         }
1473
1474         /* Get dmabuf info from KGD */
1475         r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd,
1476                                           &dmabuf_adev, &args->size,
1477                                           metadata_buffer, args->metadata_size,
1478                                           &args->metadata_size, &flags);
1479         if (r)
1480                 goto exit;
1481
1482         /* Reverse-lookup gpu_id from kgd pointer */
1483         dev = kfd_device_by_adev(dmabuf_adev);
1484         if (!dev) {
1485                 r = -EINVAL;
1486                 goto exit;
1487         }
1488         args->gpu_id = dev->id;
1489         args->flags = flags;
1490
1491         /* Copy metadata buffer to user mode */
1492         if (metadata_buffer) {
1493                 r = copy_to_user((void __user *)args->metadata_ptr,
1494                                  metadata_buffer, args->metadata_size);
1495                 if (r != 0)
1496                         r = -EFAULT;
1497         }
1498
1499 exit:
1500         kfree(metadata_buffer);
1501
1502         return r;
1503 }
1504
1505 static int kfd_ioctl_import_dmabuf(struct file *filep,
1506                                    struct kfd_process *p, void *data)
1507 {
1508         struct kfd_ioctl_import_dmabuf_args *args = data;
1509         struct kfd_process_device *pdd;
1510         struct dma_buf *dmabuf;
1511         int idr_handle;
1512         uint64_t size;
1513         void *mem;
1514         int r;
1515
1516         dmabuf = dma_buf_get(args->dmabuf_fd);
1517         if (IS_ERR(dmabuf))
1518                 return PTR_ERR(dmabuf);
1519
1520         mutex_lock(&p->mutex);
1521         pdd = kfd_process_device_data_by_id(p, args->gpu_id);
1522         if (!pdd) {
1523                 r = -EINVAL;
1524                 goto err_unlock;
1525         }
1526
1527         pdd = kfd_bind_process_to_device(pdd->dev, p);
1528         if (IS_ERR(pdd)) {
1529                 r = PTR_ERR(pdd);
1530                 goto err_unlock;
1531         }
1532
1533         r = amdgpu_amdkfd_gpuvm_import_dmabuf(pdd->dev->adev, dmabuf,
1534                                               args->va_addr, pdd->drm_priv,
1535                                               (struct kgd_mem **)&mem, &size,
1536                                               NULL);
1537         if (r)
1538                 goto err_unlock;
1539
1540         idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1541         if (idr_handle < 0) {
1542                 r = -EFAULT;
1543                 goto err_free;
1544         }
1545
1546         mutex_unlock(&p->mutex);
1547         dma_buf_put(dmabuf);
1548
1549         args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1550
1551         return 0;
1552
1553 err_free:
1554         amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, (struct kgd_mem *)mem,
1555                                                pdd->drm_priv, NULL);
1556 err_unlock:
1557         mutex_unlock(&p->mutex);
1558         dma_buf_put(dmabuf);
1559         return r;
1560 }
1561
1562 /* Handle requests for watching SMI events */
1563 static int kfd_ioctl_smi_events(struct file *filep,
1564                                 struct kfd_process *p, void *data)
1565 {
1566         struct kfd_ioctl_smi_events_args *args = data;
1567         struct kfd_process_device *pdd;
1568
1569         mutex_lock(&p->mutex);
1570
1571         pdd = kfd_process_device_data_by_id(p, args->gpuid);
1572         mutex_unlock(&p->mutex);
1573         if (!pdd)
1574                 return -EINVAL;
1575
1576         return kfd_smi_event_open(pdd->dev, &args->anon_fd);
1577 }
1578
1579 static int kfd_ioctl_set_xnack_mode(struct file *filep,
1580                                     struct kfd_process *p, void *data)
1581 {
1582         struct kfd_ioctl_set_xnack_mode_args *args = data;
1583         int r = 0;
1584
1585         mutex_lock(&p->mutex);
1586         if (args->xnack_enabled >= 0) {
1587                 if (!list_empty(&p->pqm.queues)) {
1588                         pr_debug("Process has user queues running\n");
1589                         mutex_unlock(&p->mutex);
1590                         return -EBUSY;
1591                 }
1592                 if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
1593                         r = -EPERM;
1594                 else
1595                         p->xnack_enabled = args->xnack_enabled;
1596         } else {
1597                 args->xnack_enabled = p->xnack_enabled;
1598         }
1599         mutex_unlock(&p->mutex);
1600
1601         return r;
1602 }
1603
1604 #if IS_ENABLED(CONFIG_HSA_AMD_SVM)
1605 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
1606 {
1607         struct kfd_ioctl_svm_args *args = data;
1608         int r = 0;
1609
1610         pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
1611                  args->start_addr, args->size, args->op, args->nattr);
1612
1613         if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
1614                 return -EINVAL;
1615         if (!args->start_addr || !args->size)
1616                 return -EINVAL;
1617
1618         r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
1619                       args->attrs);
1620
1621         return r;
1622 }
1623 #else
1624 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
1625 {
1626         return -EPERM;
1627 }
1628 #endif
1629
1630 static int criu_checkpoint_process(struct kfd_process *p,
1631                              uint8_t __user *user_priv_data,
1632                              uint64_t *priv_offset)
1633 {
1634         struct kfd_criu_process_priv_data process_priv;
1635         int ret;
1636
1637         memset(&process_priv, 0, sizeof(process_priv));
1638
1639         process_priv.version = KFD_CRIU_PRIV_VERSION;
1640         /* For CR, we don't consider negative xnack mode which is used for
1641          * querying without changing it, here 0 simply means disabled and 1
1642          * means enabled so retry for finding a valid PTE.
1643          */
1644         process_priv.xnack_mode = p->xnack_enabled ? 1 : 0;
1645
1646         ret = copy_to_user(user_priv_data + *priv_offset,
1647                                 &process_priv, sizeof(process_priv));
1648
1649         if (ret) {
1650                 pr_err("Failed to copy process information to user\n");
1651                 ret = -EFAULT;
1652         }
1653
1654         *priv_offset += sizeof(process_priv);
1655         return ret;
1656 }
1657
1658 static int criu_checkpoint_devices(struct kfd_process *p,
1659                              uint32_t num_devices,
1660                              uint8_t __user *user_addr,
1661                              uint8_t __user *user_priv_data,
1662                              uint64_t *priv_offset)
1663 {
1664         struct kfd_criu_device_priv_data *device_priv = NULL;
1665         struct kfd_criu_device_bucket *device_buckets = NULL;
1666         int ret = 0, i;
1667
1668         device_buckets = kvzalloc(num_devices * sizeof(*device_buckets), GFP_KERNEL);
1669         if (!device_buckets) {
1670                 ret = -ENOMEM;
1671                 goto exit;
1672         }
1673
1674         device_priv = kvzalloc(num_devices * sizeof(*device_priv), GFP_KERNEL);
1675         if (!device_priv) {
1676                 ret = -ENOMEM;
1677                 goto exit;
1678         }
1679
1680         for (i = 0; i < num_devices; i++) {
1681                 struct kfd_process_device *pdd = p->pdds[i];
1682
1683                 device_buckets[i].user_gpu_id = pdd->user_gpu_id;
1684                 device_buckets[i].actual_gpu_id = pdd->dev->id;
1685
1686                 /*
1687                  * priv_data does not contain useful information for now and is reserved for
1688                  * future use, so we do not set its contents.
1689                  */
1690         }
1691
1692         ret = copy_to_user(user_addr, device_buckets, num_devices * sizeof(*device_buckets));
1693         if (ret) {
1694                 pr_err("Failed to copy device information to user\n");
1695                 ret = -EFAULT;
1696                 goto exit;
1697         }
1698
1699         ret = copy_to_user(user_priv_data + *priv_offset,
1700                            device_priv,
1701                            num_devices * sizeof(*device_priv));
1702         if (ret) {
1703                 pr_err("Failed to copy device information to user\n");
1704                 ret = -EFAULT;
1705         }
1706         *priv_offset += num_devices * sizeof(*device_priv);
1707
1708 exit:
1709         kvfree(device_buckets);
1710         kvfree(device_priv);
1711         return ret;
1712 }
1713
1714 static uint32_t get_process_num_bos(struct kfd_process *p)
1715 {
1716         uint32_t num_of_bos = 0;
1717         int i;
1718
1719         /* Run over all PDDs of the process */
1720         for (i = 0; i < p->n_pdds; i++) {
1721                 struct kfd_process_device *pdd = p->pdds[i];
1722                 void *mem;
1723                 int id;
1724
1725                 idr_for_each_entry(&pdd->alloc_idr, mem, id) {
1726                         struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
1727
1728                         if ((uint64_t)kgd_mem->va > pdd->gpuvm_base)
1729                                 num_of_bos++;
1730                 }
1731         }
1732         return num_of_bos;
1733 }
1734
1735 static int criu_get_prime_handle(struct drm_gem_object *gobj, int flags,
1736                                       u32 *shared_fd)
1737 {
1738         struct dma_buf *dmabuf;
1739         int ret;
1740
1741         dmabuf = amdgpu_gem_prime_export(gobj, flags);
1742         if (IS_ERR(dmabuf)) {
1743                 ret = PTR_ERR(dmabuf);
1744                 pr_err("dmabuf export failed for the BO\n");
1745                 return ret;
1746         }
1747
1748         ret = dma_buf_fd(dmabuf, flags);
1749         if (ret < 0) {
1750                 pr_err("dmabuf create fd failed, ret:%d\n", ret);
1751                 goto out_free_dmabuf;
1752         }
1753
1754         *shared_fd = ret;
1755         return 0;
1756
1757 out_free_dmabuf:
1758         dma_buf_put(dmabuf);
1759         return ret;
1760 }
1761
1762 static int criu_checkpoint_bos(struct kfd_process *p,
1763                                uint32_t num_bos,
1764                                uint8_t __user *user_bos,
1765                                uint8_t __user *user_priv_data,
1766                                uint64_t *priv_offset)
1767 {
1768         struct kfd_criu_bo_bucket *bo_buckets;
1769         struct kfd_criu_bo_priv_data *bo_privs;
1770         int ret = 0, pdd_index, bo_index = 0, id;
1771         void *mem;
1772
1773         bo_buckets = kvzalloc(num_bos * sizeof(*bo_buckets), GFP_KERNEL);
1774         if (!bo_buckets)
1775                 return -ENOMEM;
1776
1777         bo_privs = kvzalloc(num_bos * sizeof(*bo_privs), GFP_KERNEL);
1778         if (!bo_privs) {
1779                 ret = -ENOMEM;
1780                 goto exit;
1781         }
1782
1783         for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
1784                 struct kfd_process_device *pdd = p->pdds[pdd_index];
1785                 struct amdgpu_bo *dumper_bo;
1786                 struct kgd_mem *kgd_mem;
1787
1788                 idr_for_each_entry(&pdd->alloc_idr, mem, id) {
1789                         struct kfd_criu_bo_bucket *bo_bucket;
1790                         struct kfd_criu_bo_priv_data *bo_priv;
1791                         int i, dev_idx = 0;
1792
1793                         if (!mem) {
1794                                 ret = -ENOMEM;
1795                                 goto exit;
1796                         }
1797
1798                         kgd_mem = (struct kgd_mem *)mem;
1799                         dumper_bo = kgd_mem->bo;
1800
1801                         if ((uint64_t)kgd_mem->va <= pdd->gpuvm_base)
1802                                 continue;
1803
1804                         bo_bucket = &bo_buckets[bo_index];
1805                         bo_priv = &bo_privs[bo_index];
1806
1807                         bo_bucket->gpu_id = pdd->user_gpu_id;
1808                         bo_bucket->addr = (uint64_t)kgd_mem->va;
1809                         bo_bucket->size = amdgpu_bo_size(dumper_bo);
1810                         bo_bucket->alloc_flags = (uint32_t)kgd_mem->alloc_flags;
1811                         bo_priv->idr_handle = id;
1812
1813                         if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
1814                                 ret = amdgpu_ttm_tt_get_userptr(&dumper_bo->tbo,
1815                                                                 &bo_priv->user_addr);
1816                                 if (ret) {
1817                                         pr_err("Failed to obtain user address for user-pointer bo\n");
1818                                         goto exit;
1819                                 }
1820                         }
1821                         if (bo_bucket->alloc_flags
1822                             & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
1823                                 ret = criu_get_prime_handle(&dumper_bo->tbo.base,
1824                                                 bo_bucket->alloc_flags &
1825                                                 KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
1826                                                 &bo_bucket->dmabuf_fd);
1827                                 if (ret)
1828                                         goto exit;
1829                         } else {
1830                                 bo_bucket->dmabuf_fd = KFD_INVALID_FD;
1831                         }
1832
1833                         if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
1834                                 bo_bucket->offset = KFD_MMAP_TYPE_DOORBELL |
1835                                         KFD_MMAP_GPU_ID(pdd->dev->id);
1836                         else if (bo_bucket->alloc_flags &
1837                                 KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1838                                 bo_bucket->offset = KFD_MMAP_TYPE_MMIO |
1839                                         KFD_MMAP_GPU_ID(pdd->dev->id);
1840                         else
1841                                 bo_bucket->offset = amdgpu_bo_mmap_offset(dumper_bo);
1842
1843                         for (i = 0; i < p->n_pdds; i++) {
1844                                 if (amdgpu_amdkfd_bo_mapped_to_dev(p->pdds[i]->dev->adev, kgd_mem))
1845                                         bo_priv->mapped_gpuids[dev_idx++] = p->pdds[i]->user_gpu_id;
1846                         }
1847
1848                         pr_debug("bo_size = 0x%llx, bo_addr = 0x%llx bo_offset = 0x%llx\n"
1849                                         "gpu_id = 0x%x alloc_flags = 0x%x idr_handle = 0x%x",
1850                                         bo_bucket->size,
1851                                         bo_bucket->addr,
1852                                         bo_bucket->offset,
1853                                         bo_bucket->gpu_id,
1854                                         bo_bucket->alloc_flags,
1855                                         bo_priv->idr_handle);
1856                         bo_index++;
1857                 }
1858         }
1859
1860         ret = copy_to_user(user_bos, bo_buckets, num_bos * sizeof(*bo_buckets));
1861         if (ret) {
1862                 pr_err("Failed to copy BO information to user\n");
1863                 ret = -EFAULT;
1864                 goto exit;
1865         }
1866
1867         ret = copy_to_user(user_priv_data + *priv_offset, bo_privs, num_bos * sizeof(*bo_privs));
1868         if (ret) {
1869                 pr_err("Failed to copy BO priv information to user\n");
1870                 ret = -EFAULT;
1871                 goto exit;
1872         }
1873
1874         *priv_offset += num_bos * sizeof(*bo_privs);
1875
1876 exit:
1877         while (ret && bo_index--) {
1878                 if (bo_buckets[bo_index].alloc_flags
1879                     & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
1880                         close_fd(bo_buckets[bo_index].dmabuf_fd);
1881         }
1882
1883         kvfree(bo_buckets);
1884         kvfree(bo_privs);
1885         return ret;
1886 }
1887
1888 static int criu_get_process_object_info(struct kfd_process *p,
1889                                         uint32_t *num_devices,
1890                                         uint32_t *num_bos,
1891                                         uint32_t *num_objects,
1892                                         uint64_t *objs_priv_size)
1893 {
1894         uint64_t queues_priv_data_size, svm_priv_data_size, priv_size;
1895         uint32_t num_queues, num_events, num_svm_ranges;
1896         int ret;
1897
1898         *num_devices = p->n_pdds;
1899         *num_bos = get_process_num_bos(p);
1900
1901         ret = kfd_process_get_queue_info(p, &num_queues, &queues_priv_data_size);
1902         if (ret)
1903                 return ret;
1904
1905         num_events = kfd_get_num_events(p);
1906
1907         ret = svm_range_get_info(p, &num_svm_ranges, &svm_priv_data_size);
1908         if (ret)
1909                 return ret;
1910
1911         *num_objects = num_queues + num_events + num_svm_ranges;
1912
1913         if (objs_priv_size) {
1914                 priv_size = sizeof(struct kfd_criu_process_priv_data);
1915                 priv_size += *num_devices * sizeof(struct kfd_criu_device_priv_data);
1916                 priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
1917                 priv_size += queues_priv_data_size;
1918                 priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
1919                 priv_size += svm_priv_data_size;
1920                 *objs_priv_size = priv_size;
1921         }
1922         return 0;
1923 }
1924
1925 static int criu_checkpoint(struct file *filep,
1926                            struct kfd_process *p,
1927                            struct kfd_ioctl_criu_args *args)
1928 {
1929         int ret;
1930         uint32_t num_devices, num_bos, num_objects;
1931         uint64_t priv_size, priv_offset = 0;
1932
1933         if (!args->devices || !args->bos || !args->priv_data)
1934                 return -EINVAL;
1935
1936         mutex_lock(&p->mutex);
1937
1938         if (!p->n_pdds) {
1939                 pr_err("No pdd for given process\n");
1940                 ret = -ENODEV;
1941                 goto exit_unlock;
1942         }
1943
1944         /* Confirm all process queues are evicted */
1945         if (!p->queues_paused) {
1946                 pr_err("Cannot dump process when queues are not in evicted state\n");
1947                 /* CRIU plugin did not call op PROCESS_INFO before checkpointing */
1948                 ret = -EINVAL;
1949                 goto exit_unlock;
1950         }
1951
1952         ret = criu_get_process_object_info(p, &num_devices, &num_bos, &num_objects, &priv_size);
1953         if (ret)
1954                 goto exit_unlock;
1955
1956         if (num_devices != args->num_devices ||
1957             num_bos != args->num_bos ||
1958             num_objects != args->num_objects ||
1959             priv_size != args->priv_data_size) {
1960
1961                 ret = -EINVAL;
1962                 goto exit_unlock;
1963         }
1964
1965         /* each function will store private data inside priv_data and adjust priv_offset */
1966         ret = criu_checkpoint_process(p, (uint8_t __user *)args->priv_data, &priv_offset);
1967         if (ret)
1968                 goto exit_unlock;
1969
1970         ret = criu_checkpoint_devices(p, num_devices, (uint8_t __user *)args->devices,
1971                                 (uint8_t __user *)args->priv_data, &priv_offset);
1972         if (ret)
1973                 goto exit_unlock;
1974
1975         ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
1976                             (uint8_t __user *)args->priv_data, &priv_offset);
1977         if (ret)
1978                 goto exit_unlock;
1979
1980         if (num_objects) {
1981                 ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
1982                                                  &priv_offset);
1983                 if (ret)
1984                         goto close_bo_fds;
1985
1986                 ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
1987                                                  &priv_offset);
1988                 if (ret)
1989                         goto close_bo_fds;
1990
1991                 ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
1992                 if (ret)
1993                         goto close_bo_fds;
1994         }
1995
1996 close_bo_fds:
1997         if (ret) {
1998                 /* If IOCTL returns err, user assumes all FDs opened in criu_dump_bos are closed */
1999                 uint32_t i;
2000                 struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket *) args->bos;
2001
2002                 for (i = 0; i < num_bos; i++) {
2003                         if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
2004                                 close_fd(bo_buckets[i].dmabuf_fd);
2005                 }
2006         }
2007
2008 exit_unlock:
2009         mutex_unlock(&p->mutex);
2010         if (ret)
2011                 pr_err("Failed to dump CRIU ret:%d\n", ret);
2012         else
2013                 pr_debug("CRIU dump ret:%d\n", ret);
2014
2015         return ret;
2016 }
2017
2018 static int criu_restore_process(struct kfd_process *p,
2019                                 struct kfd_ioctl_criu_args *args,
2020                                 uint64_t *priv_offset,
2021                                 uint64_t max_priv_data_size)
2022 {
2023         int ret = 0;
2024         struct kfd_criu_process_priv_data process_priv;
2025
2026         if (*priv_offset + sizeof(process_priv) > max_priv_data_size)
2027                 return -EINVAL;
2028
2029         ret = copy_from_user(&process_priv,
2030                                 (void __user *)(args->priv_data + *priv_offset),
2031                                 sizeof(process_priv));
2032         if (ret) {
2033                 pr_err("Failed to copy process private information from user\n");
2034                 ret = -EFAULT;
2035                 goto exit;
2036         }
2037         *priv_offset += sizeof(process_priv);
2038
2039         if (process_priv.version != KFD_CRIU_PRIV_VERSION) {
2040                 pr_err("Invalid CRIU API version (checkpointed:%d current:%d)\n",
2041                         process_priv.version, KFD_CRIU_PRIV_VERSION);
2042                 return -EINVAL;
2043         }
2044
2045         pr_debug("Setting XNACK mode\n");
2046         if (process_priv.xnack_mode && !kfd_process_xnack_mode(p, true)) {
2047                 pr_err("xnack mode cannot be set\n");
2048                 ret = -EPERM;
2049                 goto exit;
2050         } else {
2051                 pr_debug("set xnack mode: %d\n", process_priv.xnack_mode);
2052                 p->xnack_enabled = process_priv.xnack_mode;
2053         }
2054
2055 exit:
2056         return ret;
2057 }
2058
2059 static int criu_restore_devices(struct kfd_process *p,
2060                                 struct kfd_ioctl_criu_args *args,
2061                                 uint64_t *priv_offset,
2062                                 uint64_t max_priv_data_size)
2063 {
2064         struct kfd_criu_device_bucket *device_buckets;
2065         struct kfd_criu_device_priv_data *device_privs;
2066         int ret = 0;
2067         uint32_t i;
2068
2069         if (args->num_devices != p->n_pdds)
2070                 return -EINVAL;
2071
2072         if (*priv_offset + (args->num_devices * sizeof(*device_privs)) > max_priv_data_size)
2073                 return -EINVAL;
2074
2075         device_buckets = kmalloc_array(args->num_devices, sizeof(*device_buckets), GFP_KERNEL);
2076         if (!device_buckets)
2077                 return -ENOMEM;
2078
2079         ret = copy_from_user(device_buckets, (void __user *)args->devices,
2080                                 args->num_devices * sizeof(*device_buckets));
2081         if (ret) {
2082                 pr_err("Failed to copy devices buckets from user\n");
2083                 ret = -EFAULT;
2084                 goto exit;
2085         }
2086
2087         for (i = 0; i < args->num_devices; i++) {
2088                 struct kfd_dev *dev;
2089                 struct kfd_process_device *pdd;
2090                 struct file *drm_file;
2091
2092                 /* device private data is not currently used */
2093
2094                 if (!device_buckets[i].user_gpu_id) {
2095                         pr_err("Invalid user gpu_id\n");
2096                         ret = -EINVAL;
2097                         goto exit;
2098                 }
2099
2100                 dev = kfd_device_by_id(device_buckets[i].actual_gpu_id);
2101                 if (!dev) {
2102                         pr_err("Failed to find device with gpu_id = %x\n",
2103                                 device_buckets[i].actual_gpu_id);
2104                         ret = -EINVAL;
2105                         goto exit;
2106                 }
2107
2108                 pdd = kfd_get_process_device_data(dev, p);
2109                 if (!pdd) {
2110                         pr_err("Failed to get pdd for gpu_id = %x\n",
2111                                         device_buckets[i].actual_gpu_id);
2112                         ret = -EINVAL;
2113                         goto exit;
2114                 }
2115                 pdd->user_gpu_id = device_buckets[i].user_gpu_id;
2116
2117                 drm_file = fget(device_buckets[i].drm_fd);
2118                 if (!drm_file) {
2119                         pr_err("Invalid render node file descriptor sent from plugin (%d)\n",
2120                                 device_buckets[i].drm_fd);
2121                         ret = -EINVAL;
2122                         goto exit;
2123                 }
2124
2125                 if (pdd->drm_file) {
2126                         ret = -EINVAL;
2127                         goto exit;
2128                 }
2129
2130                 /* create the vm using render nodes for kfd pdd */
2131                 if (kfd_process_device_init_vm(pdd, drm_file)) {
2132                         pr_err("could not init vm for given pdd\n");
2133                         /* On success, the PDD keeps the drm_file reference */
2134                         fput(drm_file);
2135                         ret = -EINVAL;
2136                         goto exit;
2137                 }
2138                 /*
2139                  * pdd now already has the vm bound to render node so below api won't create a new
2140                  * exclusive kfd mapping but use existing one with renderDXXX but is still needed
2141                  * for iommu v2 binding  and runtime pm.
2142                  */
2143                 pdd = kfd_bind_process_to_device(dev, p);
2144                 if (IS_ERR(pdd)) {
2145                         ret = PTR_ERR(pdd);
2146                         goto exit;
2147                 }
2148         }
2149
2150         /*
2151          * We are not copying device private data from user as we are not using the data for now,
2152          * but we still adjust for its private data.
2153          */
2154         *priv_offset += args->num_devices * sizeof(*device_privs);
2155
2156 exit:
2157         kfree(device_buckets);
2158         return ret;
2159 }
2160
2161 static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
2162                                       struct kfd_criu_bo_bucket *bo_bucket,
2163                                       struct kfd_criu_bo_priv_data *bo_priv,
2164                                       struct kgd_mem **kgd_mem)
2165 {
2166         int idr_handle;
2167         int ret;
2168         const bool criu_resume = true;
2169         u64 offset;
2170
2171         if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
2172                 if (bo_bucket->size != kfd_doorbell_process_slice(pdd->dev))
2173                         return -EINVAL;
2174
2175                 offset = kfd_get_process_doorbells(pdd);
2176         } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
2177                 /* MMIO BOs need remapped bus address */
2178                 if (bo_bucket->size != PAGE_SIZE) {
2179                         pr_err("Invalid page size\n");
2180                         return -EINVAL;
2181                 }
2182                 offset = pdd->dev->adev->rmmio_remap.bus_addr;
2183                 if (!offset) {
2184                         pr_err("amdgpu_amdkfd_get_mmio_remap_phys_addr failed\n");
2185                         return -ENOMEM;
2186                 }
2187         } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
2188                 offset = bo_priv->user_addr;
2189         }
2190         /* Create the BO */
2191         ret = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(pdd->dev->adev, bo_bucket->addr,
2192                                                       bo_bucket->size, pdd->drm_priv, kgd_mem,
2193                                                       &offset, bo_bucket->alloc_flags, criu_resume);
2194         if (ret) {
2195                 pr_err("Could not create the BO\n");
2196                 return ret;
2197         }
2198         pr_debug("New BO created: size:0x%llx addr:0x%llx offset:0x%llx\n",
2199                  bo_bucket->size, bo_bucket->addr, offset);
2200
2201         /* Restore previous IDR handle */
2202         pr_debug("Restoring old IDR handle for the BO");
2203         idr_handle = idr_alloc(&pdd->alloc_idr, *kgd_mem, bo_priv->idr_handle,
2204                                bo_priv->idr_handle + 1, GFP_KERNEL);
2205
2206         if (idr_handle < 0) {
2207                 pr_err("Could not allocate idr\n");
2208                 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, *kgd_mem, pdd->drm_priv,
2209                                                        NULL);
2210                 return -ENOMEM;
2211         }
2212
2213         if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL)
2214                 bo_bucket->restored_offset = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(pdd->dev->id);
2215         if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
2216                 bo_bucket->restored_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(pdd->dev->id);
2217         } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
2218                 bo_bucket->restored_offset = offset;
2219         } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
2220                 bo_bucket->restored_offset = offset;
2221                 /* Update the VRAM usage count */
2222                 WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + bo_bucket->size);
2223         }
2224         return 0;
2225 }
2226
2227 static int criu_restore_bo(struct kfd_process *p,
2228                            struct kfd_criu_bo_bucket *bo_bucket,
2229                            struct kfd_criu_bo_priv_data *bo_priv)
2230 {
2231         struct kfd_process_device *pdd;
2232         struct kgd_mem *kgd_mem;
2233         int ret;
2234         int j;
2235
2236         pr_debug("Restoring BO size:0x%llx addr:0x%llx gpu_id:0x%x flags:0x%x idr_handle:0x%x\n",
2237                  bo_bucket->size, bo_bucket->addr, bo_bucket->gpu_id, bo_bucket->alloc_flags,
2238                  bo_priv->idr_handle);
2239
2240         pdd = kfd_process_device_data_by_id(p, bo_bucket->gpu_id);
2241         if (!pdd) {
2242                 pr_err("Failed to get pdd\n");
2243                 return -ENODEV;
2244         }
2245
2246         ret = criu_restore_memory_of_gpu(pdd, bo_bucket, bo_priv, &kgd_mem);
2247         if (ret)
2248                 return ret;
2249
2250         /* now map these BOs to GPU/s */
2251         for (j = 0; j < p->n_pdds; j++) {
2252                 struct kfd_dev *peer;
2253                 struct kfd_process_device *peer_pdd;
2254
2255                 if (!bo_priv->mapped_gpuids[j])
2256                         break;
2257
2258                 peer_pdd = kfd_process_device_data_by_id(p, bo_priv->mapped_gpuids[j]);
2259                 if (!peer_pdd)
2260                         return -EINVAL;
2261
2262                 peer = peer_pdd->dev;
2263
2264                 peer_pdd = kfd_bind_process_to_device(peer, p);
2265                 if (IS_ERR(peer_pdd))
2266                         return PTR_ERR(peer_pdd);
2267
2268                 ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem,
2269                                                             peer_pdd->drm_priv);
2270                 if (ret) {
2271                         pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds);
2272                         return ret;
2273                 }
2274         }
2275
2276         pr_debug("map memory was successful for the BO\n");
2277         /* create the dmabuf object and export the bo */
2278         if (bo_bucket->alloc_flags
2279             & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
2280                 ret = criu_get_prime_handle(&kgd_mem->bo->tbo.base, DRM_RDWR,
2281                                             &bo_bucket->dmabuf_fd);
2282                 if (ret)
2283                         return ret;
2284         } else {
2285                 bo_bucket->dmabuf_fd = KFD_INVALID_FD;
2286         }
2287
2288         return 0;
2289 }
2290
2291 static int criu_restore_bos(struct kfd_process *p,
2292                             struct kfd_ioctl_criu_args *args,
2293                             uint64_t *priv_offset,
2294                             uint64_t max_priv_data_size)
2295 {
2296         struct kfd_criu_bo_bucket *bo_buckets = NULL;
2297         struct kfd_criu_bo_priv_data *bo_privs = NULL;
2298         int ret = 0;
2299         uint32_t i = 0;
2300
2301         if (*priv_offset + (args->num_bos * sizeof(*bo_privs)) > max_priv_data_size)
2302                 return -EINVAL;
2303
2304         /* Prevent MMU notifications until stage-4 IOCTL (CRIU_RESUME) is received */
2305         amdgpu_amdkfd_block_mmu_notifications(p->kgd_process_info);
2306
2307         bo_buckets = kvmalloc_array(args->num_bos, sizeof(*bo_buckets), GFP_KERNEL);
2308         if (!bo_buckets)
2309                 return -ENOMEM;
2310
2311         ret = copy_from_user(bo_buckets, (void __user *)args->bos,
2312                              args->num_bos * sizeof(*bo_buckets));
2313         if (ret) {
2314                 pr_err("Failed to copy BOs information from user\n");
2315                 ret = -EFAULT;
2316                 goto exit;
2317         }
2318
2319         bo_privs = kvmalloc_array(args->num_bos, sizeof(*bo_privs), GFP_KERNEL);
2320         if (!bo_privs) {
2321                 ret = -ENOMEM;
2322                 goto exit;
2323         }
2324
2325         ret = copy_from_user(bo_privs, (void __user *)args->priv_data + *priv_offset,
2326                              args->num_bos * sizeof(*bo_privs));
2327         if (ret) {
2328                 pr_err("Failed to copy BOs information from user\n");
2329                 ret = -EFAULT;
2330                 goto exit;
2331         }
2332         *priv_offset += args->num_bos * sizeof(*bo_privs);
2333
2334         /* Create and map new BOs */
2335         for (; i < args->num_bos; i++) {
2336                 ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
2337                 if (ret) {
2338                         pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
2339                         goto exit;
2340                 }
2341         } /* done */
2342
2343         /* Copy only the buckets back so user can read bo_buckets[N].restored_offset */
2344         ret = copy_to_user((void __user *)args->bos,
2345                                 bo_buckets,
2346                                 (args->num_bos * sizeof(*bo_buckets)));
2347         if (ret)
2348                 ret = -EFAULT;
2349
2350 exit:
2351         while (ret && i--) {
2352                 if (bo_buckets[i].alloc_flags
2353                    & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
2354                         close_fd(bo_buckets[i].dmabuf_fd);
2355         }
2356         kvfree(bo_buckets);
2357         kvfree(bo_privs);
2358         return ret;
2359 }
2360
2361 static int criu_restore_objects(struct file *filep,
2362                                 struct kfd_process *p,
2363                                 struct kfd_ioctl_criu_args *args,
2364                                 uint64_t *priv_offset,
2365                                 uint64_t max_priv_data_size)
2366 {
2367         int ret = 0;
2368         uint32_t i;
2369
2370         BUILD_BUG_ON(offsetof(struct kfd_criu_queue_priv_data, object_type));
2371         BUILD_BUG_ON(offsetof(struct kfd_criu_event_priv_data, object_type));
2372         BUILD_BUG_ON(offsetof(struct kfd_criu_svm_range_priv_data, object_type));
2373
2374         for (i = 0; i < args->num_objects; i++) {
2375                 uint32_t object_type;
2376
2377                 if (*priv_offset + sizeof(object_type) > max_priv_data_size) {
2378                         pr_err("Invalid private data size\n");
2379                         return -EINVAL;
2380                 }
2381
2382                 ret = get_user(object_type, (uint32_t __user *)(args->priv_data + *priv_offset));
2383                 if (ret) {
2384                         pr_err("Failed to copy private information from user\n");
2385                         goto exit;
2386                 }
2387
2388                 switch (object_type) {
2389                 case KFD_CRIU_OBJECT_TYPE_QUEUE:
2390                         ret = kfd_criu_restore_queue(p, (uint8_t __user *)args->priv_data,
2391                                                      priv_offset, max_priv_data_size);
2392                         if (ret)
2393                                 goto exit;
2394                         break;
2395                 case KFD_CRIU_OBJECT_TYPE_EVENT:
2396                         ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
2397                                                      priv_offset, max_priv_data_size);
2398                         if (ret)
2399                                 goto exit;
2400                         break;
2401                 case KFD_CRIU_OBJECT_TYPE_SVM_RANGE:
2402                         ret = kfd_criu_restore_svm(p, (uint8_t __user *)args->priv_data,
2403                                                      priv_offset, max_priv_data_size);
2404                         if (ret)
2405                                 goto exit;
2406                         break;
2407                 default:
2408                         pr_err("Invalid object type:%u at index:%d\n", object_type, i);
2409                         ret = -EINVAL;
2410                         goto exit;
2411                 }
2412         }
2413 exit:
2414         return ret;
2415 }
2416
2417 static int criu_restore(struct file *filep,
2418                         struct kfd_process *p,
2419                         struct kfd_ioctl_criu_args *args)
2420 {
2421         uint64_t priv_offset = 0;
2422         int ret = 0;
2423
2424         pr_debug("CRIU restore (num_devices:%u num_bos:%u num_objects:%u priv_data_size:%llu)\n",
2425                  args->num_devices, args->num_bos, args->num_objects, args->priv_data_size);
2426
2427         if (!args->bos || !args->devices || !args->priv_data || !args->priv_data_size ||
2428             !args->num_devices || !args->num_bos)
2429                 return -EINVAL;
2430
2431         mutex_lock(&p->mutex);
2432
2433         /*
2434          * Set the process to evicted state to avoid running any new queues before all the memory
2435          * mappings are ready.
2436          */
2437         ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
2438         if (ret)
2439                 goto exit_unlock;
2440
2441         /* Each function will adjust priv_offset based on how many bytes they consumed */
2442         ret = criu_restore_process(p, args, &priv_offset, args->priv_data_size);
2443         if (ret)
2444                 goto exit_unlock;
2445
2446         ret = criu_restore_devices(p, args, &priv_offset, args->priv_data_size);
2447         if (ret)
2448                 goto exit_unlock;
2449
2450         ret = criu_restore_bos(p, args, &priv_offset, args->priv_data_size);
2451         if (ret)
2452                 goto exit_unlock;
2453
2454         ret = criu_restore_objects(filep, p, args, &priv_offset, args->priv_data_size);
2455         if (ret)
2456                 goto exit_unlock;
2457
2458         if (priv_offset != args->priv_data_size) {
2459                 pr_err("Invalid private data size\n");
2460                 ret = -EINVAL;
2461         }
2462
2463 exit_unlock:
2464         mutex_unlock(&p->mutex);
2465         if (ret)
2466                 pr_err("Failed to restore CRIU ret:%d\n", ret);
2467         else
2468                 pr_debug("CRIU restore successful\n");
2469
2470         return ret;
2471 }
2472
2473 static int criu_unpause(struct file *filep,
2474                         struct kfd_process *p,
2475                         struct kfd_ioctl_criu_args *args)
2476 {
2477         int ret;
2478
2479         mutex_lock(&p->mutex);
2480
2481         if (!p->queues_paused) {
2482                 mutex_unlock(&p->mutex);
2483                 return -EINVAL;
2484         }
2485
2486         ret = kfd_process_restore_queues(p);
2487         if (ret)
2488                 pr_err("Failed to unpause queues ret:%d\n", ret);
2489         else
2490                 p->queues_paused = false;
2491
2492         mutex_unlock(&p->mutex);
2493
2494         return ret;
2495 }
2496
2497 static int criu_resume(struct file *filep,
2498                         struct kfd_process *p,
2499                         struct kfd_ioctl_criu_args *args)
2500 {
2501         struct kfd_process *target = NULL;
2502         struct pid *pid = NULL;
2503         int ret = 0;
2504
2505         pr_debug("Inside %s, target pid for criu restore: %d\n", __func__,
2506                  args->pid);
2507
2508         pid = find_get_pid(args->pid);
2509         if (!pid) {
2510                 pr_err("Cannot find pid info for %i\n", args->pid);
2511                 return -ESRCH;
2512         }
2513
2514         pr_debug("calling kfd_lookup_process_by_pid\n");
2515         target = kfd_lookup_process_by_pid(pid);
2516
2517         put_pid(pid);
2518
2519         if (!target) {
2520                 pr_debug("Cannot find process info for %i\n", args->pid);
2521                 return -ESRCH;
2522         }
2523
2524         mutex_lock(&target->mutex);
2525         ret = kfd_criu_resume_svm(target);
2526         if (ret) {
2527                 pr_err("kfd_criu_resume_svm failed for %i\n", args->pid);
2528                 goto exit;
2529         }
2530
2531         ret =  amdgpu_amdkfd_criu_resume(target->kgd_process_info);
2532         if (ret)
2533                 pr_err("amdgpu_amdkfd_criu_resume failed for %i\n", args->pid);
2534
2535 exit:
2536         mutex_unlock(&target->mutex);
2537
2538         kfd_unref_process(target);
2539         return ret;
2540 }
2541
2542 static int criu_process_info(struct file *filep,
2543                                 struct kfd_process *p,
2544                                 struct kfd_ioctl_criu_args *args)
2545 {
2546         int ret = 0;
2547
2548         mutex_lock(&p->mutex);
2549
2550         if (!p->n_pdds) {
2551                 pr_err("No pdd for given process\n");
2552                 ret = -ENODEV;
2553                 goto err_unlock;
2554         }
2555
2556         ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
2557         if (ret)
2558                 goto err_unlock;
2559
2560         p->queues_paused = true;
2561
2562         args->pid = task_pid_nr_ns(p->lead_thread,
2563                                         task_active_pid_ns(p->lead_thread));
2564
2565         ret = criu_get_process_object_info(p, &args->num_devices, &args->num_bos,
2566                                            &args->num_objects, &args->priv_data_size);
2567         if (ret)
2568                 goto err_unlock;
2569
2570         dev_dbg(kfd_device, "Num of devices:%u bos:%u objects:%u priv_data_size:%lld\n",
2571                                 args->num_devices, args->num_bos, args->num_objects,
2572                                 args->priv_data_size);
2573
2574 err_unlock:
2575         if (ret) {
2576                 kfd_process_restore_queues(p);
2577                 p->queues_paused = false;
2578         }
2579         mutex_unlock(&p->mutex);
2580         return ret;
2581 }
2582
2583 static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
2584 {
2585         struct kfd_ioctl_criu_args *args = data;
2586         int ret;
2587
2588         dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
2589         switch (args->op) {
2590         case KFD_CRIU_OP_PROCESS_INFO:
2591                 ret = criu_process_info(filep, p, args);
2592                 break;
2593         case KFD_CRIU_OP_CHECKPOINT:
2594                 ret = criu_checkpoint(filep, p, args);
2595                 break;
2596         case KFD_CRIU_OP_UNPAUSE:
2597                 ret = criu_unpause(filep, p, args);
2598                 break;
2599         case KFD_CRIU_OP_RESTORE:
2600                 ret = criu_restore(filep, p, args);
2601                 break;
2602         case KFD_CRIU_OP_RESUME:
2603                 ret = criu_resume(filep, p, args);
2604                 break;
2605         default:
2606                 dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
2607                 ret = -EINVAL;
2608                 break;
2609         }
2610
2611         if (ret)
2612                 dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);
2613
2614         return ret;
2615 }
2616
/*
 * Build one ioctl descriptor as a designated array initializer. The entry is
 * stored at the index given by the command number of @ioctl, and its name is
 * the stringified @ioctl token.
 */
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags)	\
	[_IOC_NR(ioctl)] = {			\
		.cmd = ioctl,			\
		.func = _func,			\
		.flags = _flags,		\
		.cmd_drv = 0,			\
		.name = #ioctl			\
	}
2620
2621 /** Ioctl table */
2622 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
2623         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
2624                         kfd_ioctl_get_version, 0),
2625
2626         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
2627                         kfd_ioctl_create_queue, 0),
2628
2629         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
2630                         kfd_ioctl_destroy_queue, 0),
2631
2632         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
2633                         kfd_ioctl_set_memory_policy, 0),
2634
2635         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
2636                         kfd_ioctl_get_clock_counters, 0),
2637
2638         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
2639                         kfd_ioctl_get_process_apertures, 0),
2640
2641         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
2642                         kfd_ioctl_update_queue, 0),
2643
2644         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
2645                         kfd_ioctl_create_event, 0),
2646
2647         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
2648                         kfd_ioctl_destroy_event, 0),
2649
2650         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
2651                         kfd_ioctl_set_event, 0),
2652
2653         AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
2654                         kfd_ioctl_reset_event, 0),
2655
2656         AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
2657                         kfd_ioctl_wait_events, 0),
2658
2659         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER_DEPRECATED,
2660                         kfd_ioctl_dbg_register, 0),
2661
2662         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER_DEPRECATED,
2663                         kfd_ioctl_dbg_unregister, 0),
2664
2665         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH_DEPRECATED,
2666                         kfd_ioctl_dbg_address_watch, 0),
2667
2668         AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL_DEPRECATED,
2669                         kfd_ioctl_dbg_wave_control, 0),
2670
2671         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
2672                         kfd_ioctl_set_scratch_backing_va, 0),
2673
2674         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
2675                         kfd_ioctl_get_tile_config, 0),
2676
2677         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
2678                         kfd_ioctl_set_trap_handler, 0),
2679
2680         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
2681                         kfd_ioctl_get_process_apertures_new, 0),
2682
2683         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
2684                         kfd_ioctl_acquire_vm, 0),
2685
2686         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
2687                         kfd_ioctl_alloc_memory_of_gpu, 0),
2688
2689         AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
2690                         kfd_ioctl_free_memory_of_gpu, 0),
2691
2692         AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
2693                         kfd_ioctl_map_memory_to_gpu, 0),
2694
2695         AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
2696                         kfd_ioctl_unmap_memory_from_gpu, 0),
2697
2698         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
2699                         kfd_ioctl_set_cu_mask, 0),
2700
2701         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
2702                         kfd_ioctl_get_queue_wave_state, 0),
2703
2704         AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
2705                                 kfd_ioctl_get_dmabuf_info, 0),
2706
2707         AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
2708                                 kfd_ioctl_import_dmabuf, 0),
2709
2710         AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
2711                         kfd_ioctl_alloc_queue_gws, 0),
2712
2713         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
2714                         kfd_ioctl_smi_events, 0),
2715
2716         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
2717
2718         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
2719                         kfd_ioctl_set_xnack_mode, 0),
2720
2721         AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
2722                         kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),
2723
2724         AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
2725                         kfd_ioctl_get_available_memory, 0),
2726 };
2727
2728 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
2729
2730 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
2731 {
2732         struct kfd_process *process;
2733         amdkfd_ioctl_t *func;
2734         const struct amdkfd_ioctl_desc *ioctl = NULL;
2735         unsigned int nr = _IOC_NR(cmd);
2736         char stack_kdata[128];
2737         char *kdata = NULL;
2738         unsigned int usize, asize;
2739         int retcode = -EINVAL;
2740         bool ptrace_attached = false;
2741
2742         if (nr >= AMDKFD_CORE_IOCTL_COUNT)
2743                 goto err_i1;
2744
2745         if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
2746                 u32 amdkfd_size;
2747
2748                 ioctl = &amdkfd_ioctls[nr];
2749
2750                 amdkfd_size = _IOC_SIZE(ioctl->cmd);
2751                 usize = asize = _IOC_SIZE(cmd);
2752                 if (amdkfd_size > asize)
2753                         asize = amdkfd_size;
2754
2755                 cmd = ioctl->cmd;
2756         } else
2757                 goto err_i1;
2758
2759         dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
2760
2761         /* Get the process struct from the filep. Only the process
2762          * that opened /dev/kfd can use the file descriptor. Child
2763          * processes need to create their own KFD device context.
2764          */
2765         process = filep->private_data;
2766
2767         rcu_read_lock();
2768         if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
2769             ptrace_parent(process->lead_thread) == current)
2770                 ptrace_attached = true;
2771         rcu_read_unlock();
2772
2773         if (process->lead_thread != current->group_leader
2774             && !ptrace_attached) {
2775                 dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
2776                 retcode = -EBADF;
2777                 goto err_i1;
2778         }
2779
2780         /* Do not trust userspace, use our own definition */
2781         func = ioctl->func;
2782
2783         if (unlikely(!func)) {
2784                 dev_dbg(kfd_device, "no function\n");
2785                 retcode = -EINVAL;
2786                 goto err_i1;
2787         }
2788
2789         /*
2790          * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
2791          * CAP_CHECKPOINT_RESTORE, so we also allow access if CAP_SYS_ADMIN as CAP_SYS_ADMIN is a
2792          * more priviledged access.
2793          */
2794         if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
2795                 if (!capable(CAP_CHECKPOINT_RESTORE) &&
2796                                                 !capable(CAP_SYS_ADMIN)) {
2797                         retcode = -EACCES;
2798                         goto err_i1;
2799                 }
2800         }
2801
2802         if (cmd & (IOC_IN | IOC_OUT)) {
2803                 if (asize <= sizeof(stack_kdata)) {
2804                         kdata = stack_kdata;
2805                 } else {
2806                         kdata = kmalloc(asize, GFP_KERNEL);
2807                         if (!kdata) {
2808                                 retcode = -ENOMEM;
2809                                 goto err_i1;
2810                         }
2811                 }
2812                 if (asize > usize)
2813                         memset(kdata + usize, 0, asize - usize);
2814         }
2815
2816         if (cmd & IOC_IN) {
2817                 if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
2818                         retcode = -EFAULT;
2819                         goto err_i1;
2820                 }
2821         } else if (cmd & IOC_OUT) {
2822                 memset(kdata, 0, usize);
2823         }
2824
2825         retcode = func(filep, process, kdata);
2826
2827         if (cmd & IOC_OUT)
2828                 if (copy_to_user((void __user *)arg, kdata, usize) != 0)
2829                         retcode = -EFAULT;
2830
2831 err_i1:
2832         if (!ioctl)
2833                 dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
2834                           task_pid_nr(current), cmd, nr);
2835
2836         if (kdata != stack_kdata)
2837                 kfree(kdata);
2838
2839         if (retcode)
2840                 dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
2841                                 nr, arg, retcode);
2842
2843         return retcode;
2844 }
2845
2846 static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
2847                       struct vm_area_struct *vma)
2848 {
2849         phys_addr_t address;
2850         int ret;
2851
2852         if (vma->vm_end - vma->vm_start != PAGE_SIZE)
2853                 return -EINVAL;
2854
2855         address = dev->adev->rmmio_remap.bus_addr;
2856
2857         vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
2858                                 VM_DONTDUMP | VM_PFNMAP;
2859
2860         vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
2861
2862         pr_debug("pasid 0x%x mapping mmio page\n"
2863                  "     target user address == 0x%08llX\n"
2864                  "     physical address    == 0x%08llX\n"
2865                  "     vm_flags            == 0x%04lX\n"
2866                  "     size                == 0x%04lX\n",
2867                  process->pasid, (unsigned long long) vma->vm_start,
2868                  address, vma->vm_flags, PAGE_SIZE);
2869
2870         ret = io_remap_pfn_range(vma,
2871                                 vma->vm_start,
2872                                 address >> PAGE_SHIFT,
2873                                 PAGE_SIZE,
2874                                 vma->vm_page_prot);
2875         return ret;
2876 }
2877
2878
2879 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
2880 {
2881         struct kfd_process *process;
2882         struct kfd_dev *dev = NULL;
2883         unsigned long mmap_offset;
2884         unsigned int gpu_id;
2885
2886         process = kfd_get_process(current);
2887         if (IS_ERR(process))
2888                 return PTR_ERR(process);
2889
2890         mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
2891         gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
2892         if (gpu_id)
2893                 dev = kfd_device_by_id(gpu_id);
2894
2895         switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
2896         case KFD_MMAP_TYPE_DOORBELL:
2897                 if (!dev)
2898                         return -ENODEV;
2899                 return kfd_doorbell_mmap(dev, process, vma);
2900
2901         case KFD_MMAP_TYPE_EVENTS:
2902                 return kfd_event_mmap(process, vma);
2903
2904         case KFD_MMAP_TYPE_RESERVED_MEM:
2905                 if (!dev)
2906                         return -ENODEV;
2907                 return kfd_reserved_mem_mmap(dev, process, vma);
2908         case KFD_MMAP_TYPE_MMIO:
2909                 if (!dev)
2910                         return -ENODEV;
2911                 return kfd_mmio_mmap(dev, process, vma);
2912         }
2913
2914         return -EFAULT;
2915 }