2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/device.h>
24 #include <linux/export.h>
25 #include <linux/err.h>
27 #include <linux/file.h>
28 #include <linux/sched.h>
29 #include <linux/slab.h>
30 #include <linux/uaccess.h>
31 #include <linux/compat.h>
32 #include <uapi/linux/kfd_ioctl.h>
33 #include <linux/time.h>
35 #include <linux/mman.h>
36 #include <linux/dma-buf.h>
37 #include <asm/processor.h>
39 #include "kfd_device_queue_manager.h"
40 #include "kfd_dbgmgr.h"
42 #include "amdgpu_amdkfd.h"
43 #include "kfd_smi_events.h"
45 static long kfd_ioctl(struct file *, unsigned int, unsigned long);
46 static int kfd_open(struct inode *, struct file *);
47 static int kfd_release(struct inode *, struct file *);
48 static int kfd_mmap(struct file *, struct vm_area_struct *);
50 static const char kfd_dev_name[] = "kfd";
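/*
 * File operations backing the /dev/kfd character device. All user-space
 * interaction with KFD (queues, memory, events, debug) goes through the
 * ioctl interface dispatched by kfd_ioctl() below.
 */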
static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.open = kfd_open,
	.release = kfd_release,
	.mmap = kfd_mmap,
};
61 static int kfd_char_dev_major = -1;
62 static struct class *kfd_class;
63 struct device *kfd_device;
int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}
void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}
110 static int kfd_open(struct inode *inode, struct file *filep)
112 struct kfd_process *process;
113 bool is_32bit_user_mode;
115 if (iminor(inode) != 0)
118 is_32bit_user_mode = in_compat_syscall();
120 if (is_32bit_user_mode) {
122 "Process %d (32-bit) failed to open /dev/kfd\n"
123 "32-bit processes are not supported by amdkfd\n",
128 process = kfd_create_process(filep);
130 return PTR_ERR(process);
132 if (kfd_is_locked()) {
133 dev_dbg(kfd_device, "kfd is locked!\n"
134 "process %d unreferenced", process->pasid);
135 kfd_unref_process(process);
139 /* filep now owns the reference returned by kfd_create_process */
140 filep->private_data = process;
142 dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
143 process->pasid, process->is_32bit_user_mode);
148 static int kfd_release(struct inode *inode, struct file *filep)
150 struct kfd_process *process = filep->private_data;
153 kfd_unref_process(process);
158 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
161 struct kfd_ioctl_get_version_args *args = data;
163 args->major_version = KFD_IOCTL_MAJOR_VERSION;
164 args->minor_version = KFD_IOCTL_MINOR_VERSION;
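/*
 * Validate the user-supplied queue creation arguments and translate them
 * into a struct queue_properties. User pointers (ring base, read/write
 * pointers, EOP buffer, context save area) are only checked with
 * access_ok() here and are not dereferenced in this function.
 */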
169 static int set_queue_properties_from_user(struct queue_properties *q_properties,
170 struct kfd_ioctl_create_queue_args *args)
172 if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
173 pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
177 if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
178 pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
182 if ((args->ring_base_address) &&
183 (!access_ok((const void __user *) args->ring_base_address,
184 sizeof(uint64_t)))) {
185 pr_err("Can't access ring base address\n");
189 if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
190 pr_err("Ring size must be a power of 2 or 0\n");
194 if (!access_ok((const void __user *) args->read_pointer_address,
196 pr_err("Can't access read pointer\n");
200 if (!access_ok((const void __user *) args->write_pointer_address,
202 pr_err("Can't access write pointer\n");
206 if (args->eop_buffer_address &&
207 !access_ok((const void __user *) args->eop_buffer_address,
209 pr_debug("Can't access eop buffer");
213 if (args->ctx_save_restore_address &&
214 !access_ok((const void __user *) args->ctx_save_restore_address,
216 pr_debug("Can't access ctx save restore buffer");
220 q_properties->is_interop = false;
221 q_properties->is_gws = false;
222 q_properties->queue_percent = args->queue_percentage;
223 q_properties->priority = args->queue_priority;
224 q_properties->queue_address = args->ring_base_address;
225 q_properties->queue_size = args->ring_size;
226 q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
227 q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
228 q_properties->eop_ring_buffer_address = args->eop_buffer_address;
229 q_properties->eop_ring_buffer_size = args->eop_buffer_size;
230 q_properties->ctx_save_restore_area_address =
231 args->ctx_save_restore_address;
232 q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
233 q_properties->ctl_stack_size = args->ctl_stack_size;
234 if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
235 args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
236 q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
237 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
238 q_properties->type = KFD_QUEUE_TYPE_SDMA;
239 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
240 q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
244 if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
245 q_properties->format = KFD_QUEUE_FORMAT_AQL;
247 q_properties->format = KFD_QUEUE_FORMAT_PM4;
249 pr_debug("Queue Percentage: %d, %d\n",
250 q_properties->queue_percent, args->queue_percentage);
252 pr_debug("Queue Priority: %d, %d\n",
253 q_properties->priority, args->queue_priority);
255 pr_debug("Queue Address: 0x%llX, 0x%llX\n",
256 q_properties->queue_address, args->ring_base_address);
258 pr_debug("Queue Size: 0x%llX, %u\n",
259 q_properties->queue_size, args->ring_size);
261 pr_debug("Queue r/w Pointers: %px, %px\n",
262 q_properties->read_ptr,
263 q_properties->write_ptr);
265 pr_debug("Queue Format: %d\n", q_properties->format);
267 pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
269 pr_debug("Queue CTX save area: 0x%llX\n",
270 q_properties->ctx_save_restore_area_address);
275 static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
278 struct kfd_ioctl_create_queue_args *args = data;
281 unsigned int queue_id;
282 struct kfd_process_device *pdd;
283 struct queue_properties q_properties;
284 uint32_t doorbell_offset_in_process = 0;
286 memset(&q_properties, 0, sizeof(struct queue_properties));
288 pr_debug("Creating queue ioctl\n");
290 err = set_queue_properties_from_user(&q_properties, args);
294 pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
295 dev = kfd_device_by_id(args->gpu_id);
297 pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
301 mutex_lock(&p->mutex);
303 pdd = kfd_bind_process_to_device(dev, p);
306 goto err_bind_process;
309 pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
313 err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
314 &doorbell_offset_in_process);
316 goto err_create_queue;
318 args->queue_id = queue_id;
321 /* Return gpu_id as doorbell offset for mmap usage */
322 args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
323 args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
324 if (KFD_IS_SOC15(dev->device_info->asic_family))
325 /* On SOC15 ASICs, include the doorbell offset within the
326 * process doorbell frame, which is 2 pages.
328 args->doorbell_offset |= doorbell_offset_in_process;
330 mutex_unlock(&p->mutex);
332 pr_debug("Queue id %d was created successfully\n", args->queue_id);
334 pr_debug("Ring buffer address == 0x%016llX\n",
335 args->ring_base_address);
337 pr_debug("Read ptr address == 0x%016llX\n",
338 args->read_pointer_address);
340 pr_debug("Write ptr address == 0x%016llX\n",
341 args->write_pointer_address);
347 mutex_unlock(&p->mutex);
351 static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
355 struct kfd_ioctl_destroy_queue_args *args = data;
357 pr_debug("Destroying queue id %d for pasid 0x%x\n",
361 mutex_lock(&p->mutex);
363 retval = pqm_destroy_queue(&p->pqm, args->queue_id);
365 mutex_unlock(&p->mutex);
369 static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
373 struct kfd_ioctl_update_queue_args *args = data;
374 struct queue_properties properties;
376 if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
377 pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
381 if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
382 pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
386 if ((args->ring_base_address) &&
387 (!access_ok((const void __user *) args->ring_base_address,
388 sizeof(uint64_t)))) {
389 pr_err("Can't access ring base address\n");
393 if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
394 pr_err("Ring size must be a power of 2 or 0\n");
398 properties.queue_address = args->ring_base_address;
399 properties.queue_size = args->ring_size;
400 properties.queue_percent = args->queue_percentage;
401 properties.priority = args->queue_priority;
403 pr_debug("Updating queue id %d for pasid 0x%x\n",
404 args->queue_id, p->pasid);
406 mutex_lock(&p->mutex);
408 retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
410 mutex_unlock(&p->mutex);
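/*
 * Attach a user-provided CU (compute unit) mask to a queue. The mask is
 * passed as an array of 32-bit words; its length is capped at max_num_cus
 * bits so the kernel allocation stays bounded.
 */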
415 static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
419 const int max_num_cus = 1024;
420 struct kfd_ioctl_set_cu_mask_args *args = data;
421 struct queue_properties properties;
422 uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
423 size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
425 if ((args->num_cu_mask % 32) != 0) {
426 pr_debug("num_cu_mask 0x%x must be a multiple of 32",
431 properties.cu_mask_count = args->num_cu_mask;
432 if (properties.cu_mask_count == 0) {
433 pr_debug("CU mask cannot be 0");
	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits. Any CU mask bits past max_num_cus are
	 * simply dropped; only the first max_num_cus bits are used.
	 */
441 if (properties.cu_mask_count > max_num_cus) {
442 pr_debug("CU mask cannot be greater than 1024 bits");
443 properties.cu_mask_count = max_num_cus;
444 cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
447 properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
448 if (!properties.cu_mask)
451 retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
453 pr_debug("Could not copy CU mask from userspace");
454 kfree(properties.cu_mask);
458 mutex_lock(&p->mutex);
460 retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
462 mutex_unlock(&p->mutex);
465 kfree(properties.cu_mask);
470 static int kfd_ioctl_get_queue_wave_state(struct file *filep,
471 struct kfd_process *p, void *data)
473 struct kfd_ioctl_get_queue_wave_state_args *args = data;
476 mutex_lock(&p->mutex);
478 r = pqm_get_wave_state(&p->pqm, args->queue_id,
479 (void __user *)args->ctl_stack_address,
480 &args->ctl_stack_used_size,
481 &args->save_area_used_size);
483 mutex_unlock(&p->mutex);
488 static int kfd_ioctl_set_memory_policy(struct file *filep,
489 struct kfd_process *p, void *data)
491 struct kfd_ioctl_set_memory_policy_args *args = data;
494 struct kfd_process_device *pdd;
495 enum cache_policy default_policy, alternate_policy;
497 if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
498 && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
502 if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
503 && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
507 dev = kfd_device_by_id(args->gpu_id);
511 mutex_lock(&p->mutex);
513 pdd = kfd_bind_process_to_device(dev, p);
519 default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
520 ? cache_policy_coherent : cache_policy_noncoherent;
523 (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
524 ? cache_policy_coherent : cache_policy_noncoherent;
526 if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
530 (void __user *)args->alternate_aperture_base,
531 args->alternate_aperture_size))
535 mutex_unlock(&p->mutex);
540 static int kfd_ioctl_set_trap_handler(struct file *filep,
541 struct kfd_process *p, void *data)
543 struct kfd_ioctl_set_trap_handler_args *args = data;
546 struct kfd_process_device *pdd;
548 dev = kfd_device_by_id(args->gpu_id);
552 mutex_lock(&p->mutex);
554 pdd = kfd_bind_process_to_device(dev, p);
560 kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
563 mutex_unlock(&p->mutex);
568 static int kfd_ioctl_dbg_register(struct file *filep,
569 struct kfd_process *p, void *data)
571 struct kfd_ioctl_dbg_register_args *args = data;
573 struct kfd_dbgmgr *dbgmgr_ptr;
574 struct kfd_process_device *pdd;
578 dev = kfd_device_by_id(args->gpu_id);
582 if (dev->device_info->asic_family == CHIP_CARRIZO) {
583 pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
587 mutex_lock(&p->mutex);
588 mutex_lock(kfd_get_dbgmgr_mutex());
	/*
	 * Make sure that we have a pdd; this may be the first queue created
	 * for this process.
	 */
594 pdd = kfd_bind_process_to_device(dev, p);
596 status = PTR_ERR(pdd);
601 /* In case of a legal call, we have no dbgmgr yet */
602 create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
604 status = kfd_dbgmgr_register(dbgmgr_ptr, p);
606 kfd_dbgmgr_destroy(dbgmgr_ptr);
608 dev->dbgmgr = dbgmgr_ptr;
611 pr_debug("debugger already registered\n");
616 mutex_unlock(kfd_get_dbgmgr_mutex());
617 mutex_unlock(&p->mutex);
622 static int kfd_ioctl_dbg_unregister(struct file *filep,
623 struct kfd_process *p, void *data)
625 struct kfd_ioctl_dbg_unregister_args *args = data;
629 dev = kfd_device_by_id(args->gpu_id);
630 if (!dev || !dev->dbgmgr)
633 if (dev->device_info->asic_family == CHIP_CARRIZO) {
634 pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
638 mutex_lock(kfd_get_dbgmgr_mutex());
640 status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
642 kfd_dbgmgr_destroy(dev->dbgmgr);
646 mutex_unlock(kfd_get_dbgmgr_mutex());
/*
 * Parse and generate a variable-size data structure for address watch.
 * The total buffer size and the number of watch points are limited in
 * order to prevent kernel abuse (this has no bearing on the much smaller
 * HW limitation, which is enforced by the dbgdev module).
 * Also note that the watch addresses themselves are not copied from user
 * space, since they are written to the HW as user-mode values.
 */
660 static int kfd_ioctl_dbg_address_watch(struct file *filep,
661 struct kfd_process *p, void *data)
663 struct kfd_ioctl_dbg_address_watch_args *args = data;
665 struct dbg_address_watch_info aw_info;
666 unsigned char *args_buff;
668 void __user *cmd_from_user;
669 uint64_t watch_mask_value = 0;
670 unsigned int args_idx = 0;
672 memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
674 dev = kfd_device_by_id(args->gpu_id);
678 if (dev->device_info->asic_family == CHIP_CARRIZO) {
679 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
683 cmd_from_user = (void __user *) args->content_ptr;
685 /* Validate arguments */
687 if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
688 (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
689 (cmd_from_user == NULL))
692 /* this is the actual buffer to work with */
693 args_buff = memdup_user(cmd_from_user,
694 args->buf_size_in_bytes - sizeof(*args));
695 if (IS_ERR(args_buff))
696 return PTR_ERR(args_buff);
700 aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
701 args_idx += sizeof(aw_info.num_watch_points);
703 aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
704 args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
	/*
	 * Set the watch address base pointer to point at the array base
	 * within args_buff.
	 */
710 aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
712 /* skip over the addresses buffer */
713 args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
715 if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
720 watch_mask_value = (uint64_t) args_buff[args_idx];
722 if (watch_mask_value > 0) {
		/*
		 * There is an array of masks; set the watch mask base
		 * pointer to point at the array base within args_buff.
		 */
728 aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
730 /* skip over the masks buffer */
731 args_idx += sizeof(aw_info.watch_mask) *
732 aw_info.num_watch_points;
734 /* just the NULL mask, set to NULL and skip over it */
735 aw_info.watch_mask = NULL;
736 args_idx += sizeof(aw_info.watch_mask);
	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
744 /* Currently HSA Event is not supported for DBG */
745 aw_info.watch_event = NULL;
747 mutex_lock(kfd_get_dbgmgr_mutex());
749 status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
751 mutex_unlock(kfd_get_dbgmgr_mutex());
759 /* Parse and generate fixed size data structure for wave control */
760 static int kfd_ioctl_dbg_wave_control(struct file *filep,
761 struct kfd_process *p, void *data)
763 struct kfd_ioctl_dbg_wave_control_args *args = data;
765 struct dbg_wave_control_info wac_info;
766 unsigned char *args_buff;
767 uint32_t computed_buff_size;
769 void __user *cmd_from_user;
770 unsigned int args_idx = 0;
772 memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
774 /* we use compact form, independent of the packing attribute value */
775 computed_buff_size = sizeof(*args) +
776 sizeof(wac_info.mode) +
777 sizeof(wac_info.operand) +
778 sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
779 sizeof(wac_info.dbgWave_msg.MemoryVA) +
780 sizeof(wac_info.trapId);
782 dev = kfd_device_by_id(args->gpu_id);
786 if (dev->device_info->asic_family == CHIP_CARRIZO) {
787 pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
791 /* input size must match the computed "compact" size */
792 if (args->buf_size_in_bytes != computed_buff_size) {
793 pr_debug("size mismatch, computed : actual %u : %u\n",
794 args->buf_size_in_bytes, computed_buff_size);
798 cmd_from_user = (void __user *) args->content_ptr;
800 if (cmd_from_user == NULL)
803 /* copy the entire buffer from user */
805 args_buff = memdup_user(cmd_from_user,
806 args->buf_size_in_bytes - sizeof(*args));
807 if (IS_ERR(args_buff))
808 return PTR_ERR(args_buff);
810 /* move ptr to the start of the "pay-load" area */
811 wac_info.process = p;
813 wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
814 args_idx += sizeof(wac_info.operand);
816 wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
817 args_idx += sizeof(wac_info.mode);
819 wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
820 args_idx += sizeof(wac_info.trapId);
822 wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
823 *((uint32_t *)(&args_buff[args_idx]));
824 wac_info.dbgWave_msg.MemoryVA = NULL;
826 mutex_lock(kfd_get_dbgmgr_mutex());
828 pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
829 wac_info.process, wac_info.operand,
830 wac_info.mode, wac_info.trapId,
831 wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
833 status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
835 pr_debug("Returned status of dbg manager is %ld\n", status);
837 mutex_unlock(kfd_get_dbgmgr_mutex());
844 static int kfd_ioctl_get_clock_counters(struct file *filep,
845 struct kfd_process *p, void *data)
847 struct kfd_ioctl_get_clock_counters_args *args = data;
850 dev = kfd_device_by_id(args->gpu_id);
852 /* Reading GPU clock counter from KGD */
853 args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
855 /* Node without GPU resource */
856 args->gpu_clock_counter = 0;
858 /* No access to rdtsc. Using raw monotonic time */
859 args->cpu_clock_counter = ktime_get_raw_ns();
860 args->system_clock_counter = ktime_get_boottime_ns();
862 /* Since the counter is in nano-seconds we use 1GHz frequency */
863 args->system_clock_freq = 1000000000;
869 static int kfd_ioctl_get_process_apertures(struct file *filp,
870 struct kfd_process *p, void *data)
872 struct kfd_ioctl_get_process_apertures_args *args = data;
873 struct kfd_process_device_apertures *pAperture;
876 dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
878 args->num_of_nodes = 0;
880 mutex_lock(&p->mutex);
	/* Run over all pdds of the process */
882 for (i = 0; i < p->n_pdds; i++) {
883 struct kfd_process_device *pdd = p->pdds[i];
886 &args->process_apertures[args->num_of_nodes];
887 pAperture->gpu_id = pdd->dev->id;
888 pAperture->lds_base = pdd->lds_base;
889 pAperture->lds_limit = pdd->lds_limit;
890 pAperture->gpuvm_base = pdd->gpuvm_base;
891 pAperture->gpuvm_limit = pdd->gpuvm_limit;
892 pAperture->scratch_base = pdd->scratch_base;
893 pAperture->scratch_limit = pdd->scratch_limit;
896 "node id %u\n", args->num_of_nodes);
898 "gpu id %u\n", pdd->dev->id);
900 "lds_base %llX\n", pdd->lds_base);
902 "lds_limit %llX\n", pdd->lds_limit);
904 "gpuvm_base %llX\n", pdd->gpuvm_base);
906 "gpuvm_limit %llX\n", pdd->gpuvm_limit);
908 "scratch_base %llX\n", pdd->scratch_base);
910 "scratch_limit %llX\n", pdd->scratch_limit);
912 if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
915 mutex_unlock(&p->mutex);
920 static int kfd_ioctl_get_process_apertures_new(struct file *filp,
921 struct kfd_process *p, void *data)
923 struct kfd_ioctl_get_process_apertures_new_args *args = data;
924 struct kfd_process_device_apertures *pa;
928 dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
930 if (args->num_of_nodes == 0) {
		/* Return the number of nodes, so that user space can
		 * allocate sufficient memory
		 */
934 mutex_lock(&p->mutex);
935 args->num_of_nodes = p->n_pdds;
939 /* Fill in process-aperture information for all available
940 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by the user.
	 */
943 pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
944 args->num_of_nodes), GFP_KERNEL);
948 mutex_lock(&p->mutex);
951 args->num_of_nodes = 0;
	/* Run over all pdds of the process */
957 for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
958 struct kfd_process_device *pdd = p->pdds[i];
960 pa[i].gpu_id = pdd->dev->id;
961 pa[i].lds_base = pdd->lds_base;
962 pa[i].lds_limit = pdd->lds_limit;
963 pa[i].gpuvm_base = pdd->gpuvm_base;
964 pa[i].gpuvm_limit = pdd->gpuvm_limit;
965 pa[i].scratch_base = pdd->scratch_base;
966 pa[i].scratch_limit = pdd->scratch_limit;
969 "gpu id %u\n", pdd->dev->id);
971 "lds_base %llX\n", pdd->lds_base);
973 "lds_limit %llX\n", pdd->lds_limit);
975 "gpuvm_base %llX\n", pdd->gpuvm_base);
977 "gpuvm_limit %llX\n", pdd->gpuvm_limit);
979 "scratch_base %llX\n", pdd->scratch_base);
981 "scratch_limit %llX\n", pdd->scratch_limit);
983 mutex_unlock(&p->mutex);
985 args->num_of_nodes = i;
987 (void __user *)args->kfd_process_device_apertures_ptr,
989 (i * sizeof(struct kfd_process_device_apertures)));
991 return ret ? -EFAULT : 0;
994 mutex_unlock(&p->mutex);
998 static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
1001 struct kfd_ioctl_create_event_args *args = data;
1004 /* For dGPUs the event page is allocated in user mode. The
1005 * handle is passed to KFD with the first call to this IOCTL
1006 * through the event_page_offset field.
1008 if (args->event_page_offset) {
1009 struct kfd_dev *kfd;
1010 struct kfd_process_device *pdd;
1011 void *mem, *kern_addr;
1014 if (p->signal_page) {
1015 pr_err("Event page is already set\n");
1019 kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1021 pr_err("Getting device by id failed in %s\n", __func__);
1025 mutex_lock(&p->mutex);
1026 pdd = kfd_bind_process_to_device(kfd, p);
1032 mem = kfd_process_device_translate_handle(pdd,
1033 GET_IDR_HANDLE(args->event_page_offset));
1035 pr_err("Can't find BO, offset is 0x%llx\n",
1036 args->event_page_offset);
1040 mutex_unlock(&p->mutex);
1042 err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1043 mem, &kern_addr, &size);
1045 pr_err("Failed to map event page to kernel\n");
1049 err = kfd_event_page_set(p, kern_addr, size);
1051 pr_err("Failed to set event page\n");
1056 err = kfd_event_create(filp, p, args->event_type,
1057 args->auto_reset != 0, args->node_id,
1058 &args->event_id, &args->event_trigger_data,
1059 &args->event_page_offset,
1060 &args->event_slot_index);
1065 mutex_unlock(&p->mutex);
1069 static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1072 struct kfd_ioctl_destroy_event_args *args = data;
1074 return kfd_event_destroy(p, args->event_id);
1077 static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1080 struct kfd_ioctl_set_event_args *args = data;
1082 return kfd_set_event(p, args->event_id);
1085 static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1088 struct kfd_ioctl_reset_event_args *args = data;
1090 return kfd_reset_event(p, args->event_id);
1093 static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1096 struct kfd_ioctl_wait_events_args *args = data;
1099 err = kfd_wait_on_events(p, args->num_events,
1100 (void __user *)args->events_ptr,
1101 (args->wait_for_all != 0),
1102 args->timeout, &args->wait_result);
1106 static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1107 struct kfd_process *p, void *data)
1109 struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1110 struct kfd_process_device *pdd;
1111 struct kfd_dev *dev;
1114 dev = kfd_device_by_id(args->gpu_id);
1118 mutex_lock(&p->mutex);
1120 pdd = kfd_bind_process_to_device(dev, p);
1123 goto bind_process_to_device_fail;
1126 pdd->qpd.sh_hidden_private_base = args->va_addr;
1128 mutex_unlock(&p->mutex);
1130 if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1131 pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
1132 dev->kfd2kgd->set_scratch_backing_va(
1133 dev->kgd, args->va_addr, pdd->qpd.vmid);
1137 bind_process_to_device_fail:
1138 mutex_unlock(&p->mutex);
1142 static int kfd_ioctl_get_tile_config(struct file *filep,
1143 struct kfd_process *p, void *data)
1145 struct kfd_ioctl_get_tile_config_args *args = data;
1146 struct kfd_dev *dev;
1147 struct tile_config config;
1150 dev = kfd_device_by_id(args->gpu_id);
1154 amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
1156 args->gb_addr_config = config.gb_addr_config;
1157 args->num_banks = config.num_banks;
1158 args->num_ranks = config.num_ranks;
1160 if (args->num_tile_configs > config.num_tile_configs)
1161 args->num_tile_configs = config.num_tile_configs;
1162 err = copy_to_user((void __user *)args->tile_config_ptr,
1163 config.tile_config_ptr,
1164 args->num_tile_configs * sizeof(uint32_t));
1166 args->num_tile_configs = 0;
1170 if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1171 args->num_macro_tile_configs =
1172 config.num_macro_tile_configs;
1173 err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1174 config.macro_tile_config_ptr,
1175 args->num_macro_tile_configs * sizeof(uint32_t));
1177 args->num_macro_tile_configs = 0;
1184 static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1187 struct kfd_ioctl_acquire_vm_args *args = data;
1188 struct kfd_process_device *pdd;
1189 struct kfd_dev *dev;
1190 struct file *drm_file;
1193 dev = kfd_device_by_id(args->gpu_id);
1197 drm_file = fget(args->drm_fd);
1201 mutex_lock(&p->mutex);
1203 pdd = kfd_get_process_device_data(dev, p);
1209 if (pdd->drm_file) {
1210 ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1214 ret = kfd_process_device_init_vm(pdd, drm_file);
1217 /* On success, the PDD keeps the drm_file reference */
1218 mutex_unlock(&p->mutex);
1223 mutex_unlock(&p->mutex);
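/*
 * A device is treated as "large BAR" when all of its local memory is
 * host-accessible (no private VRAM), or when the debug_largebar option
 * forces large-BAR behaviour for testing on small-BAR systems.
 */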
1228 bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1230 struct kfd_local_mem_info mem_info;
1232 if (debug_largebar) {
1233 pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1237 if (dev->use_iommu_v2)
1240 amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1241 if (mem_info.local_mem_size_private == 0 &&
1242 mem_info.local_mem_size_public > 0)
1247 static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1248 struct kfd_process *p, void *data)
1250 struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1251 struct kfd_process_device *pdd;
1253 struct kfd_dev *dev;
1256 uint64_t offset = args->mmap_offset;
1257 uint32_t flags = args->flags;
1259 if (args->size == 0)
1262 dev = kfd_device_by_id(args->gpu_id);
1266 if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1267 (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1268 !kfd_dev_is_large_bar(dev)) {
1269 pr_err("Alloc host visible vram on small bar is not allowed\n");
1273 mutex_lock(&p->mutex);
1275 pdd = kfd_bind_process_to_device(dev, p);
1281 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1282 if (args->size != kfd_doorbell_process_slice(dev)) {
1286 offset = kfd_get_process_doorbells(pdd);
1287 } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1288 if (args->size != PAGE_SIZE) {
1292 offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1299 err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1300 dev->kgd, args->va_addr, args->size,
1301 pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
1307 idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1308 if (idr_handle < 0) {
1313 /* Update the VRAM usage count */
1314 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
1315 WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
1317 mutex_unlock(&p->mutex);
1319 args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1320 args->mmap_offset = offset;
	/* MMIO is mapped through the kfd device.
	 * Generate a kfd mmap offset.
	 */
1325 if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1326 args->mmap_offset = KFD_MMAP_TYPE_MMIO
1327 | KFD_MMAP_GPU_ID(args->gpu_id);
1332 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
1333 pdd->drm_priv, NULL);
1335 mutex_unlock(&p->mutex);
1339 static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1340 struct kfd_process *p, void *data)
1342 struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1343 struct kfd_process_device *pdd;
1345 struct kfd_dev *dev;
1349 dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1353 mutex_lock(&p->mutex);
1355 pdd = kfd_get_process_device_data(dev, p);
1357 pr_err("Process device data doesn't exist\n");
1362 mem = kfd_process_device_translate_handle(
1363 pdd, GET_IDR_HANDLE(args->handle));
1369 ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1370 (struct kgd_mem *)mem, pdd->drm_priv, &size);
1372 /* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
1376 kfd_process_device_remove_obj_handle(
1377 pdd, GET_IDR_HANDLE(args->handle));
1379 WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
1382 mutex_unlock(&p->mutex);
1386 static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1387 struct kfd_process *p, void *data)
1389 struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1390 struct kfd_process_device *pdd, *peer_pdd;
1392 struct kfd_dev *dev, *peer;
1395 uint32_t *devices_arr = NULL;
1397 dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1401 if (!args->n_devices) {
1402 pr_debug("Device IDs array empty\n");
1405 if (args->n_success > args->n_devices) {
1406 pr_debug("n_success exceeds n_devices\n");
1410 devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1415 err = copy_from_user(devices_arr,
1416 (void __user *)args->device_ids_array_ptr,
1417 args->n_devices * sizeof(*devices_arr));
1420 goto copy_from_user_failed;
1423 mutex_lock(&p->mutex);
1425 pdd = kfd_bind_process_to_device(dev, p);
1428 goto bind_process_to_device_failed;
1431 mem = kfd_process_device_translate_handle(pdd,
1432 GET_IDR_HANDLE(args->handle));
1435 goto get_mem_obj_from_handle_failed;
1438 for (i = args->n_success; i < args->n_devices; i++) {
1439 peer = kfd_device_by_id(devices_arr[i]);
1441 pr_debug("Getting device by id failed for 0x%x\n",
1444 goto get_mem_obj_from_handle_failed;
1447 peer_pdd = kfd_bind_process_to_device(peer, p);
1448 if (IS_ERR(peer_pdd)) {
1449 err = PTR_ERR(peer_pdd);
1450 goto get_mem_obj_from_handle_failed;
1452 err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1453 peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
1455 pr_err("Failed to map to gpu %d/%d\n",
1456 i, args->n_devices);
1457 goto map_memory_to_gpu_failed;
1459 args->n_success = i+1;
1462 mutex_unlock(&p->mutex);
1464 err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1466 pr_debug("Sync memory failed, wait interrupted by user signal\n");
1467 goto sync_memory_failed;
1470 /* Flush TLBs after waiting for the page table updates to complete */
1471 for (i = 0; i < args->n_devices; i++) {
1472 peer = kfd_device_by_id(devices_arr[i]);
1473 if (WARN_ON_ONCE(!peer))
1475 peer_pdd = kfd_get_process_device_data(peer, p);
1476 if (WARN_ON_ONCE(!peer_pdd))
1478 kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
1485 bind_process_to_device_failed:
1486 get_mem_obj_from_handle_failed:
1487 map_memory_to_gpu_failed:
1488 mutex_unlock(&p->mutex);
1489 copy_from_user_failed:
1496 static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1497 struct kfd_process *p, void *data)
1499 struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1500 struct kfd_process_device *pdd, *peer_pdd;
1502 struct kfd_dev *dev, *peer;
1504 uint32_t *devices_arr = NULL, i;
1506 dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1510 if (!args->n_devices) {
1511 pr_debug("Device IDs array empty\n");
1514 if (args->n_success > args->n_devices) {
1515 pr_debug("n_success exceeds n_devices\n");
1519 devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1524 err = copy_from_user(devices_arr,
1525 (void __user *)args->device_ids_array_ptr,
1526 args->n_devices * sizeof(*devices_arr));
1529 goto copy_from_user_failed;
1532 mutex_lock(&p->mutex);
1534 pdd = kfd_get_process_device_data(dev, p);
1537 goto bind_process_to_device_failed;
1540 mem = kfd_process_device_translate_handle(pdd,
1541 GET_IDR_HANDLE(args->handle));
1544 goto get_mem_obj_from_handle_failed;
1547 for (i = args->n_success; i < args->n_devices; i++) {
1548 peer = kfd_device_by_id(devices_arr[i]);
1551 goto get_mem_obj_from_handle_failed;
1554 peer_pdd = kfd_get_process_device_data(peer, p);
1557 goto get_mem_obj_from_handle_failed;
1559 err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1560 peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
1562 pr_err("Failed to unmap from gpu %d/%d\n",
1563 i, args->n_devices);
1564 goto unmap_memory_from_gpu_failed;
1566 args->n_success = i+1;
1570 mutex_unlock(&p->mutex);
1574 bind_process_to_device_failed:
1575 get_mem_obj_from_handle_failed:
1576 unmap_memory_from_gpu_failed:
1577 mutex_unlock(&p->mutex);
1578 copy_from_user_failed:
1583 static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1584 struct kfd_process *p, void *data)
1587 struct kfd_ioctl_alloc_queue_gws_args *args = data;
1589 struct kfd_dev *dev;
1591 mutex_lock(&p->mutex);
1592 q = pqm_get_user_queue(&p->pqm, args->queue_id);
1606 if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1611 retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1612 mutex_unlock(&p->mutex);
1614 args->first_gws = 0;
1618 mutex_unlock(&p->mutex);
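/*
 * Query information about a dma-buf: its size, the GPU that exports it,
 * and optional driver metadata.
 */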
1622 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1623 struct kfd_process *p, void *data)
1625 struct kfd_ioctl_get_dmabuf_info_args *args = data;
1626 struct kfd_dev *dev = NULL;
1627 struct kgd_dev *dma_buf_kgd;
1628 void *metadata_buffer = NULL;
1633 /* Find a KFD GPU device that supports the get_dmabuf_info query */
1634 for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1640 if (args->metadata_ptr) {
1641 metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1642 if (!metadata_buffer)
1646 /* Get dmabuf info from KGD */
1647 r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1648 &dma_buf_kgd, &args->size,
1649 metadata_buffer, args->metadata_size,
1650 &args->metadata_size, &flags);
1654 /* Reverse-lookup gpu_id from kgd pointer */
1655 dev = kfd_device_by_kgd(dma_buf_kgd);
1660 args->gpu_id = dev->id;
1661 args->flags = flags;
1663 /* Copy metadata buffer to user mode */
1664 if (metadata_buffer) {
1665 r = copy_to_user((void __user *)args->metadata_ptr,
1666 metadata_buffer, args->metadata_size);
1672 kfree(metadata_buffer);
1677 static int kfd_ioctl_import_dmabuf(struct file *filep,
1678 struct kfd_process *p, void *data)
1680 struct kfd_ioctl_import_dmabuf_args *args = data;
1681 struct kfd_process_device *pdd;
1682 struct dma_buf *dmabuf;
1683 struct kfd_dev *dev;
1689 dev = kfd_device_by_id(args->gpu_id);
1693 dmabuf = dma_buf_get(args->dmabuf_fd);
1695 return PTR_ERR(dmabuf);
1697 mutex_lock(&p->mutex);
1699 pdd = kfd_bind_process_to_device(dev, p);
1705 r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1706 args->va_addr, pdd->drm_priv,
1707 (struct kgd_mem **)&mem, &size,
1712 idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1713 if (idr_handle < 0) {
1718 mutex_unlock(&p->mutex);
1719 dma_buf_put(dmabuf);
1721 args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1726 amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
1727 pdd->drm_priv, NULL);
1729 mutex_unlock(&p->mutex);
1730 dma_buf_put(dmabuf);
1734 /* Handle requests for watching SMI events */
1735 static int kfd_ioctl_smi_events(struct file *filep,
1736 struct kfd_process *p, void *data)
1738 struct kfd_ioctl_smi_events_args *args = data;
1739 struct kfd_dev *dev;
1741 dev = kfd_device_by_id(args->gpuid);
1745 return kfd_smi_event_open(dev, &args->anon_fd);
1748 static int kfd_ioctl_set_xnack_mode(struct file *filep,
1749 struct kfd_process *p, void *data)
1751 struct kfd_ioctl_set_xnack_mode_args *args = data;
1754 mutex_lock(&p->mutex);
1755 if (args->xnack_enabled >= 0) {
1756 if (!list_empty(&p->pqm.queues)) {
1757 pr_debug("Process has user queues running\n");
1758 mutex_unlock(&p->mutex);
1761 if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
1764 p->xnack_enabled = args->xnack_enabled;
1766 args->xnack_enabled = p->xnack_enabled;
1768 mutex_unlock(&p->mutex);
1773 #if IS_ENABLED(CONFIG_HSA_AMD_SVM)
1774 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
1776 struct kfd_ioctl_svm_args *args = data;
1779 pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
1780 args->start_addr, args->size, args->op, args->nattr);
1782 if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
1784 if (!args->start_addr || !args->size)
1787 mutex_lock(&p->mutex);
1789 r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
1792 mutex_unlock(&p->mutex);
1797 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
1803 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1804 [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1805 .cmd_drv = 0, .name = #ioctl}
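/*
 * Ioctl descriptor table, indexed by the ioctl number (_IOC_NR) so that
 * kfd_ioctl() can look up the handler for a command directly.
 */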
1808 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1809 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1810 kfd_ioctl_get_version, 0),
1812 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1813 kfd_ioctl_create_queue, 0),
1815 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1816 kfd_ioctl_destroy_queue, 0),
1818 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1819 kfd_ioctl_set_memory_policy, 0),
1821 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1822 kfd_ioctl_get_clock_counters, 0),
1824 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1825 kfd_ioctl_get_process_apertures, 0),
1827 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1828 kfd_ioctl_update_queue, 0),
1830 AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1831 kfd_ioctl_create_event, 0),
1833 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1834 kfd_ioctl_destroy_event, 0),
1836 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1837 kfd_ioctl_set_event, 0),
1839 AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1840 kfd_ioctl_reset_event, 0),
1842 AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1843 kfd_ioctl_wait_events, 0),
1845 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1846 kfd_ioctl_dbg_register, 0),
1848 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1849 kfd_ioctl_dbg_unregister, 0),
1851 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1852 kfd_ioctl_dbg_address_watch, 0),
1854 AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1855 kfd_ioctl_dbg_wave_control, 0),
1857 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1858 kfd_ioctl_set_scratch_backing_va, 0),
1860 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1861 kfd_ioctl_get_tile_config, 0),
1863 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1864 kfd_ioctl_set_trap_handler, 0),
1866 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1867 kfd_ioctl_get_process_apertures_new, 0),
1869 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1870 kfd_ioctl_acquire_vm, 0),
1872 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1873 kfd_ioctl_alloc_memory_of_gpu, 0),
1875 AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1876 kfd_ioctl_free_memory_of_gpu, 0),
1878 AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1879 kfd_ioctl_map_memory_to_gpu, 0),
1881 AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1882 kfd_ioctl_unmap_memory_from_gpu, 0),
1884 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1885 kfd_ioctl_set_cu_mask, 0),
1887 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1888 kfd_ioctl_get_queue_wave_state, 0),
1890 AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1891 kfd_ioctl_get_dmabuf_info, 0),
1893 AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1894 kfd_ioctl_import_dmabuf, 0),
1896 AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
1897 kfd_ioctl_alloc_queue_gws, 0),
1899 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
1900 kfd_ioctl_smi_events, 0),
1902 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
1904 AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
1905 kfd_ioctl_set_xnack_mode, 0),
1908 #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
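/*
 * Main ioctl dispatcher: validate the ioctl number, look up its descriptor
 * in amdkfd_ioctls[], copy the argument struct from user space (sized to
 * the larger of the kernel's and the caller's view of the struct), call
 * the handler, and copy the result back out.
 */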
1910 static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1912 struct kfd_process *process;
1913 amdkfd_ioctl_t *func;
1914 const struct amdkfd_ioctl_desc *ioctl = NULL;
1915 unsigned int nr = _IOC_NR(cmd);
1916 char stack_kdata[128];
1918 unsigned int usize, asize;
1919 int retcode = -EINVAL;
1921 if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1924 if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1927 ioctl = &amdkfd_ioctls[nr];
1929 amdkfd_size = _IOC_SIZE(ioctl->cmd);
1930 usize = asize = _IOC_SIZE(cmd);
1931 if (amdkfd_size > asize)
1932 asize = amdkfd_size;
1938 dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
1940 /* Get the process struct from the filep. Only the process
1941 * that opened /dev/kfd can use the file descriptor. Child
1942 * processes need to create their own KFD device context.
1944 process = filep->private_data;
1945 if (process->lead_thread != current->group_leader) {
1946 dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
1951 /* Do not trust userspace, use our own definition */
1954 if (unlikely(!func)) {
1955 dev_dbg(kfd_device, "no function\n");
1960 if (cmd & (IOC_IN | IOC_OUT)) {
1961 if (asize <= sizeof(stack_kdata)) {
1962 kdata = stack_kdata;
1964 kdata = kmalloc(asize, GFP_KERNEL);
1971 memset(kdata + usize, 0, asize - usize);
1975 if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1979 } else if (cmd & IOC_OUT) {
1980 memset(kdata, 0, usize);
1983 retcode = func(filep, process, kdata);
1986 if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1991 dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1992 task_pid_nr(current), cmd, nr);
	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
				nr, arg, retcode);

	return retcode;
}
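/*
 * Map the device's single MMIO remap page into the process address space
 * as uncached I/O memory. The requested mapping must be exactly one page.
 */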
2004 static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
2005 struct vm_area_struct *vma)
	phys_addr_t address;
	int ret;
2010 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
2013 address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
2015 vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
2016 VM_DONTDUMP | VM_PFNMAP;
2018 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
2020 pr_debug("pasid 0x%x mapping mmio page\n"
2021 " target user address == 0x%08llX\n"
2022 " physical address == 0x%08llX\n"
2023 " vm_flags == 0x%04lX\n"
2024 " size == 0x%04lX\n",
2025 process->pasid, (unsigned long long) vma->vm_start,
2026 address, vma->vm_flags, PAGE_SIZE);
	ret = io_remap_pfn_range(vma,
				 vma->vm_start,
				 address >> PAGE_SHIFT,
				 PAGE_SIZE,
				 vma->vm_page_prot);

	return ret;
}
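/*
 * Top-level mmap handler for /dev/kfd: decode the mmap offset into a
 * mapping type and GPU ID, then hand off to the doorbell, event,
 * reserved-memory or MMIO mapping routine.
 */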
2037 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
2039 struct kfd_process *process;
2040 struct kfd_dev *dev = NULL;
2041 unsigned long mmap_offset;
2042 unsigned int gpu_id;
2044 process = kfd_get_process(current);
2045 if (IS_ERR(process))
2046 return PTR_ERR(process);
2048 mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
2049 gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
2051 dev = kfd_device_by_id(gpu_id);
2053 switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
2054 case KFD_MMAP_TYPE_DOORBELL:
2057 return kfd_doorbell_mmap(dev, process, vma);
2059 case KFD_MMAP_TYPE_EVENTS:
2060 return kfd_event_mmap(process, vma);
2062 case KFD_MMAP_TYPE_RESERVED_MEM:
2065 return kfd_reserved_mem_mmap(dev, process, vma);
2066 case KFD_MMAP_TYPE_MMIO:
2069 return kfd_mmio_mmap(dev, process, vma);