drm/amdkfd: Move process doorbell allocation into kfd device
authorMukul Joshi <mukul.joshi@amd.com>
Fri, 18 Sep 2020 20:45:45 +0000 (16:45 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 22 Sep 2020 16:25:02 +0000 (12:25 -0400)
Move doorbell allocation for a process into kfd device and
allocate doorbell space in each PDD during process creation.
Currently, KFD manages its own doorbell space but for some
devices, amdgpu would allocate the complete doorbell
space instead of leaving a chunk of doorbell space for KFD to
manage. In a system with mix of such devices, KFD would need
to request process doorbell space based on the type of device,
either from amdgpu or from its own doorbell space.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c

index 297484c..222f1df 100644 (file)
@@ -1291,18 +1291,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
                return -EINVAL;
        }
 
-       if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
-               if (args->size != kfd_doorbell_process_slice(dev))
-                       return -EINVAL;
-               offset = kfd_get_process_doorbells(dev, p);
-       } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
-               if (args->size != PAGE_SIZE)
-                       return -EINVAL;
-               offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
-               if (!offset)
-                       return -ENOMEM;
-       }
-
        mutex_lock(&p->mutex);
 
        pdd = kfd_bind_process_to_device(dev, p);
@@ -1311,6 +1299,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
                goto err_unlock;
        }
 
+       if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+               if (args->size != kfd_doorbell_process_slice(dev)) {
+                       err = -EINVAL;
+                       goto err_unlock;
+               }
+               offset = kfd_get_process_doorbells(pdd);
+       } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+               if (args->size != PAGE_SIZE) {
+                       err = -EINVAL;
+                       goto err_unlock;
+               }
+               offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+               if (!offset) {
+                       err = -ENOMEM;
+                       goto err_unlock;
+               }
+       }
+
        err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                dev->kgd, args->va_addr, args->size,
                pdd->vm, (struct kgd_mem **) &mem, &offset,
index e3fc6ed..135001a 100644 (file)
@@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
 
        atomic_set(&kfd->sram_ecc_flag, 0);
 
+       ida_init(&kfd->doorbell_ida);
+
        return kfd;
 }
 
@@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
                kfd_interrupt_exit(kfd);
                kfd_topology_remove_device(kfd);
                kfd_doorbell_fini(kfd);
+               ida_destroy(&kfd->doorbell_ida);
                kfd_gtt_sa_fini(kfd);
                amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
                if (kfd->gws)
index ed362ab..62504d5 100644 (file)
@@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
        }
 
        q->properties.doorbell_off =
-               kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
+               kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
                                          q->doorbell_id);
-
        return 0;
 }
 
index 8e0c00b..768d153 100644 (file)
@@ -31,9 +31,6 @@
  * kernel queues using the first doorbell page reserved for the kernel.
  */
 
-static DEFINE_IDA(doorbell_ida);
-static unsigned int max_doorbell_slices;
-
 /*
  * Each device exposes a doorbell aperture, a PCI MMIO aperture that
  * receives 32-bit writes that are passed to queues as wptr values.
@@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
        else
                return -ENOSPC;
 
-       if (!max_doorbell_slices ||
-           doorbell_process_limit < max_doorbell_slices)
-               max_doorbell_slices = doorbell_process_limit;
+       if (!kfd->max_doorbell_slices ||
+           doorbell_process_limit < kfd->max_doorbell_slices)
+               kfd->max_doorbell_slices = doorbell_process_limit;
 
        kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
                                doorbell_start_offset;
@@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
                      struct vm_area_struct *vma)
 {
        phys_addr_t address;
+       struct kfd_process_device *pdd;
 
        /*
         * For simplicitly we only allow mapping of the entire doorbell
@@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
        if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
                return -EINVAL;
 
-       /* Calculate physical address of doorbell */
-       address = kfd_get_process_doorbells(dev, process);
+       pdd = kfd_get_process_device_data(dev, process);
+       if (!pdd)
+               return -EINVAL;
 
+       /* Calculate physical address of doorbell */
+       address = kfd_get_process_doorbells(pdd);
        vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
                                VM_DONTDUMP | VM_PFNMAP;
 
@@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
 }
 
 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
-                                       struct kfd_process *process,
+                                       struct kfd_process_device *pdd,
                                        unsigned int doorbell_id)
 {
        /*
@@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
         * units regardless of the ASIC-dependent doorbell size.
         */
        return kfd->doorbell_base_dw_offset +
-               process->doorbell_index
+               pdd->doorbell_index
                * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
                doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
 }
@@ -251,25 +252,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
 
 }
 
-phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
-                                       struct kfd_process *process)
+phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
 {
-       return dev->doorbell_base +
-               process->doorbell_index * kfd_doorbell_process_slice(dev);
+       return pdd->dev->doorbell_base +
+               pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
 }
 
-int kfd_alloc_process_doorbells(struct kfd_process *process)
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
 {
-       int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices,
+       int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
                                GFP_KERNEL);
        if (r > 0)
-               process->doorbell_index = r;
+               *doorbell_index = r;
 
        return r;
 }
 
-void kfd_free_process_doorbells(struct kfd_process *process)
+void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
 {
-       if (process->doorbell_index)
-               ida_simple_remove(&doorbell_ida, process->doorbell_index);
+       if (doorbell_index)
+               ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
 }
index 8c2b8cc..739db04 100644 (file)
@@ -314,6 +314,9 @@ struct kfd_dev {
        spinlock_t smi_lock;
 
        uint32_t reset_seq_num;
+
+       struct ida doorbell_ida;
+       unsigned int max_doorbell_slices;
 };
 
 enum kfd_mempool {
@@ -699,6 +702,7 @@ struct kfd_process_device {
        struct attribute attr_evict;
 
        struct kobject *kobj_stats;
+       unsigned int doorbell_index;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -736,7 +740,6 @@ struct kfd_process {
        struct mmu_notifier mmu_notifier;
 
        uint16_t pasid;
-       unsigned int doorbell_index;
 
        /*
         * List of kfd_process_device structures,
@@ -869,13 +872,13 @@ u32 read_kernel_doorbell(u32 __iomem *db);
 void write_kernel_doorbell(void __iomem *db, u32 value);
 void write_kernel_doorbell64(void __iomem *db, u64 value);
 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
-                                       struct kfd_process *process,
+                                       struct kfd_process_device *pdd,
                                        unsigned int doorbell_id);
-phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
-                                       struct kfd_process *process);
-int kfd_alloc_process_doorbells(struct kfd_process *process);
-void kfd_free_process_doorbells(struct kfd_process *process);
-
+phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
+                               unsigned int *doorbell_index);
+void kfd_free_process_doorbells(struct kfd_dev *kfd,
+                               unsigned int doorbell_index);
 /* GTT Sub-Allocator */
 
 int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
index ad53b26..e2b6d31 100644 (file)
@@ -876,6 +876,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
                kfree(pdd->qpd.doorbell_bitmap);
                idr_destroy(&pdd->alloc_idr);
 
+               kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
+
                /*
                 * before destroying pdd, make sure to report availability
                 * for auto suspend
@@ -932,8 +934,6 @@ static void kfd_process_wq_release(struct work_struct *work)
        kfd_event_free_process(p);
 
        kfd_pasid_free(p->pasid);
-       kfd_free_process_doorbells(p);
-
        mutex_destroy(&p->mutex);
 
        put_task_struct(p->lead_thread);
@@ -1111,9 +1111,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
        if (process->pasid == 0)
                goto err_alloc_pasid;
 
-       if (kfd_alloc_process_doorbells(process) < 0)
-               goto err_alloc_doorbells;
-
        err = pqm_init(&process->pqm, process);
        if (err != 0)
                goto err_process_pqm_init;
@@ -1141,8 +1138,6 @@ err_register_notifier:
 err_init_apertures:
        pqm_uninit(&process->pqm);
 err_process_pqm_init:
-       kfd_free_process_doorbells(process);
-err_alloc_doorbells:
        kfd_pasid_free(process->pasid);
 err_alloc_pasid:
        mutex_destroy(&process->mutex);
@@ -1205,10 +1200,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
        if (!pdd)
                return NULL;
 
+       if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
+               pr_err("Failed to alloc doorbell for pdd\n");
+               goto err_free_pdd;
+       }
+
        if (init_doorbell_bitmap(&pdd->qpd, dev)) {
                pr_err("Failed to init doorbell for process\n");
-               kfree(pdd);
-               return NULL;
+               goto err_free_pdd;
        }
 
        pdd->dev = dev;
@@ -1231,6 +1230,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
        idr_init(&pdd->alloc_idr);
 
        return pdd;
+
+err_free_pdd:
+       kfree(pdd);
+       return NULL;
 }
 
 /**