drm/amdkfd: Move process doorbell allocation into kfd device
author Mukul Joshi <mukul.joshi@amd.com>
Fri, 18 Sep 2020 20:45:45 +0000 (16:45 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Tue, 22 Sep 2020 16:25:02 +0000 (12:25 -0400)
Move doorbell allocation for a process into kfd device and
allocate doorbell space in each PDD during process creation.
Currently, KFD manages its own doorbell space, but for some
devices amdgpu would allocate the complete doorbell
space instead of leaving a chunk of doorbell space for KFD to
manage. In a system with a mix of such devices, KFD would need
to request process doorbell space based on the type of device,
either from amdgpu or from its own doorbell space.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process.c

index 297484ca7d193050be4a88d92a8ab603b14045c7..222f1df1a6b6d91312c5abd49d45f6cb0a7b88c2 100644 (file)
@@ -1291,18 +1291,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
                return -EINVAL;
        }
 
-       if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
-               if (args->size != kfd_doorbell_process_slice(dev))
-                       return -EINVAL;
-               offset = kfd_get_process_doorbells(dev, p);
-       } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
-               if (args->size != PAGE_SIZE)
-                       return -EINVAL;
-               offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
-               if (!offset)
-                       return -ENOMEM;
-       }
-
        mutex_lock(&p->mutex);
 
        pdd = kfd_bind_process_to_device(dev, p);
@@ -1311,6 +1299,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
                goto err_unlock;
        }
 
+       if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+               if (args->size != kfd_doorbell_process_slice(dev)) {
+                       err = -EINVAL;
+                       goto err_unlock;
+               }
+               offset = kfd_get_process_doorbells(pdd);
+       } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
+               if (args->size != PAGE_SIZE) {
+                       err = -EINVAL;
+                       goto err_unlock;
+               }
+               offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
+               if (!offset) {
+                       err = -ENOMEM;
+                       goto err_unlock;
+               }
+       }
+
        err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                dev->kgd, args->va_addr, args->size,
                pdd->vm, (struct kgd_mem **) &mem, &offset,
index e3fc6ed7b79c394b3c0624b44e86b3d507f4d6a8..135001a404bc7b585cd7d7b64806fe88965f59f6 100644 (file)
@@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
 
        atomic_set(&kfd->sram_ecc_flag, 0);
 
+       ida_init(&kfd->doorbell_ida);
+
        return kfd;
 }
 
@@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
                kfd_interrupt_exit(kfd);
                kfd_topology_remove_device(kfd);
                kfd_doorbell_fini(kfd);
+               ida_destroy(&kfd->doorbell_ida);
                kfd_gtt_sa_fini(kfd);
                amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
                if (kfd->gws)
index ed362ab8ec21a478eeff8594b1cca11fb28d2fb7..62504d5fa42b1bdf92cce21b0c098f954befc705 100644 (file)
@@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
        }
 
        q->properties.doorbell_off =
-               kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
+               kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
                                          q->doorbell_id);
-
        return 0;
 }
 
index 8e0c00b9555eddd4235f3a792a0c4ed56f0ae5f0..768d153acff42991317784861f87ba13efd71e4c 100644 (file)
@@ -31,9 +31,6 @@
  * kernel queues using the first doorbell page reserved for the kernel.
  */
 
-static DEFINE_IDA(doorbell_ida);
-static unsigned int max_doorbell_slices;
-
 /*
  * Each device exposes a doorbell aperture, a PCI MMIO aperture that
  * receives 32-bit writes that are passed to queues as wptr values.
@@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
        else
                return -ENOSPC;
 
-       if (!max_doorbell_slices ||
-           doorbell_process_limit < max_doorbell_slices)
-               max_doorbell_slices = doorbell_process_limit;
+       if (!kfd->max_doorbell_slices ||
+           doorbell_process_limit < kfd->max_doorbell_slices)
+               kfd->max_doorbell_slices = doorbell_process_limit;
 
        kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
                                doorbell_start_offset;
@@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
                      struct vm_area_struct *vma)
 {
        phys_addr_t address;
+       struct kfd_process_device *pdd;
 
        /*
         * For simplicitly we only allow mapping of the entire doorbell
@@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
        if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
                return -EINVAL;
 
-       /* Calculate physical address of doorbell */
-       address = kfd_get_process_doorbells(dev, process);
+       pdd = kfd_get_process_device_data(dev, process);
+       if (!pdd)
+               return -EINVAL;
 
+       /* Calculate physical address of doorbell */
+       address = kfd_get_process_doorbells(pdd);
        vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
                                VM_DONTDUMP | VM_PFNMAP;
 
@@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
 }
 
 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
-                                       struct kfd_process *process,
+                                       struct kfd_process_device *pdd,
                                        unsigned int doorbell_id)
 {
        /*
@@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
         * units regardless of the ASIC-dependent doorbell size.
         */
        return kfd->doorbell_base_dw_offset +
-               process->doorbell_index
+               pdd->doorbell_index
                * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
                doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
 }
@@ -251,25 +252,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
 
 }
 
-phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
-                                       struct kfd_process *process)
+phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
 {
-       return dev->doorbell_base +
-               process->doorbell_index * kfd_doorbell_process_slice(dev);
+       return pdd->dev->doorbell_base +
+               pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
 }
 
-int kfd_alloc_process_doorbells(struct kfd_process *process)
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
 {
-       int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices,
+       int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
                                GFP_KERNEL);
        if (r > 0)
-               process->doorbell_index = r;
+               *doorbell_index = r;
 
        return r;
 }
 
-void kfd_free_process_doorbells(struct kfd_process *process)
+void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
 {
-       if (process->doorbell_index)
-               ida_simple_remove(&doorbell_ida, process->doorbell_index);
+       if (doorbell_index)
+               ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
 }
index 8c2b8ccd27fb7127fde2462e9cbb473d4c95fc7b..739db04080d0f73426aa8dfba1b17ce7f29b7b4a 100644 (file)
@@ -314,6 +314,9 @@ struct kfd_dev {
        spinlock_t smi_lock;
 
        uint32_t reset_seq_num;
+
+       struct ida doorbell_ida;
+       unsigned int max_doorbell_slices;
 };
 
 enum kfd_mempool {
@@ -699,6 +702,7 @@ struct kfd_process_device {
        struct attribute attr_evict;
 
        struct kobject *kobj_stats;
+       unsigned int doorbell_index;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -736,7 +740,6 @@ struct kfd_process {
        struct mmu_notifier mmu_notifier;
 
        uint16_t pasid;
-       unsigned int doorbell_index;
 
        /*
         * List of kfd_process_device structures,
@@ -869,13 +872,13 @@ u32 read_kernel_doorbell(u32 __iomem *db);
 void write_kernel_doorbell(void __iomem *db, u32 value);
 void write_kernel_doorbell64(void __iomem *db, u64 value);
 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
-                                       struct kfd_process *process,
+                                       struct kfd_process_device *pdd,
                                        unsigned int doorbell_id);
-phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
-                                       struct kfd_process *process);
-int kfd_alloc_process_doorbells(struct kfd_process *process);
-void kfd_free_process_doorbells(struct kfd_process *process);
-
+phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
+int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
+                               unsigned int *doorbell_index);
+void kfd_free_process_doorbells(struct kfd_dev *kfd,
+                               unsigned int doorbell_index);
 /* GTT Sub-Allocator */
 
 int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
index ad53b2668221fe44190b777b9dac30564b5e1cd0..e2b6d31d93ce8512179fb40b1e1f63c4bdc472f5 100644 (file)
@@ -876,6 +876,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
                kfree(pdd->qpd.doorbell_bitmap);
                idr_destroy(&pdd->alloc_idr);
 
+               kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
+
                /*
                 * before destroying pdd, make sure to report availability
                 * for auto suspend
@@ -932,8 +934,6 @@ static void kfd_process_wq_release(struct work_struct *work)
        kfd_event_free_process(p);
 
        kfd_pasid_free(p->pasid);
-       kfd_free_process_doorbells(p);
-
        mutex_destroy(&p->mutex);
 
        put_task_struct(p->lead_thread);
@@ -1111,9 +1111,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
        if (process->pasid == 0)
                goto err_alloc_pasid;
 
-       if (kfd_alloc_process_doorbells(process) < 0)
-               goto err_alloc_doorbells;
-
        err = pqm_init(&process->pqm, process);
        if (err != 0)
                goto err_process_pqm_init;
@@ -1141,8 +1138,6 @@ err_register_notifier:
 err_init_apertures:
        pqm_uninit(&process->pqm);
 err_process_pqm_init:
-       kfd_free_process_doorbells(process);
-err_alloc_doorbells:
        kfd_pasid_free(process->pasid);
 err_alloc_pasid:
        mutex_destroy(&process->mutex);
@@ -1205,10 +1200,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
        if (!pdd)
                return NULL;
 
+       if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
+               pr_err("Failed to alloc doorbell for pdd\n");
+               goto err_free_pdd;
+       }
+
        if (init_doorbell_bitmap(&pdd->qpd, dev)) {
                pr_err("Failed to init doorbell for process\n");
-               kfree(pdd);
-               return NULL;
+               goto err_free_pdd;
        }
 
        pdd->dev = dev;
@@ -1231,6 +1230,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
        idr_init(&pdd->alloc_idr);
 
        return pdd;
+
+err_free_pdd:
+       kfree(pdd);
+       return NULL;
 }
 
 /**