drm/amdgpu: use doorbell mgr for kfd kernel doorbells
author Shashank Sharma <shashank.sharma@amd.com>
Fri, 14 Jul 2023 13:31:20 +0000 (15:31 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 7 Aug 2023 21:14:07 +0000 (17:14 -0400)
This patch:
- adds a doorbell bo to the kfd device structure.
- creates a doorbell page for kfd kernel usage.
- updates the get_kernel_doorbell and free_kernel_doorbell functions
  accordingly

V2: Do not use the wrapper API; call amdgpu_bo_create_kernel() directly (Alex)
V3:
 - Move single variable declaration below (Christian)
 - Add a to-do item to reuse the KGD kernel level doorbells for
   KFD for non-MES cases, instead of reserving one page (Felix)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h

index ebc9674..b310c19 100644 (file)
@@ -455,8 +455,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
        atomic_set(&kfd->compute_profile, 0);
 
        mutex_init(&kfd->doorbell_mutex);
-       memset(&kfd->doorbell_available_index, 0,
-               sizeof(kfd->doorbell_available_index));
 
        ida_init(&kfd->doorbell_ida);
 
index 6421b62..dad81c1 100644 (file)
@@ -61,81 +61,46 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
 /* Doorbell calculations for device init. */
 int kfd_doorbell_init(struct kfd_dev *kfd)
 {
-       size_t doorbell_start_offset;
-       size_t doorbell_aperture_size;
-       size_t doorbell_process_limit;
+       int size = PAGE_SIZE;
+       int r;
 
        /*
-        * With MES enabled, just set the doorbell base as it is needed
-        * to calculate doorbell physical address.
-        */
-       if (kfd->shared_resources.enable_mes) {
-               kfd->doorbell_base =
-                       kfd->shared_resources.doorbell_physical_address;
-               return 0;
-       }
-
-       /*
-        * We start with calculations in bytes because the input data might
-        * only be byte-aligned.
-        * Only after we have done the rounding can we assume any alignment.
+        * Todo: KFD kernel level operations need only one doorbell for
+        * ring test/HWS. So instead of reserving a whole page here for
+        * kernel, reserve and consume a doorbell from existing KGD kernel
+        * doorbell page.
         */
 
-       doorbell_start_offset =
-                       roundup(kfd->shared_resources.doorbell_start_offset,
-                                       kfd_doorbell_process_slice(kfd));
-
-       doorbell_aperture_size =
-                       rounddown(kfd->shared_resources.doorbell_aperture_size,
-                                       kfd_doorbell_process_slice(kfd));
-
-       if (doorbell_aperture_size > doorbell_start_offset)
-               doorbell_process_limit =
-                       (doorbell_aperture_size - doorbell_start_offset) /
-                                               kfd_doorbell_process_slice(kfd);
-       else
-               return -ENOSPC;
-
-       if (!kfd->max_doorbell_slices ||
-           doorbell_process_limit < kfd->max_doorbell_slices)
-               kfd->max_doorbell_slices = doorbell_process_limit;
-
-       kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
-                               doorbell_start_offset;
-
-       kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
-
-       kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
-                                          kfd_doorbell_process_slice(kfd));
-
-       if (!kfd->doorbell_kernel_ptr)
+       /* Bitmap to dynamically allocate doorbells from kernel page */
+       kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
+       if (!kfd->doorbell_bitmap) {
+               DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
                return -ENOMEM;
+       }
 
-       pr_debug("Doorbell initialization:\n");
-       pr_debug("doorbell base           == 0x%08lX\n",
-                       (uintptr_t)kfd->doorbell_base);
-
-       pr_debug("doorbell_base_dw_offset      == 0x%08lX\n",
-                       kfd->doorbell_base_dw_offset);
-
-       pr_debug("doorbell_process_limit  == 0x%08lX\n",
-                       doorbell_process_limit);
-
-       pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
-                       (uintptr_t)kfd->doorbell_base);
-
-       pr_debug("doorbell aperture size  == 0x%08lX\n",
-                       kfd->shared_resources.doorbell_aperture_size);
-
-       pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
+       /* Alloc a doorbell page for KFD kernel usages */
+       r = amdgpu_bo_create_kernel(kfd->adev,
+                                   size,
+                                   PAGE_SIZE,
+                                   AMDGPU_GEM_DOMAIN_DOORBELL,
+                                   &kfd->doorbells,
+                                   NULL,
+                                   (void **)&kfd->doorbell_kernel_ptr);
+       if (r) {
+               pr_err("failed to allocate kernel doorbells\n");
+               bitmap_free(kfd->doorbell_bitmap);
+               return r;
+       }
 
+       pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
        return 0;
 }
 
 void kfd_doorbell_fini(struct kfd_dev *kfd)
 {
-       if (kfd->doorbell_kernel_ptr)
-               iounmap(kfd->doorbell_kernel_ptr);
+       bitmap_free(kfd->doorbell_bitmap);
+       amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
+                            (void **)&kfd->doorbell_kernel_ptr);
 }
 
 int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
@@ -188,22 +153,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
        u32 inx;
 
        mutex_lock(&kfd->doorbell_mutex);
-       inx = find_first_zero_bit(kfd->doorbell_available_index,
-                                       KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+       inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));
 
-       __set_bit(inx, kfd->doorbell_available_index);
+       __set_bit(inx, kfd->doorbell_bitmap);
        mutex_unlock(&kfd->doorbell_mutex);
 
        if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
                return NULL;
 
-       inx *= kfd->device_info.doorbell_size / sizeof(u32);
-
-       /*
-        * Calculating the kernel doorbell offset using the first
-        * doorbell page.
-        */
-       *doorbell_off = kfd->doorbell_base_dw_offset + inx;
+       *doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);
 
        pr_debug("Get kernel queue doorbell\n"
                        "     doorbell offset   == 0x%08X\n"
@@ -217,11 +175,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
 {
        unsigned int inx;
 
-       inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
-               * sizeof(u32) / kfd->device_info.doorbell_size;
+       inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
 
        mutex_lock(&kfd->doorbell_mutex);
-       __clear_bit(inx, kfd->doorbell_available_index);
+       __clear_bit(inx, kfd->doorbell_bitmap);
        mutex_unlock(&kfd->doorbell_mutex);
 }
 
index d4c9ee3..3cb472a 100644 (file)
@@ -385,6 +385,12 @@ struct kfd_dev {
        /* Track per device allocated watch points */
        uint32_t alloc_watch_ids;
        spinlock_t watch_points_lock;
+
+       /* Kernel doorbells for KFD device */
+       struct amdgpu_bo *doorbells;
+
+       /* bitmap for dynamic doorbell allocation from doorbell object */
+       unsigned long *doorbell_bitmap;
 };
 
 enum kfd_mempool {