drm/amdkfd: Relocate TBA/TMA to opposite side of VM hole
author Jay Cornwall <jay.cornwall@amd.com>
Wed, 12 Jul 2023 20:53:44 +0000 (15:53 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 7 Aug 2023 21:14:06 +0000 (17:14 -0400)
The TBA and TMA, along with an unused IB allocation, reside at low
addresses in the VM address space. A stray VM fault which hits these
pages must be serviced by making their page table entries invalid.
The scheduler depends upon these pages being resident and fails,
preventing a debugger from inspecting the failure state.

By relocating these pages above 47 bits in the VM address space they
can only be reached when bits [63:48] are set to 1. This makes it much
less likely for a misbehaving program to generate accesses to them.
The current placement at VA (PAGE_SIZE*2) is readily hit by a NULL
access with a small offset.

Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c

index da2ca00..dd6984c 100644 (file)
@@ -330,6 +330,12 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
                pdd->gpuvm_base = SVM_USER_BASE;
                pdd->gpuvm_limit =
                        pdd->dev->kfd->shared_resources.gpuvm_size - 1;
+
+               /* dGPUs: the reserved space for kernel
+                * before SVM
+                */
+               pdd->qpd.cwsr_base = SVM_CWSR_BASE;
+               pdd->qpd.ib_base = SVM_IB_BASE;
        } else {
                /* set them to non CANONICAL addresses, and no SVM is
                 * allocated.
@@ -348,18 +354,20 @@ static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
        pdd->lds_base = MAKE_LDS_APP_BASE_V9();
        pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
 
-       /* Raven needs SVM to support graphic handle, etc. Leave the small
-        * reserved space before SVM on Raven as well, even though we don't
-        * have to.
-        * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they
-        * are used in Thunk to reserve SVM.
-        */
-       pdd->gpuvm_base = SVM_USER_BASE;
+       pdd->gpuvm_base = PAGE_SIZE;
        pdd->gpuvm_limit =
                pdd->dev->kfd->shared_resources.gpuvm_size - 1;
 
        pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
        pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+
+       if (!pdd->dev->kfd->use_iommu_v2) {
+               /*
+                * Place TBA/TMA on opposite side of VM hole to prevent
+                * stray faults from triggering SVM on these pages.
+                */
+               pdd->qpd.cwsr_base = pdd->dev->kfd->shared_resources.gpuvm_size;
+       }
 }
 
 int kfd_init_apertures(struct kfd_process *process)
@@ -416,14 +424,6 @@ int kfd_init_apertures(struct kfd_process *process)
                                        return -EINVAL;
                                }
                        }
-
-                       if (!dev->kfd->use_iommu_v2) {
-                               /* dGPUs: the reserved space for kernel
-                                * before SVM
-                                */
-                               pdd->qpd.cwsr_base = SVM_CWSR_BASE;
-                               pdd->qpd.ib_base = SVM_IB_BASE;
-                       }
                }
 
                dev_dbg(kfd_device, "node id %u\n", id);