drm/amdkfd: Fix kfd_process_device_init_vm error handling
authorPhilip Yang <Philip.Yang@amd.com>
Wed, 14 Dec 2022 15:15:17 +0000 (10:15 -0500)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 12 Jan 2023 11:02:37 +0000 (12:02 +0100)
[ Upstream commit 29d48b87db64b6697ddad007548e51d032081c59 ]

Should only destroy the ib_mem and let process cleanup worker to free
the outstanding BOs. Reset the pointer in pdd->qpd structure, to avoid
NULL pointer access in process destroy worker.

 BUG: kernel NULL pointer dereference, address: 0000000000000010
 Call Trace:
  amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel+0x46/0xb0 [amdgpu]
  kfd_process_device_destroy_cwsr_dgpu+0x40/0x70 [amdgpu]
  kfd_process_destroy_pdds+0x71/0x190 [amdgpu]
  kfd_process_wq_release+0x2a2/0x3b0 [amdgpu]
  process_one_work+0x2a1/0x600
  worker_thread+0x39/0x3d0

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/gpu/drm/amd/amdkfd/kfd_process.c

index 951b636..9821fa9 100644 (file)
@@ -689,13 +689,13 @@ void kfd_process_destroy_wq(void)
 }
 
 static void kfd_process_free_gpuvm(struct kgd_mem *mem,
-                       struct kfd_process_device *pdd, void *kptr)
+                       struct kfd_process_device *pdd, void **kptr)
 {
        struct kfd_dev *dev = pdd->dev;
 
-       if (kptr) {
+       if (kptr && *kptr) {
                amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
-               kptr = NULL;
+               *kptr = NULL;
        }
 
        amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv);
@@ -795,7 +795,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
        if (!qpd->ib_kaddr || !qpd->ib_base)
                return;
 
-       kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
+       kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr);
 }
 
 struct kfd_process *kfd_create_process(struct file *filep)
@@ -1277,7 +1277,7 @@ static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
        if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
                return;
 
-       kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
+       kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr);
 }
 
 void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
@@ -1598,8 +1598,8 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
        return 0;
 
 err_init_cwsr:
+       kfd_process_device_destroy_ib_mem(pdd);
 err_reserve_ib_mem:
-       kfd_process_device_free_bos(pdd);
        pdd->drm_priv = NULL;
 
        return ret;