drm/amdgpu: Remove kfd eviction fence before release bo (v2)
author xinhui pan <xinhui.pan@amd.com>
Tue, 11 Feb 2020 03:28:34 +0000 (11:28 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 26 Feb 2020 19:17:32 +0000 (14:17 -0500)
No need to trigger eviction as the memory mapping will not be used
anymore.

All pt/pd bos share the same resv, hence the same shared eviction fence.
Every time a page table is freed, the fence will be signaled, and that
causes unexpected kfd evictions.

v2: squash in 32 bit fix

CC: Christian König <christian.koenig@amd.com>
CC: Felix Kuehling <felix.kuehling@amd.com>
CC: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: xinhui pan <xinhui.pan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c

index e1c2c182898f87780a501301d48b6627fb7acfc2..514551da33ebecd56fec56620ba016dcfe8fbf17 100644 (file)
@@ -673,6 +673,11 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
 {
 }
 
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+       return 0; /* no-op variant: does nothing and always returns 0 */
+}
+
 void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
                                        struct amdgpu_vm *vm)
 {
index 9e8db702d878aa60ffbc5d6c2df0736a6b403f9c..0ee8aae6c519cbe5ab0b180f33eda207dd34f9b4 100644 (file)
@@ -96,6 +96,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
                                                       struct mm_struct *mm);
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
 
 struct amdkfd_process_info {
        /* List head of all VMs that belong to a KFD process */
index 66bde9e9a4c947ee92116509bd4d15d1a0c4c8a1..e1d1eed7a25fef12bfed061f3f608ca66d134cfb 100644 (file)
@@ -276,6 +276,42 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
        return 0;
 }
 
+int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
+{
+       struct amdgpu_bo *root = bo;
+       struct amdgpu_vm_bo_base *vm_bo;
+       struct amdgpu_vm *vm;
+       struct amdkfd_process_info *info;
+       struct amdgpu_amdkfd_fence *ef;
+       int ret;
+
+       /* Climb to the root PD bo; the VM is reached through its vm_bo. */
+       while (root->parent)
+               root = root->parent;
+
+       vm_bo = root->vm_bo;
+       if (!vm_bo)
+               return 0; /* root has no vm_bo: nothing to do */
+
+       vm = vm_bo->vm;
+       if (!vm)
+               return 0; /* vm_bo has no VM: nothing to do */
+
+       info = vm->process_info;
+       if (!info || !info->eviction_fence)
+               return 0; /* no process info or eviction fence to remove */
+
+       ef = container_of(dma_fence_get(&info->eviction_fence->base),
+                       struct amdgpu_amdkfd_fence, base); /* takes a fence ref */
+
+       BUG_ON(!dma_resv_trylock(bo->tbo.base.resv)); /* lock failure is fatal */
+       ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef);
+       dma_resv_unlock(bo->tbo.base.resv);
+
+       dma_fence_put(&ef->base); /* drop the reference taken above */
+       return ret;
+}
+
 static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
                                     bool wait)
 {
@@ -1044,6 +1080,8 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
        list_del(&vm->vm_list_node);
        mutex_unlock(&process_info->lock);
 
+       vm->process_info = NULL;
+
        /* Release per-process resources when last compute VM is destroyed */
        if (!process_info->n_vms) {
                WARN_ON(!list_empty(&process_info->kfd_bo_list));
index 65176b97401c222ca37eb0f06ed78894fa4593f6..e4a8c424d290f8c7e785808e87a99fb844d17d24 100644 (file)
@@ -1307,6 +1307,11 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
        if (abo->kfd_bo)
                amdgpu_amdkfd_unreserve_memory_limit(abo);
 
+       /* We only remove the fence if the resv has individualized. */
+       WARN_ON_ONCE(bo->base.resv != &bo->base._resv);
+       if (bo->base.resv == &bo->base._resv)
+               amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo);
+
        if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node ||
            !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE))
                return;