drm/amdkfd: Make TLB flush conditional on mapping
authorEric Huang <jinhuieric.huang@amd.com>
Tue, 1 Jun 2021 22:54:32 +0000 (18:54 -0400)
committerAlex Deucher <alexander.deucher@amd.com>
Fri, 4 Jun 2021 16:40:01 +0000 (12:40 -0400)
This optimizes memory mapping latency and also avoids
a page fault in a corner case of changing a valid PDE into
a PTE.

Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_process.c

index cf62f43..db16b3e 100644 (file)
@@ -269,7 +269,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
                struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
                uint64_t *size);
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-               struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
+               struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, bool *table_freed);
 int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
                struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
 int amdgpu_amdkfd_gpuvm_sync_memory(
index 141cd29..ed0e094 100644 (file)
@@ -1070,7 +1070,8 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
 
 static int update_gpuvm_pte(struct kgd_mem *mem,
                            struct kfd_mem_attachment *entry,
-                           struct amdgpu_sync *sync)
+                           struct amdgpu_sync *sync,
+                           bool *table_freed)
 {
        struct amdgpu_bo_va *bo_va = entry->bo_va;
        struct amdgpu_device *adev = entry->adev;
@@ -1081,7 +1082,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
                return ret;
 
        /* Update the page tables  */
-       ret = amdgpu_vm_bo_update(adev, bo_va, false);
+       ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed);
        if (ret) {
                pr_err("amdgpu_vm_bo_update failed\n");
                return ret;
@@ -1093,7 +1094,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
 static int map_bo_to_gpuvm(struct kgd_mem *mem,
                           struct kfd_mem_attachment *entry,
                           struct amdgpu_sync *sync,
-                          bool no_update_pte)
+                          bool no_update_pte,
+                          bool *table_freed)
 {
        int ret;
 
@@ -1110,7 +1112,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
        if (no_update_pte)
                return 0;
 
-       ret = update_gpuvm_pte(mem, entry, sync);
+       ret = update_gpuvm_pte(mem, entry, sync, table_freed);
        if (ret) {
                pr_err("update_gpuvm_pte() failed\n");
                goto update_gpuvm_pte_failed;
@@ -1608,7 +1610,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 }
 
 int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-               struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
+               struct kgd_dev *kgd, struct kgd_mem *mem,
+               void *drm_priv, bool *table_freed)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
        struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
@@ -1696,7 +1699,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
                         entry->va, entry->va + bo_size, entry);
 
                ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
-                                     is_invalid_userptr);
+                                     is_invalid_userptr, table_freed);
                if (ret) {
                        pr_err("Failed to map bo to gpuvm\n");
                        goto out_unreserve;
@@ -2146,7 +2149,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
                                continue;
 
                        kfd_mem_dmaunmap_attachment(mem, attachment);
-                       ret = update_gpuvm_pte(mem, attachment, &sync);
+                       ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
                        if (ret) {
                                pr_err("%s: update PTE failed\n", __func__);
                                /* make sure this gets validated again */
@@ -2352,7 +2355,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
                                continue;
 
                        kfd_mem_dmaunmap_attachment(mem, attachment);
-                       ret = update_gpuvm_pte(mem, attachment, &sync_obj);
+                       ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
                        if (ret) {
                                pr_debug("Memory eviction: update PTE failed. Try again\n");
                                goto validate_map_fail;
index e88f638..cf483c0 100644 (file)
@@ -1393,6 +1393,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
        long err = 0;
        int i;
        uint32_t *devices_arr = NULL;
+       bool table_freed = false;
 
        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
        if (!dev)
@@ -1450,7 +1451,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
                        goto get_mem_obj_from_handle_failed;
                }
                err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
-                       peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
+                       peer->kgd, (struct kgd_mem *)mem,
+                       peer_pdd->drm_priv, &table_freed);
                if (err) {
                        pr_err("Failed to map to gpu %d/%d\n",
                               i, args->n_devices);
@@ -1468,16 +1470,17 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
        }
 
        /* Flush TLBs after waiting for the page table updates to complete */
-       for (i = 0; i < args->n_devices; i++) {
-               peer = kfd_device_by_id(devices_arr[i]);
-               if (WARN_ON_ONCE(!peer))
-                       continue;
-               peer_pdd = kfd_get_process_device_data(peer, p);
-               if (WARN_ON_ONCE(!peer_pdd))
-                       continue;
-               kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+       if (table_freed) {
+               for (i = 0; i < args->n_devices; i++) {
+                       peer = kfd_device_by_id(devices_arr[i]);
+                       if (WARN_ON_ONCE(!peer))
+                               continue;
+                       peer_pdd = kfd_get_process_device_data(peer, p);
+                       if (WARN_ON_ONCE(!peer_pdd))
+                               continue;
+                       kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+               }
        }
-
        kfree(devices_arr);
 
        return err;
index 1a99771..bfa6c4c 100644 (file)
@@ -672,7 +672,8 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
        if (err)
                goto err_alloc_mem;
 
-       err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem, pdd->drm_priv);
+       err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
+                       pdd->drm_priv, NULL);
        if (err)
                goto err_map_mem;