drm/amdkfd: retry validation to recover range
author Philip Yang <Philip.Yang@amd.com>
Tue, 20 Apr 2021 01:51:27 +0000 (21:51 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 29 Apr 2021 03:36:05 +0000 (23:36 -0400)
When recovering a range after a GPU VM retry fault, validation needs to
be retried if:

1. the range is split in parallel by an unmap during recovery, or
2. the range has migrated to system memory and is updated in system
memory during recovery.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_svm.c

index e4ce97ab6e260c2f5704ec9f84c6771dee680046..30d142f6272f30d1918ca1debffbd5c9f50d156e 100644 (file)
@@ -1402,11 +1402,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
        svm_range_lock(prange);
        if (!prange->actual_loc) {
                if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+                       pr_debug("hmm update the range, need validate again\n");
                        r = -EAGAIN;
                        goto unlock_out;
                }
        }
        if (!list_empty(&prange->child_list)) {
+               pr_debug("range split by unmap in parallel, validate again\n");
                r = -EAGAIN;
                goto unlock_out;
        }
@@ -2355,6 +2357,10 @@ out_unlock_svms:
 out:
        kfd_unref_process(p);
 
+       if (r == -EAGAIN) {
+               pr_debug("recover vm fault later\n");
+               r = 0;
+       }
        return r;
 }