drm/amdgpu: abort submissions during prepare on error
author Christian König <christian.koenig@amd.com>
Mon, 17 Apr 2023 16:15:15 +0000 (18:15 +0200)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 15 Jun 2023 15:37:55 +0000 (11:37 -0400)
Forward errors from previous submissions to this one.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index aca3a2b..9e6f2fa 100644 (file)
@@ -258,16 +258,27 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
        struct dma_fence *fence = NULL;
        int r;
 
+       /* Ignore soft recovered fences here */
+       r = drm_sched_entity_error(s_entity);
+       if (r && r != -ENODATA)
+               goto error;
+
        if (!fence && job->gang_submit)
                fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
 
        while (!fence && job->vm && !job->vmid) {
                r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
-               if (r)
+               if (r) {
                        DRM_ERROR("Error getting VM ID (%d)\n", r);
+                       goto error;
+               }
        }
 
        return fence;
+
+error:
+       dma_fence_set_error(&job->base.s_fence->finished, r);
+       return NULL;
 }
 
 static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)