drm/amdgpu: move explicit sync check into the CS
author: Christian König <christian.koenig@amd.com>
Thu, 29 Sep 2022 11:05:56 +0000 (13:05 +0200)
committer: Christian König <christian.koenig@amd.com>
Thu, 3 Nov 2022 11:45:20 +0000 (12:45 +0100)
This moves the memory allocation out of the critical code path.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Luben Tuikov <luben.tuikov@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221014084641.128280-8-christian.koenig@amd.com
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h

index aa6f6c4..d45b86b 100644 (file)
@@ -449,8 +449,19 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
        }
 
        r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
-       dma_fence_put(fence);
+       if (r)
+               goto error;
+
+       /*
+        * When we have an explicit dependency it might be necessary to insert a
+        * pipeline sync to make sure that all caches etc are flushed and the
+        * next job actually sees the results from the previous one.
+        */
+       if (fence->context == p->gang_leader->base.entity->fence_context)
+               r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
 
+error:
+       dma_fence_put(fence);
        return r;
 }
 
index 258cffe..774c77b 100644 (file)
@@ -182,7 +182,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 
        need_ctx_switch = ring->current_ctx != fence_ctx;
        if (ring->funcs->emit_pipeline_sync && job &&
-           ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
+           ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
             (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
             amdgpu_vm_need_pipeline_sync(ring, job))) {
                need_pipe_sync = true;
index fa60d25..6c12679 100644 (file)
@@ -108,7 +108,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        (*job)->vm = vm;
 
        amdgpu_sync_create(&(*job)->sync);
-       amdgpu_sync_create(&(*job)->sched_sync);
+       amdgpu_sync_create(&(*job)->explicit_sync);
        (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
        (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
 
@@ -176,7 +176,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
        drm_sched_job_cleanup(s_job);
 
        amdgpu_sync_free(&job->sync);
-       amdgpu_sync_free(&job->sched_sync);
+       amdgpu_sync_free(&job->explicit_sync);
 
        dma_fence_put(&job->hw_fence);
 }
@@ -204,7 +204,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
 
        amdgpu_job_free_resources(job);
        amdgpu_sync_free(&job->sync);
-       amdgpu_sync_free(&job->sched_sync);
+       amdgpu_sync_free(&job->explicit_sync);
        if (job->gang_submit != &job->base.s_fence->scheduled)
                dma_fence_put(job->gang_submit);
 
@@ -251,12 +251,6 @@ amdgpu_job_dependency(struct drm_sched_job *sched_job,
        int r;
 
        fence = amdgpu_sync_get_fence(&job->sync);
-       if (fence && drm_sched_dependency_optimized(fence, s_entity)) {
-               r = amdgpu_sync_fence(&job->sched_sync, fence);
-               if (r)
-                       DRM_ERROR("Error adding fence (%d)\n", r);
-       }
-
        while (!fence && job->vm && !job->vmid) {
                r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
                if (r)
index e2ecab9..ef4bdde 100644 (file)
@@ -48,7 +48,7 @@ struct amdgpu_job {
        struct drm_sched_job    base;
        struct amdgpu_vm        *vm;
        struct amdgpu_sync      sync;
-       struct amdgpu_sync      sched_sync;
+       struct amdgpu_sync      explicit_sync;
        struct dma_fence        hw_fence;
        struct dma_fence        *gang_submit;
        uint32_t                preamble_status;