drm/amdgpu: add sched sync for amdgpu job v2
authorChunming Zhou <David1.Zhou@amd.com>
Tue, 9 May 2017 07:50:22 +0000 (15:50 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Wed, 24 May 2017 21:40:35 +0000 (17:40 -0400)
This is an improvement on the previous patch: sched_sync stores the fences
whose wait could be optimized away because their jobs were already scheduled
on the same ring. When the job is executed, the pipeline sync is skipped if
all fences in sched_sync are already signalled; otherwise the pipeline sync
is still inserted.

v2: handle the error when adding a fence to the sync object fails.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com> (v1)
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

index ea8ad69..37bd003 100644 (file)
@@ -1124,6 +1124,7 @@ struct amdgpu_job {
        struct amdgpu_vm        *vm;
        struct amdgpu_ring      *ring;
        struct amdgpu_sync      sync;
+       struct amdgpu_sync      sched_sync;
        struct amdgpu_ib        *ibs;
        struct dma_fence        *fence; /* the hw fence */
        uint32_t                preamble_status;
index 53d0d21..631a9f7 100644 (file)
@@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib *ib = &ibs[0];
+       struct dma_fence *tmp;
        bool skip_preamble, need_ctx_switch;
        unsigned patch_offset = ~0;
        struct amdgpu_vm *vm;
@@ -160,8 +161,13 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
                return r;
        }
-       if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
+
+       if (ring->funcs->emit_pipeline_sync && job &&
+           (tmp = amdgpu_sync_get_fence(&job->sched_sync))) {
+               job->need_pipeline_sync = true;
                amdgpu_ring_emit_pipeline_sync(ring);
+               dma_fence_put(tmp);
+       }
 
        if (vm) {
                r = amdgpu_vm_flush(ring, job);
index 7570f24..4af9264 100644 (file)
@@ -60,6 +60,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
        (*job)->need_pipeline_sync = false;
 
        amdgpu_sync_create(&(*job)->sync);
+       amdgpu_sync_create(&(*job)->sched_sync);
 
        return 0;
 }
@@ -98,6 +99,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
 
        dma_fence_put(job->fence);
        amdgpu_sync_free(&job->sync);
+       amdgpu_sync_free(&job->sched_sync);
        kfree(job);
 }
 
@@ -107,6 +109,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
 
        dma_fence_put(job->fence);
        amdgpu_sync_free(&job->sync);
+       amdgpu_sync_free(&job->sched_sync);
        kfree(job);
 }
 
@@ -139,10 +142,10 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
        struct amdgpu_vm *vm = job->vm;
 
        struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync);
+       int r;
 
        while (fence == NULL && vm && !job->vm_id) {
                struct amdgpu_ring *ring = job->ring;
-               int r;
 
                r = amdgpu_vm_grab_id(vm, ring, &job->sync,
                                      &job->base.s_fence->finished,
@@ -153,9 +156,11 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
                fence = amdgpu_sync_get_fence(&job->sync);
        }
 
-       if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
-               job->need_pipeline_sync = true;
-
+       if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) {
+               r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence);
+               if (r)
+                       DRM_ERROR("Error adding fence to sync (%d)\n", r);
+       }
        return fence;
 }