drm/amdgpu: use the last IB as gang leader v2
Author:     Christian König <christian.koenig@amd.com>
AuthorDate: Wed, 9 Nov 2022 18:54:09 +0000 (19:54 +0100)
Commit:     Christian König <christian.koenig@amd.com>
CommitDate: Tue, 15 Nov 2022 14:27:18 +0000 (15:27 +0100)
It turned out that the gang leader is not the job of the last IB
specified, but rather the last job allocated.

This is unfortunate and not very intuitive for the CS interface, so
fix it by remembering which job owns the last parsed IB and using
that one as gang leader.
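
To see why the two can differ, consider a hypothetical chunk order
(ring names are purely illustrative; slot numbers assume that
amdgpu_cs_job_idx() hands out job slots in first-seen order, as the
gang submit frontend does):

    IB 0: compute -> job slot 0 (newly allocated)
    IB 1: gfx     -> job slot 1 (newly allocated)
    IB 2: compute -> job slot 0 (reused)

Here p->jobs[p->gang_size - 1] is the gfx job, although the last IB,
which userspace expects to lead the gang, belongs to the compute job
in slot 0.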

Fixes: 4624459c84d7 ("drm/amdgpu: add gang submit frontend v6")
Tested-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221115094206.6181-1-christian.koenig@amd.com

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1bbd39b..fbdf139 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
                return r;
 
        ++(num_ibs[r]);
+       p->gang_leader_idx = r;
        return 0;
 }
 
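With the added line, the whole helper reads roughly as below
(reconstructed from the hunk's context; the amdgpu_cs_job_idx() call
and its error check are an assumption based on the gang submit
frontend, and only the gang_leader_idx assignment is new). The return
value of amdgpu_cs_job_idx() is the job/entity slot for the IB's
ring, and since the assignment runs for every IB, the field ends up
pointing at the job of the last IB parsed:

    static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
                               struct drm_amdgpu_cs_chunk_ib *chunk_ib,
                               unsigned int *num_ibs)
    {
            int r;

            /* r is the job slot for this IB's ring */
            r = amdgpu_cs_job_idx(p, chunk_ib);
            if (r < 0)
                    return r;

            ++(num_ibs[r]);
            /* runs once per IB, so it sticks at the last one parsed */
            p->gang_leader_idx = r;
            return 0;
    }
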
@@ -300,7 +301,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
                if (ret)
                        goto free_all_kdata;
        }
-       p->gang_leader = p->jobs[p->gang_size - 1];
+       p->gang_leader = p->jobs[p->gang_leader_idx];
 
        if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
                ret = -ECANCELED;
@@ -1194,16 +1195,18 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
                        return r;
        }
 
-       for (i = 0; i < p->gang_size - 1; ++i) {
+       for (i = 0; i < p->gang_size; ++i) {
+               if (p->jobs[i] == leader)
+                       continue;
+
                r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
                if (r)
                        return r;
        }
 
-       r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
+       r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
        if (r && r != -ERESTARTSYS)
                DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
-
        return r;
 }
 
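Because the leader may now occupy any slot of p->jobs[], loops over
the followers can no longer simply stop at gang_size - 1; they walk
the whole array and skip the leader by pointer identity. The idiom in
isolation (a sketch, not literal kernel code):

    struct amdgpu_job *leader = p->gang_leader;
    unsigned int i;

    for (i = 0; i < p->gang_size; ++i) {
            if (p->jobs[i] == leader)
                    continue; /* the leader is handled separately */
            /* ... operate on the follower job p->jobs[i] ... */
    }
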
@@ -1237,9 +1240,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
        for (i = 0; i < p->gang_size; ++i)
                drm_sched_job_arm(&p->jobs[i]->base);
 
-       for (i = 0; i < (p->gang_size - 1); ++i) {
+       for (i = 0; i < p->gang_size; ++i) {
                struct dma_fence *fence;
 
+               if (p->jobs[i] == leader)
+                       continue;
+
                fence = &p->jobs[i]->base.s_fence->scheduled;
                r = amdgpu_sync_fence(&leader->sync, fence);
                if (r)
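
What this hunk wires up: the leader's sync object collects the
"scheduled" fence of every follower, so the gang leader cannot start
before all other gang members have been picked up by their
schedulers. Roughly (follower is a hypothetical stand-in for
p->jobs[i]):

    /* follower scheduled -> leader may run */
    r = amdgpu_sync_fence(&leader->sync,
                          &follower->base.s_fence->scheduled);
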
@@ -1275,7 +1281,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
        list_for_each_entry(e, &p->validated, tv.head) {
 
                /* Everybody except for the gang leader uses READ */
-               for (i = 0; i < (p->gang_size - 1); ++i) {
+               for (i = 0; i < p->gang_size; ++i) {
+                       if (p->jobs[i] == leader)
+                               continue;
+
                        dma_resv_add_fence(e->tv.bo->base.resv,
                                           &p->jobs[i]->base.s_fence->finished,
                                           DMA_RESV_USAGE_READ);
@@ -1285,7 +1294,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                e->tv.num_shared = 0;
        }
 
-       seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
+       seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
                                   p->fence);
        amdgpu_cs_post_dependencies(p);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
index cbaa19b..f80adf9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -54,6 +54,7 @@ struct amdgpu_cs_parser {
 
        /* scheduler job objects */
        unsigned int            gang_size;
+       unsigned int            gang_leader_idx;
        struct drm_sched_entity *entities[AMDGPU_CS_GANG_SIZE];
        struct amdgpu_job       *jobs[AMDGPU_CS_GANG_SIZE];
        struct amdgpu_job       *gang_leader;
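
After amdgpu_cs_pass1() the new field is tied to the existing pointer
by a simple invariant; a hypothetical sanity check (not part of this
patch) would be:

    /* hypothetical assertion, not added by this commit */
    WARN_ON(p->gang_leader != p->jobs[p->gang_leader_idx]);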