From 4d30f2c6f42c9653f9ff49af6c5be0218f8964f3 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 15 Nov 2018 11:29:53 +0100 Subject: [PATCH] radv/winsys: remove the max IBs per submit limit for the fallback path The chained submission is the fastest path and it should now be used more often than before. This removes some EOP events. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 103 ++++++++++++++------------ 1 file changed, 55 insertions(+), 48 deletions(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index abc4f39..f2d07a5 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -865,66 +865,73 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx); struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence; amdgpu_bo_list_handle bo_list; - struct amdgpu_cs_request request; - bool emit_signal_sem = sem_info->cs_emit_signal; + struct amdgpu_cs_request request = {}; + struct amdgpu_cs_ib_info *ibs; + struct radv_amdgpu_cs *cs0; + unsigned number_of_ibs; + assert(cs_count); + cs0 = radv_amdgpu_cs(cs_array[0]); - for (unsigned i = 0; i < cs_count;) { - struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[i]); - struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; - struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs; - unsigned cnt = MIN2(AMDGPU_CS_MAX_IBS_PER_SUBMIT - !!preamble_cs, - cs_count - i); + /* Compute the number of IBs for this submit. */ + number_of_ibs = cs_count + !!initial_preamble_cs; - memset(&request, 0, sizeof(request)); + /* Create a buffer object list. */ + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0, + initial_preamble_cs, radv_bo_list, + &bo_list); + if (r) { + fprintf(stderr, "amdgpu: buffer list creation failed " + "for the fallback submission (%d)\n", r); + return r; + } - r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0, - preamble_cs, radv_bo_list, &bo_list); - if (r) { - fprintf(stderr, "amdgpu: buffer list creation failed " - "for the fallback submission (%d)\n", r); - return r; - } + ibs = malloc(number_of_ibs * sizeof(*ibs)); + if (!ibs) { + if (bo_list) + amdgpu_bo_list_destroy(bo_list); + return -ENOMEM; + } - request.ip_type = cs0->hw_ip; - request.ring = queue_idx; - request.resources = bo_list; - request.number_of_ibs = cnt + !!preamble_cs; - request.ibs = ibs; - request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx); + /* Configure the CS request. */ + if (initial_preamble_cs) + ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib; - if (preamble_cs) { - ibs[0] = radv_amdgpu_cs(preamble_cs)->ib; - } + for (unsigned i = 0; i < cs_count; i++) { + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); - for (unsigned j = 0; j < cnt; ++j) { - struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); - ibs[j + !!preamble_cs] = cs->ib; + ibs[i + !!initial_preamble_cs] = cs->ib; - if (cs->is_chained) { - *cs->ib_size_ptr -= 4; - cs->is_chained = false; - } + if (cs->is_chained) { + *cs->ib_size_ptr -= 4; + cs->is_chained = false; } + } - sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false; - r = radv_amdgpu_cs_submit(ctx, &request, sem_info); - if (r) { - if (r == -ENOMEM) - fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); - else - fprintf(stderr, "amdgpu: The CS has been rejected, " - "see dmesg for more information.\n"); - } + request.ip_type = cs0->hw_ip; + request.ring = queue_idx; + request.resources = bo_list; + request.number_of_ibs = number_of_ibs; + request.ibs = ibs; + request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx); - if (bo_list) - amdgpu_bo_list_destroy(bo_list); + /* Submit the CS. */ + r = radv_amdgpu_cs_submit(ctx, &request, sem_info); + if (r) { + if (r == -ENOMEM) + fprintf(stderr, "amdgpu: Not enough memory for command submission.\n"); + else + fprintf(stderr, "amdgpu: The CS has been rejected, " + "see dmesg for more information.\n"); + } - if (r) - return r; + if (bo_list) + amdgpu_bo_list_destroy(bo_list); + free(ibs); + + if (r) + return r; - i += cnt; - } if (fence) radv_amdgpu_request_to_fence(ctx, fence, &request); @@ -1131,7 +1138,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, if (!cs->ws->use_ib_bos) { ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); - } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) { + } else if (can_patch && cs->ws->batchchain) { ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, bo_list, cs_array, cs_count, initial_preamble_cs, continue_preamble_cs, _fence); } else { -- 2.7.4