From 56bff270febd4ab58a4bcb8fd5ab6787089513e0 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 26 Feb 2021 15:19:25 +0100 Subject: [PATCH] radeonsi,radv: do not overallocate the SQTT buffer size The number of shader engines isn't always 4. Signed-off-by: Samuel Pitoiset Reviewed-by: Pierre-Eric Pelloux-Prayer Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/amd/common/ac_sqtt.c | 11 +++++++---- src/amd/common/ac_sqtt.h | 6 ++++-- src/amd/vulkan/radv_sqtt.c | 17 ++++++++++------- src/gallium/drivers/radeonsi/si_sqtt.c | 11 ++++++----- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/amd/common/ac_sqtt.c b/src/amd/common/ac_sqtt.c index 863dfa2..1f8bda5 100644 --- a/src/amd/common/ac_sqtt.c +++ b/src/amd/common/ac_sqtt.c @@ -35,11 +35,13 @@ ac_thread_trace_get_info_offset(unsigned se) } uint64_t -ac_thread_trace_get_data_offset(struct ac_thread_trace_data *data, unsigned se) +ac_thread_trace_get_data_offset(const struct radeon_info *rad_info, + const struct ac_thread_trace_data *data, unsigned se) { + unsigned max_se = rad_info->max_se; uint64_t data_offset; - data_offset = align64(sizeof(struct ac_thread_trace_info) * 4, + data_offset = align64(sizeof(struct ac_thread_trace_info) * max_se, 1 << SQTT_BUFFER_ALIGN_SHIFT); data_offset += data->buffer_size * se; @@ -53,9 +55,10 @@ ac_thread_trace_get_info_va(uint64_t va, unsigned se) } uint64_t -ac_thread_trace_get_data_va(struct ac_thread_trace_data *data, uint64_t va, unsigned se) +ac_thread_trace_get_data_va(const struct radeon_info *rad_info, + const struct ac_thread_trace_data *data, uint64_t va, unsigned se) { - return va + ac_thread_trace_get_data_offset(data, se); + return va + ac_thread_trace_get_data_offset(rad_info, data, se); } bool diff --git a/src/amd/common/ac_sqtt.h b/src/amd/common/ac_sqtt.h index 4b8e282..267b609 100644 --- a/src/amd/common/ac_sqtt.h +++ b/src/amd/common/ac_sqtt.h @@ -78,12 +78,14 @@ uint64_t ac_thread_trace_get_info_offset(unsigned se); uint64_t -ac_thread_trace_get_data_offset(struct ac_thread_trace_data *data, unsigned se); +ac_thread_trace_get_data_offset(const struct radeon_info *rad_info, + const struct ac_thread_trace_data *data, unsigned se); uint64_t ac_thread_trace_get_info_va(uint64_t va, unsigned se); uint64_t -ac_thread_trace_get_data_va(struct ac_thread_trace_data *data, uint64_t va, unsigned se); +ac_thread_trace_get_data_va(const struct radeon_info *rad_info, + const struct ac_thread_trace_data *data, uint64_t va, unsigned se); bool ac_is_thread_trace_complete(struct radeon_info *rad_info, const struct ac_thread_trace_info *info); diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c index ed7e5df..4bfdc4b 100644 --- a/src/amd/vulkan/radv_sqtt.c +++ b/src/amd/vulkan/radv_sqtt.c @@ -36,13 +36,14 @@ radv_emit_thread_trace_start(struct radv_device *device, uint32_t queue_family_index) { uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT; - unsigned max_se = device->physical_device->rad_info.max_se; + struct radeon_info *rad_info = &device->physical_device->rad_info; + unsigned max_se = rad_info->max_se; assert(device->physical_device->rad_info.chip_class >= GFX8); for (unsigned se = 0; se < max_se; se++) { uint64_t va = radv_buffer_get_va(device->thread_trace.bo); - uint64_t data_va = ac_thread_trace_get_data_va(&device->thread_trace, va, se); + uint64_t data_va = ac_thread_trace_get_data_va(rad_info, &device->thread_trace, va, se); uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT; int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]); @@ -400,6 +401,7 @@ radv_emit_wait_for_idle(struct radv_device *device, static bool radv_thread_trace_init_bo(struct radv_device *device) { + unsigned max_se = device->physical_device->rad_info.max_se; struct radeon_winsys *ws = device->ws; uint64_t size; @@ -409,10 +411,10 @@ radv_thread_trace_init_bo(struct radv_device *device) device->thread_trace.buffer_size = align64(device->thread_trace.buffer_size, 1u << SQTT_BUFFER_ALIGN_SHIFT); - /* Compute total size of the thread trace BO for 4 SEs. */ - size = align64(sizeof(struct ac_thread_trace_info) * 4, + /* Compute total size of the thread trace BO for all SEs. */ + size = align64(sizeof(struct ac_thread_trace_info) * max_se, 1 << SQTT_BUFFER_ALIGN_SHIFT); - size += device->thread_trace.buffer_size * 4ll; + size += device->thread_trace.buffer_size * (uint64_t)max_se; device->thread_trace.bo = ws->buffer_create(ws, size, 4096, RADEON_DOMAIN_VRAM, @@ -625,7 +627,8 @@ radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace) { struct radv_device *device = queue->device; - unsigned max_se = device->physical_device->rad_info.max_se; + struct radeon_info *rad_info = &device->physical_device->rad_info; + unsigned max_se = rad_info->max_se; void *thread_trace_ptr = device->thread_trace.ptr; memset(thread_trace, 0, sizeof(*thread_trace)); @@ -633,7 +636,7 @@ radv_get_thread_trace(struct radv_queue *queue, for (unsigned se = 0; se < max_se; se++) { uint64_t info_offset = ac_thread_trace_get_info_offset(se); - uint64_t data_offset = ac_thread_trace_get_data_offset(&device->thread_trace, se); + uint64_t data_offset = ac_thread_trace_get_data_offset(rad_info, &device->thread_trace, se); void *info_ptr = (uint8_t *)thread_trace_ptr + info_offset; void *data_ptr = (uint8_t *)thread_trace_ptr + data_offset; struct ac_thread_trace_info *info = diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c index 8fc751b..3cca359 100644 --- a/src/gallium/drivers/radeonsi/si_sqtt.c +++ b/src/gallium/drivers/radeonsi/si_sqtt.c @@ -38,6 +38,7 @@ si_emit_spi_config_cntl(struct si_context* sctx, static bool si_thread_trace_init_bo(struct si_context *sctx) { + unsigned max_se = sctx->screen->info.max_se; struct radeon_winsys *ws = sctx->ws; uint64_t size; @@ -47,10 +48,10 @@ si_thread_trace_init_bo(struct si_context *sctx) sctx->thread_trace->buffer_size = align64(sctx->thread_trace->buffer_size, 1u << SQTT_BUFFER_ALIGN_SHIFT); - /* Compute total size of the thread trace BO for 4 SEs. */ - size = align64(sizeof(struct ac_thread_trace_info) * 4, + /* Compute total size of the thread trace BO for all SEs. */ + size = align64(sizeof(struct ac_thread_trace_info) * max_se, 1 << SQTT_BUFFER_ALIGN_SHIFT); - size += sctx->thread_trace->buffer_size * 4ll; + size += sctx->thread_trace->buffer_size * (uint64_t)max_se; sctx->thread_trace->bo = ws->buffer_create(ws, size, 4096, @@ -77,7 +78,7 @@ si_emit_thread_trace_start(struct si_context* sctx, for (unsigned se = 0; se < max_se; se++) { uint64_t va = sctx->ws->buffer_get_virtual_address(sctx->thread_trace->bo); - uint64_t data_va = ac_thread_trace_get_data_va(sctx->thread_trace, va, se); + uint64_t data_va = ac_thread_trace_get_data_va(&sctx->screen->info, sctx->thread_trace, va, se); uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT; /* Target SEx and SH0. */ @@ -495,7 +496,7 @@ si_get_thread_trace(struct si_context *sctx, for (unsigned se = 0; se < max_se; se++) { uint64_t info_offset = ac_thread_trace_get_info_offset(se); - uint64_t data_offset = ac_thread_trace_get_data_offset(sctx->thread_trace, se); + uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se); void *info_ptr = thread_trace_ptr + info_offset; void *data_ptr = thread_trace_ptr + data_offset; struct ac_thread_trace_info *info = -- 2.7.4