From 1202d8b0f941f4ff58293d5a6751d9242226b492 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Wed, 26 Apr 2023 14:26:07 +0200
Subject: [PATCH] ac/sqtt: add ac_sqtt_get_trace() helper

It can be shared between RADV and RadeonSI. The only difference is
that RadeonSI can't auto-resize the SQTT BO.

Signed-off-by: Samuel Pitoiset
Part-of:
---
Reviewer note (ignored by git am): this copy of the patch was recovered
from a whitespace-collapsed dump. Line breaks were rebuilt so that every
hunk matches the old/new line counts in its @@ header and the diffstat
below; indentation width and continuation alignment are reconstructed
and should be confirmed against the tree before applying (or apply with
--ignore-whitespace). Angle-bracketed text (e-mail addresses, the Part-of
URL, and possibly a placeholder after AMD_THREAD_TRACE_BUFFER_SIZE=)
appears to have been stripped from the dump and is left as found.

 src/amd/common/ac_sqtt.c               | 44 +++++++++++++++++++++++-
 src/amd/common/ac_sqtt.h               |  6 +++-
 src/amd/vulkan/radv_sqtt.c             | 41 +++-------------------
 src/gallium/drivers/radeonsi/si_sqtt.c | 63 +++++++++++++---------------------
 4 files changed, 77 insertions(+), 77 deletions(-)

diff --git a/src/amd/common/ac_sqtt.c b/src/amd/common/ac_sqtt.c
index 5e596fe..7570eb0 100644
--- a/src/amd/common/ac_sqtt.c
+++ b/src/amd/common/ac_sqtt.c
@@ -107,7 +107,7 @@ ac_thread_trace_finish(struct ac_thread_trace_data *data)
 }
 
 bool
-ac_is_thread_trace_complete(struct radeon_info *rad_info,
+ac_is_thread_trace_complete(const struct radeon_info *rad_info,
                             const struct ac_thread_trace_data *data,
                             const struct ac_thread_trace_info *info)
 {
@@ -262,3 +262,45 @@ ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se)
    /* No active CU on the SE means it is disabled. */
    return info->cu_mask[se][0] == 0;
 }
+
+bool
+ac_sqtt_get_trace(struct ac_thread_trace_data *data,
+                  const struct radeon_info *info,
+                  struct ac_thread_trace *thread_trace)
+{
+   unsigned max_se = info->max_se;
+   void *ptr = data->ptr;
+
+   memset(thread_trace, 0, sizeof(*thread_trace));
+
+   for (unsigned se = 0; se < max_se; se++) {
+      uint64_t info_offset = ac_thread_trace_get_info_offset(se);
+      uint64_t data_offset = ac_thread_trace_get_data_offset(info, data, se);
+      void *info_ptr = (uint8_t *)ptr + info_offset;
+      void *data_ptr = (uint8_t *)ptr + data_offset;
+      struct ac_thread_trace_info *trace_info = (struct ac_thread_trace_info *)info_ptr;
+      struct ac_thread_trace_se thread_trace_se = {0};
+      int first_active_cu = ffs(info->cu_mask[se][0]);
+
+      if (ac_sqtt_se_is_disabled(info, se))
+         continue;
+
+      if (!ac_is_thread_trace_complete(info, data, trace_info))
+         return false;
+
+      thread_trace_se.data_ptr = data_ptr;
+      thread_trace_se.info = *trace_info;
+      thread_trace_se.shader_engine = se;
+
+      /* RGP seems to expect units of WGP on GFX10+. */
+      thread_trace_se.compute_unit =
+         info->gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
+
+      thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
+      thread_trace->num_traces++;
+   }
+
+   thread_trace->data = data;
+
+   return true;
+}
diff --git a/src/amd/common/ac_sqtt.h b/src/amd/common/ac_sqtt.h
index a3b0208..505f2ba 100644
--- a/src/amd/common/ac_sqtt.h
+++ b/src/amd/common/ac_sqtt.h
@@ -106,7 +106,7 @@ void
 ac_thread_trace_finish(struct ac_thread_trace_data *data);
 
 bool
-ac_is_thread_trace_complete(struct radeon_info *rad_info,
+ac_is_thread_trace_complete(const struct radeon_info *rad_info,
                             const struct ac_thread_trace_data *data,
                             const struct ac_thread_trace_info *info);
 
@@ -562,4 +562,8 @@ union rgp_sqtt_marker_cb_id ac_sqtt_get_next_cmdbuf_id(struct ac_thread_trace_da
 
 bool ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se);
 
+bool ac_sqtt_get_trace(struct ac_thread_trace_data *data,
+                       const struct radeon_info *info,
+                       struct ac_thread_trace *thread_trace);
+
 #endif
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index 0f5d413..03eaa3b 100644
--- a/src/amd/vulkan/radv_sqtt.c
+++ b/src/amd/vulkan/radv_sqtt.c
@@ -810,47 +810,16 @@ radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_t
 {
    struct radv_device *device = queue->device;
    struct radeon_info *rad_info = &device->physical_device->rad_info;
-   unsigned max_se = rad_info->max_se;
-   void *thread_trace_ptr = device->thread_trace.ptr;
-
-   memset(thread_trace, 0, sizeof(*thread_trace));
-
-   for (unsigned se = 0; se < max_se; se++) {
-      uint64_t info_offset = ac_thread_trace_get_info_offset(se);
-      uint64_t data_offset = ac_thread_trace_get_data_offset(rad_info, &device->thread_trace, se);
-      void *info_ptr = (uint8_t *)thread_trace_ptr + info_offset;
-      void *data_ptr = (uint8_t *)thread_trace_ptr + data_offset;
-      struct ac_thread_trace_info *info = (struct ac_thread_trace_info *)info_ptr;
-      struct ac_thread_trace_se thread_trace_se = {0};
-      int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
-
-      if (ac_sqtt_se_is_disabled(rad_info, se))
-         continue;
 
-      if (!ac_is_thread_trace_complete(&device->physical_device->rad_info, &device->thread_trace,
-                                       info)) {
-         if (!radv_thread_trace_resize_bo(device)) {
-            fprintf(stderr, "Failed to resize the thread "
-                            "trace buffer.\n");
-            abort();
-         }
-         return false;
+   if (!ac_sqtt_get_trace(&device->thread_trace, rad_info, thread_trace)) {
+      if (!radv_thread_trace_resize_bo(device)) {
+         fprintf(stderr, "Failed to resize the thread trace buffer.\n");
+         abort();
       }
 
-      thread_trace_se.data_ptr = data_ptr;
-      thread_trace_se.info = *info;
-      thread_trace_se.shader_engine = se;
-
-      /* RGP seems to expect units of WGP on GFX10+. */
-      thread_trace_se.compute_unit = device->physical_device->rad_info.gfx_level >= GFX10
-                                        ? (first_active_cu / 2)
-                                        : first_active_cu;
-
-      thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
-      thread_trace->num_traces++;
+      return false;
    }
 
-   thread_trace->data = &device->thread_trace;
    return true;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c
index 5d2438e..d9c91c1 100644
--- a/src/gallium/drivers/radeonsi/si_sqtt.c
+++ b/src/gallium/drivers/radeonsi/si_sqtt.c
@@ -616,51 +616,36 @@ si_get_thread_trace(struct si_context *sctx,
   if (!sctx->thread_trace->ptr)
     return false;
 
-  void *thread_trace_ptr = sctx->thread_trace->ptr;
+  if (!ac_sqtt_get_trace(sctx->thread_trace, &sctx->screen->info,
+                         thread_trace)) {
+    void *thread_trace_ptr = sctx->thread_trace->ptr;
 
-  for (unsigned se = 0; se < max_se; se++) {
-    uint64_t info_offset = ac_thread_trace_get_info_offset(se);
-    uint64_t data_offset = ac_thread_trace_get_data_offset(&sctx->screen->info, sctx->thread_trace, se);
-    void *info_ptr = thread_trace_ptr + info_offset;
-    void *data_ptr = thread_trace_ptr + data_offset;
-    struct ac_thread_trace_info *info =
-      (struct ac_thread_trace_info *)info_ptr;
-
-    struct ac_thread_trace_se thread_trace_se = {0};
+    for (unsigned se = 0; se < max_se; se++) {
+      uint64_t info_offset = ac_thread_trace_get_info_offset(se);
+      void *info_ptr = thread_trace_ptr + info_offset;
+      struct ac_thread_trace_info *info =
+        (struct ac_thread_trace_info *)info_ptr;
 
-    if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
-      continue;
+      if (ac_sqtt_se_is_disabled(&sctx->screen->info, se))
+        continue;
 
-    if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
-      uint32_t expected_size =
-        ac_get_expected_buffer_size(&sctx->screen->info, info);
-      uint32_t available_size = (info->cur_offset * 32) / 1024;
-
-      fprintf(stderr, "Failed to get the thread trace "
-                      "because the buffer is too small. The "
-                      "hardware needs %d KB but the "
-                      "buffer size is %d KB.\n",
-                      expected_size, available_size);
-      fprintf(stderr, "Please update the buffer size with "
-                      "AMD_THREAD_TRACE_BUFFER_SIZE=\n");
-      return false;
+      if (!ac_is_thread_trace_complete(&sctx->screen->info, sctx->thread_trace, info)) {
+        uint32_t expected_size =
+          ac_get_expected_buffer_size(&sctx->screen->info, info);
+        uint32_t available_size = (info->cur_offset * 32) / 1024;
+
+        fprintf(stderr, "Failed to get the thread trace "
+                        "because the buffer is too small. The "
+                        "hardware needs %d KB but the "
+                        "buffer size is %d KB.\n",
+                        expected_size, available_size);
+        fprintf(stderr, "Please update the buffer size with "
+                        "AMD_THREAD_TRACE_BUFFER_SIZE=\n");
+        return false;
+      }
     }
-
-    thread_trace_se.data_ptr = data_ptr;
-    thread_trace_se.info = *info;
-    thread_trace_se.shader_engine = se;
-
-    int first_active_cu = ffs(sctx->screen->info.cu_mask[se][0]);
-
-    /* For GFX10+ compute_unit really means WGP */
-    thread_trace_se.compute_unit =
-      sctx->screen->info.gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
-
-    thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
-    thread_trace->num_traces++;
   }
 
-  thread_trace->data = sctx->thread_trace;
   return true;
 }
 
-- 
2.7.4