From 12f1c5d23be44dadab85ba55be2a3e6789909306 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset
Date: Tue, 12 Sep 2023 08:51:49 +0200
Subject: [PATCH] radv: fix instruction timing on GFX11

GFX11 seems to operate on the last active CU.

Signed-off-by: Samuel Pitoiset
Part-of:
---
 src/amd/common/ac_sqtt.c   | 20 ++++++++++++++++++--
 src/amd/common/ac_sqtt.h   |  2 ++
 src/amd/vulkan/radv_sqtt.c |  8 ++++----
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_sqtt.c b/src/amd/common/ac_sqtt.c
index a4737e0..a0b3939 100644
--- a/src/amd/common/ac_sqtt.c
+++ b/src/amd/common/ac_sqtt.c
@@ -233,6 +233,22 @@ ac_sqtt_se_is_disabled(const struct radeon_info *info, unsigned se)
    return info->cu_mask[se][0] == 0;
 }
 
+uint32_t
+ac_sqtt_get_active_cu(const struct radeon_info *info, unsigned se)
+{
+   uint32_t cu_index;
+
+   if (info->gfx_level >= GFX11) {
+      /* GFX11 seems to operate on the last active CU. */
+      cu_index = util_last_bit(info->cu_mask[se][0]) - 1;
+   } else {
+      /* Default to the first active CU. */
+      cu_index = ffs(info->cu_mask[se][0]);
+   }
+
+   return cu_index;
+}
+
 bool
 ac_sqtt_get_trace(struct ac_sqtt *data, const struct radeon_info *info,
                   struct ac_sqtt_trace *sqtt_trace)
@@ -249,7 +265,7 @@ ac_sqtt_get_trace(struct ac_sqtt *data, const struct radeon_info *info,
       void *data_ptr = (uint8_t *)ptr + data_offset;
       struct ac_sqtt_data_info *trace_info = (struct ac_sqtt_data_info *)info_ptr;
       struct ac_sqtt_data_se data_se = {0};
-      int first_active_cu = ffs(info->cu_mask[se][0]);
+      int active_cu = ac_sqtt_get_active_cu(info, se);
 
       if (ac_sqtt_se_is_disabled(info, se))
          continue;
@@ -262,7 +278,7 @@ ac_sqtt_get_trace(struct ac_sqtt *data, const struct radeon_info *info,
       data_se.shader_engine = se;
 
       /* RGP seems to expect units of WGP on GFX10+. */
-      data_se.compute_unit = info->gfx_level >= GFX10 ? (first_active_cu / 2) : first_active_cu;
+      data_se.compute_unit = info->gfx_level >= GFX10 ? (active_cu / 2) : active_cu;
 
       sqtt_trace->traces[sqtt_trace->num_traces] = data_se;
       sqtt_trace->num_traces++;
diff --git a/src/amd/common/ac_sqtt.h b/src/amd/common/ac_sqtt.h
index d67e380..631cf1d 100644
--- a/src/amd/common/ac_sqtt.h
+++ b/src/amd/common/ac_sqtt.h
@@ -556,4 +556,6 @@ bool ac_sqtt_get_trace(struct ac_sqtt *sqtt, const struct radeon_info *info,
 
 uint32_t ac_sqtt_get_shader_mask(const struct radeon_info *info);
 
+uint32_t ac_sqtt_get_active_cu(const struct radeon_info *info, unsigned se);
+
 #endif
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index 8f65657..085ff7a 100644
--- a/src/amd/vulkan/radv_sqtt.c
+++ b/src/amd/vulkan/radv_sqtt.c
@@ -85,7 +85,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
       uint64_t va = radv_buffer_get_va(device->sqtt.bo);
       uint64_t data_va = ac_sqtt_get_data_va(rad_info, &device->sqtt, va, se);
       uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
-      int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
+      int active_cu = ac_sqtt_get_active_cu(&device->physical_device->rad_info, se);
 
       if (ac_sqtt_se_is_disabled(rad_info, se))
          continue;
@@ -103,7 +103,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
 
          radeon_set_perfctr_reg(gfx_level, qf, cs, R_0367B4_SQ_THREAD_TRACE_MASK,
                                 S_0367B4_WTYPE_INCLUDE(shader_mask) | S_0367B4_SA_SEL(0) |
-                                   S_0367B4_WGP_SEL(first_active_cu / 2) | S_0367B4_SIMD_SEL(0));
+                                   S_0367B4_WGP_SEL(active_cu / 2) | S_0367B4_SIMD_SEL(0));
 
          uint32_t sqtt_token_mask = S_0367B8_REG_INCLUDE(V_0367B8_REG_INCLUDE_SQDEC | V_0367B8_REG_INCLUDE_SHDEC |
                                                          V_0367B8_REG_INCLUDE_GFXUDEC | V_0367B8_REG_INCLUDE_COMP |
@@ -134,7 +134,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
 
          radeon_set_privileged_config_reg(cs, R_008D14_SQ_THREAD_TRACE_MASK,
                                           S_008D14_WTYPE_INCLUDE(shader_mask) | S_008D14_SA_SEL(0) |
-                                             S_008D14_WGP_SEL(first_active_cu / 2) | S_008D14_SIMD_SEL(0));
+                                             S_008D14_WGP_SEL(active_cu / 2) | S_008D14_SIMD_SEL(0));
 
          uint32_t sqtt_token_mask = S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC | V_008D18_REG_INCLUDE_SHDEC |
                                                          V_008D18_REG_INCLUDE_GFXUDEC | V_008D18_REG_INCLUDE_COMP |
@@ -166,7 +166,7 @@ radv_emit_sqtt_start(const struct radv_device *device, struct radeon_cmdbuf *cs,
 
          radeon_set_uconfig_reg(cs, R_030CD4_SQ_THREAD_TRACE_CTRL, S_030CD4_RESET_BUFFER(1));
 
-         uint32_t sqtt_mask = S_030CC8_CU_SEL(first_active_cu) | S_030CC8_SH_SEL(0) | S_030CC8_SIMD_EN(0xf) |
+         uint32_t sqtt_mask = S_030CC8_CU_SEL(active_cu) | S_030CC8_SH_SEL(0) | S_030CC8_SIMD_EN(0xf) |
                               S_030CC8_VM_ID_MASK(0) | S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
                               S_030CC8_SQ_STALL_EN(1);
 
-- 
2.7.4