From 2e4e39837ac5cfce34b125050af9a09dd9e07f0e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Tue, 3 May 2022 19:36:47 -0400 Subject: [PATCH] radeonsi/gfx11: add a workaround for CB perf counters Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/common/ac_gpu_info.c | 1 + src/amd/common/ac_gpu_info.h | 1 + src/gallium/drivers/radeonsi/si_perfcounter.c | 18 +++++++++++++----- src/gallium/drivers/radeonsi/si_pipe.h | 3 ++- src/gallium/drivers/radeonsi/si_sqtt.c | 3 ++- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 02d1e15..e8bd2fa 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -1251,6 +1251,7 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->has_3d_cube_border_color_mipmap = info->has_graphics || info->family == CHIP_ARCTURUS; info->never_stop_sq_perf_counters = info->chip_class == GFX10 || info->chip_class == GFX10_3; + info->never_send_perfcounter_stop = info->chip_class == GFX11; info->has_sqtt_rb_harvest_bug = (info->family == CHIP_DIMGREY_CAVEFISH || info->family == CHIP_BEIGE_GOBY || info->family == CHIP_YELLOW_CARP || diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 8cd7a82..9a0bb67 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -90,6 +90,7 @@ struct radeon_info { bool never_stop_sq_perf_counters; bool has_sqtt_rb_harvest_bug; bool has_sqtt_auto_flush_mode_bug; + bool never_send_perfcounter_stop; /* Display features. */ /* There are 2 display DCC codepaths, because display expects unaligned DCC. */ diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index 5779a15..a0e62ae 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -152,8 +152,12 @@ static void si_pc_emit_stop(struct si_context *sctx, struct si_resource *buffer, radeon_begin(cs); radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0)); - radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0)); + + if (!sctx->screen->info.never_send_perfcounter_stop) { + radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0)); + } + radeon_set_uconfig_reg( R_036020_CP_PERFMON_CNTL, S_036020_PERFMON_STATE(sctx->screen->info.never_stop_sq_perf_counters ? @@ -179,13 +183,17 @@ void si_pc_emit_spm_start(struct radeon_cmdbuf *cs) radeon_end(); } -void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters) +void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters, + bool never_send_perfcounter_stop) { radeon_begin(cs); /* Stop windowed performance counters. */ - radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); - radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0)); + if (!never_send_perfcounter_stop) { + radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); + radeon_emit(EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0)); + } + radeon_set_sh_reg(R_00B82C_COMPUTE_PERFCOUNT_ENABLE, S_00B82C_PERFCOUNT_ENABLE(0)); /* Stop SPM counters. */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 9fa9fd4..4eae5b6 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -1522,7 +1522,8 @@ void si_destroy_perfcounters(struct si_screen *screen); void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit); void si_pc_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders); void si_pc_emit_spm_start(struct radeon_cmdbuf *cs); -void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters); +void si_pc_emit_spm_stop(struct radeon_cmdbuf *cs, bool never_stop_sq_perf_counters, + bool never_send_perfcounter_stop); void si_pc_emit_spm_reset(struct radeon_cmdbuf *cs); void si_emit_spm_setup(struct si_context *sctx, struct radeon_cmdbuf *cs); bool si_spm_init(struct si_context *sctx); diff --git a/src/gallium/drivers/radeonsi/si_sqtt.c b/src/gallium/drivers/radeonsi/si_sqtt.c index 406f8e6..86913e8 100644 --- a/src/gallium/drivers/radeonsi/si_sqtt.c +++ b/src/gallium/drivers/radeonsi/si_sqtt.c @@ -461,7 +461,8 @@ si_thread_trace_stop(struct si_context *sctx, int family, struct radeon_cmdbuf * si_cp_dma_wait_for_idle(sctx, cs); - si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters); + si_pc_emit_spm_stop(cs, sctx->screen->info.never_stop_sq_perf_counters, + sctx->screen->info.never_send_perfcounter_stop); /* Make sure to wait-for-idle before stopping SQTT. */ sctx->flags |= -- 2.7.4