From ed0d3d8cbde1aa42e7610153ca325868fd79cfc1 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 14 Sep 2023 16:02:14 +0200 Subject: [PATCH] ac,radv,radeonsi: prepare support for multi-instance SPM SQ counters Each SQG module can configure up to 16 counters. Signed-off-by: Samuel Pitoiset Part-of: --- src/amd/common/ac_spm.c | 12 +++++++----- src/amd/common/ac_spm.h | 7 +++++-- src/amd/vulkan/radv_spm.c | 26 ++++++++++++++++---------- src/gallium/drivers/radeonsi/si_perfcounter.c | 26 ++++++++++++++++---------- 4 files changed, 44 insertions(+), 27 deletions(-) diff --git a/src/amd/common/ac_spm.c b/src/amd/common/ac_spm.c index d15b60e..e7c32c3 100644 --- a/src/amd/common/ac_spm.c +++ b/src/amd/common/ac_spm.c @@ -202,11 +202,13 @@ ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel, struct ac_spm_counter_info *counter, uint32_t *spm_wire) { + uint32_t instance = counter->instance; + if (block_sel->b->b->b->gpu_block == SQ) { - for (unsigned i = 0; i < ARRAY_SIZE(spm->sq_block_sel); i++) { - struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[i]; - struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0]; - if (i < spm->num_used_sq_block_sel) + for (unsigned i = 0; i < ARRAY_SIZE(spm->sqg[instance].counters); i++) { + struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[i]; + + if (i < spm->sqg[instance].num_counters) continue; /* SQ doesn't support 16-bit counters. */ @@ -221,7 +223,7 @@ ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel, /* One wire per SQ module. */ *spm_wire = i; - spm->num_used_sq_block_sel++; + spm->sqg[instance].num_counters++; return true; } } else { diff --git a/src/amd/common/ac_spm.h b/src/amd/common/ac_spm.h index 1aeb666..eaeb268 100644 --- a/src/amd/common/ac_spm.h +++ b/src/amd/common/ac_spm.h @@ -144,8 +144,11 @@ struct ac_spm { /* Block/counters selection. 
*/ uint32_t num_block_sel; struct ac_spm_block_select *block_sel; - uint32_t num_used_sq_block_sel; - struct ac_spm_block_select sq_block_sel[16]; + + struct { + uint32_t num_counters; + struct ac_spm_counter_select counters[16]; + } sqg[AC_SPM_SEGMENT_TYPE_GLOBAL]; /* Muxsel lines. */ unsigned num_muxsel_lines[AC_SPM_SEGMENT_TYPE_COUNT]; diff --git a/src/amd/vulkan/radv_spm.c b/src/amd/vulkan/radv_spm.c index ab620f9..4a2ed63 100644 --- a/src/amd/vulkan/radv_spm.c +++ b/src/amd/vulkan/radv_spm.c @@ -65,19 +65,25 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; struct ac_spm *spm = &device->spm; - radeon_check_space(device->ws, cs, 3 + spm->num_used_sq_block_sel * 3); + for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) { + uint32_t num_counters = spm->sqg[instance].num_counters; - radeon_set_uconfig_reg( - cs, R_030800_GRBM_GFX_INDEX, - S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1) | S_030800_SE_INDEX(0)); + if (!num_counters) + continue; - for (uint32_t b = 0; b < spm->num_used_sq_block_sel; b++) { - struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[b]; - const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0]; - uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; + radeon_check_space(device->ws, cs, 3 + num_counters * 3); - radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, reg_base + b * 4, 1); - radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */ + radeon_set_uconfig_reg( + cs, R_030800_GRBM_GFX_INDEX, + S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1) | S_030800_SE_INDEX(instance)); + + for (uint32_t b = 0; b < num_counters; b++) { + const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b]; + uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; + + 
radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, reg_base + b * 4, 1); + radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */ + } } for (uint32_t b = 0; b < spm->num_block_sel; b++) { diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c index de0fa34..139ce15 100644 --- a/src/gallium/drivers/radeonsi/si_perfcounter.c +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c @@ -733,18 +733,24 @@ si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs) radeon_begin(cs); - radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, - S_030800_SH_BROADCAST_WRITES(1) | - S_030800_INSTANCE_BROADCAST_WRITES(1) | - S_030800_SE_INDEX(0)); + for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) { + uint32_t num_counters = spm->sqg[instance].num_counters; + + if (!num_counters) + continue; - for (uint32_t b = 0; b < spm->num_used_sq_block_sel; b++) { - struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[b]; - const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0]; - uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; + radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, + S_030800_SH_BROADCAST_WRITES(1) | + S_030800_INSTANCE_BROADCAST_WRITES(1) | + S_030800_SE_INDEX(instance)); - radeon_set_uconfig_reg_seq(reg_base + b * 4, 1, false); - radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */ + for (uint32_t b = 0; b < num_counters; b++) { + const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b]; + uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT; + + radeon_set_uconfig_reg_seq(reg_base + b * 4, 1, false); + radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */ + } } for (uint32_t b = 0; b < spm->num_block_sel; b++) { -- 2.7.4