ac,radv,radeonsi: prepare support for multi-instance SPM SQ counters
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 14 Sep 2023 14:02:14 +0000 (16:02 +0200)
committer: Marge Bot <emma+marge@anholt.net>
Mon, 18 Sep 2023 07:07:31 +0000 (07:07 +0000)
Each SQG module can configure up to 16 counters.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25240>

src/amd/common/ac_spm.c
src/amd/common/ac_spm.h
src/amd/vulkan/radv_spm.c
src/gallium/drivers/radeonsi/si_perfcounter.c

index d15b60e..e7c32c3 100644 (file)
@@ -202,11 +202,13 @@ ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel,
                    struct ac_spm_counter_info *counter,
                    uint32_t *spm_wire)
 {
+   uint32_t instance = counter->instance;
+
    if (block_sel->b->b->b->gpu_block == SQ) {
-      for (unsigned i = 0; i < ARRAY_SIZE(spm->sq_block_sel); i++) {
-         struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[i];
-         struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
-         if (i < spm->num_used_sq_block_sel)
+      for (unsigned i = 0; i < ARRAY_SIZE(spm->sqg[instance].counters); i++) {
+         struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[i];
+
+         if (i < spm->sqg[instance].num_counters)
             continue;
 
          /* SQ doesn't support 16-bit counters. */
@@ -221,7 +223,7 @@ ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel,
          /* One wire per SQ module. */
          *spm_wire = i;
 
-         spm->num_used_sq_block_sel++;
+         spm->sqg[instance].num_counters++;
          return true;
       }
    } else {
index 1aeb666..eaeb268 100644 (file)
@@ -144,8 +144,11 @@ struct ac_spm {
    /* Block/counters selection. */
    uint32_t num_block_sel;
    struct ac_spm_block_select *block_sel;
-   uint32_t num_used_sq_block_sel;
-   struct ac_spm_block_select sq_block_sel[16];
+
+   struct {
+      uint32_t num_counters;
+      struct ac_spm_counter_select counters[16];
+   } sqg[AC_SPM_SEGMENT_TYPE_GLOBAL];
 
    /* Muxsel lines. */
    unsigned num_muxsel_lines[AC_SPM_SEGMENT_TYPE_COUNT];
index ab620f9..4a2ed63 100644 (file)
@@ -65,19 +65,25 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
    const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
    struct ac_spm *spm = &device->spm;
 
-   radeon_check_space(device->ws, cs, 3 + spm->num_used_sq_block_sel * 3);
+   for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
+      uint32_t num_counters = spm->sqg[instance].num_counters;
 
-   radeon_set_uconfig_reg(
-      cs, R_030800_GRBM_GFX_INDEX,
-      S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1) | S_030800_SE_INDEX(0));
+      if (!num_counters)
+         continue;
 
-   for (uint32_t b = 0; b < spm->num_used_sq_block_sel; b++) {
-      struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[b];
-      const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
-      uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
+      radeon_check_space(device->ws, cs, 3 + num_counters * 3);
 
-      radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, reg_base + b * 4, 1);
-      radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
+      radeon_set_uconfig_reg(
+         cs, R_030800_GRBM_GFX_INDEX,
+         S_030800_SH_BROADCAST_WRITES(1) | S_030800_INSTANCE_BROADCAST_WRITES(1) | S_030800_SE_INDEX(instance));
+
+      for (uint32_t b = 0; b < num_counters; b++) {
+         const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
+         uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
+
+         radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, reg_base + b * 4, 1);
+         radeon_emit(cs, cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
+      }
    }
 
    for (uint32_t b = 0; b < spm->num_block_sel; b++) {
index de0fa34..139ce15 100644 (file)
@@ -733,18 +733,24 @@ si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
 
    radeon_begin(cs);
 
-   radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
-                          S_030800_SH_BROADCAST_WRITES(1) |
-                          S_030800_INSTANCE_BROADCAST_WRITES(1) |
-                          S_030800_SE_INDEX(0));
+   for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
+      uint32_t num_counters = spm->sqg[instance].num_counters;
+
+      if (!num_counters)
+         continue;
 
-   for (uint32_t b = 0; b < spm->num_used_sq_block_sel; b++) {
-      struct ac_spm_block_select *sq_block_sel = &spm->sq_block_sel[b];
-      const struct ac_spm_counter_select *cntr_sel = &sq_block_sel->counters[0];
-      uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
+      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX,
+                             S_030800_SH_BROADCAST_WRITES(1) |
+                             S_030800_INSTANCE_BROADCAST_WRITES(1) |
+                             S_030800_SE_INDEX(instance));
 
-      radeon_set_uconfig_reg_seq(reg_base + b * 4, 1, false);
-      radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
+      for (uint32_t b = 0; b < num_counters; b++) {
+         const struct ac_spm_counter_select *cntr_sel = &spm->sqg[instance].counters[b];
+         uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
+
+         radeon_set_uconfig_reg_seq(reg_base + b * 4, 1, false);
+         radeon_emit(cntr_sel->sel0 | S_036700_SQC_BANK_MASK(0xf)); /* SQC_BANK_MASK only gfx10 */
+      }
    }
 
    for (uint32_t b = 0; b < spm->num_block_sel; b++) {