ac,radv,radeonsi: prepare support for multi-instance SPM generic counters
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 14 Sep 2023 15:39:00 +0000 (17:39 +0200)
committer: Marge Bot <emma+marge@anholt.net>
Mon, 18 Sep 2023 07:07:31 +0000 (07:07 +0000)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25240>

src/amd/common/ac_spm.c
src/amd/common/ac_spm.h
src/amd/vulkan/radv_spm.c
src/gallium/drivers/radeonsi/si_perfcounter.c

index e7c32c3..448dcca 100644 (file)
@@ -125,15 +125,14 @@ ac_spm_get_block_select(struct ac_spm *spm, const struct ac_pc_block *block)
    memset(new_block_sel, 0, sizeof(*new_block_sel));
 
    new_block_sel->b = block;
-   new_block_sel->num_counters = block->b->b->num_spm_counters;
+   new_block_sel->instances =
+      calloc(block->num_global_instances, sizeof(*new_block_sel->instances));
+   if (!new_block_sel->instances)
+      return NULL;
+   new_block_sel->num_instances = block->num_global_instances;
 
-   /* Broadcast global block writes to SEs and SAs */
-   if (!(block->b->b->flags & (AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER)))
-      new_block_sel->grbm_gfx_index = S_030800_SE_BROADCAST_WRITES(1) |
-                                      S_030800_SH_BROADCAST_WRITES(1);
-   /* Broadcast per SE block writes to SAs */
-   else if (block->b->b->flags & AC_PC_BLOCK_SE)
-      new_block_sel->grbm_gfx_index = S_030800_SH_BROADCAST_WRITES(1);
+   for (unsigned i = 0; i < new_block_sel->num_instances; i++)
+      new_block_sel->instances[i].num_counters = block->b->b->num_spm_counters;
 
    return new_block_sel;
 }
@@ -197,9 +196,37 @@ ac_spm_init_muxsel(const struct ac_pc_block *block,
    muxsel->instance = mapping->instance_index;
 }
 
+static uint32_t
+ac_spm_init_grbm_gfx_index(const struct ac_pc_block *block,
+                           const struct ac_spm_instance_mapping *mapping)
+{
+   uint32_t grbm_gfx_index = 0;
+
+   grbm_gfx_index |= S_030800_SE_INDEX(mapping->se_index) |
+                     S_030800_SH_INDEX(mapping->sa_index) |
+                     S_030800_INSTANCE_INDEX(mapping->instance_index);
+
+   switch (block->b->b->gpu_block) {
+   case GL2C:
+      /* Global blocks. */
+      grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
+      break;
+   case SQ:
+      /* Per-SE blocks. */
+      grbm_gfx_index |= S_030800_SH_BROADCAST_WRITES(1);
+      break;
+   default:
+      /* Other blocks shouldn't broadcast. */
+      break;
+   }
+
+   return grbm_gfx_index;
+}
+
 static bool
 ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel,
                    struct ac_spm_counter_info *counter,
+                   const struct ac_spm_instance_mapping *mapping,
                    uint32_t *spm_wire)
 {
    uint32_t instance = counter->instance;
@@ -228,8 +255,16 @@ ac_spm_map_counter(struct ac_spm *spm, struct ac_spm_block_select *block_sel,
       }
    } else {
       /* Generic blocks. */
-      for (unsigned i = 0; i < block_sel->num_counters; i++) {
-         struct ac_spm_counter_select *cntr_sel = &block_sel->counters[i];
+      struct ac_spm_block_instance *block_instance =
+         &block_sel->instances[instance];
+
+      if (!block_instance->grbm_gfx_index) {
+         block_instance->grbm_gfx_index =
+            ac_spm_init_grbm_gfx_index(block_sel->b, mapping);
+      }
+
+      for (unsigned i = 0; i < block_instance->num_counters; i++) {
+         struct ac_spm_counter_select *cntr_sel = &block_instance->counters[i];
          int index = ffs(~cntr_sel->active) - 1;
 
          switch (index) {
@@ -320,7 +355,7 @@ ac_spm_add_counter(const struct radeon_info *info,
    }
 
    /* Map the counter to the select block. */
-   if (!ac_spm_map_counter(spm, block_sel, counter, &spm_wire)) {
+   if (!ac_spm_map_counter(spm, block_sel, counter, &instance_mapping, &spm_wire)) {
       fprintf(stderr, "ac/spm: No free slots available!\n");
       return false;
    }
@@ -462,6 +497,11 @@ void ac_destroy_spm(struct ac_spm *spm)
    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
       FREE(spm->muxsel_lines[s]);
    }
+
+   for (unsigned i = 0; i < spm->num_block_sel; i++) {
+      FREE(spm->block_sel[i].instances);
+   }
+
    FREE(spm->block_sel);
    FREE(spm->counters);
 }
index eaeb268..5f05129 100644 (file)
@@ -122,14 +122,20 @@ struct ac_spm_counter_select {
    uint32_t sel1;
 };
 
-struct ac_spm_block_select {
-   const struct ac_pc_block *b;
+struct ac_spm_block_instance {
    uint32_t grbm_gfx_index;
 
    uint32_t num_counters;
    struct ac_spm_counter_select counters[AC_SPM_MAX_COUNTER_PER_BLOCK];
 };
 
+struct ac_spm_block_select {
+   const struct ac_pc_block *b;
+
+   uint32_t num_instances;
+   struct ac_spm_block_instance *instances;
+};
+
 struct ac_spm {
    /* struct radeon_winsys_bo or struct pb_buffer */
    void *bo;
index 4a2ed63..a1d6357 100644 (file)
@@ -90,21 +90,25 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
       struct ac_spm_block_select *block_sel = &spm->block_sel[b];
       struct ac_pc_block_base *regs = block_sel->b->b->b;
 
-      radeon_check_space(device->ws, cs, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
+      for (unsigned i = 0; i < block_sel->num_instances; i++) {
+         struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
 
-      radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index);
+         radeon_check_space(device->ws, cs, 3 + (AC_SPM_MAX_COUNTER_PER_BLOCK * 6));
 
-      for (unsigned c = 0; c < block_sel->num_counters; c++) {
-         const struct ac_spm_counter_select *cntr_sel = &block_sel->counters[c];
+         radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
 
-         if (!cntr_sel->active)
-            continue;
+         for (unsigned c = 0; c < block_instance->num_counters; c++) {
+            const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
 
-         radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, regs->select0[c], 1);
-         radeon_emit(cs, cntr_sel->sel0);
+            if (!cntr_sel->active)
+               continue;
 
-         radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, regs->select1[c], 1);
-         radeon_emit(cs, cntr_sel->sel1);
+            radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, regs->select0[c], 1);
+            radeon_emit(cs, cntr_sel->sel0);
+
+            radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, regs->select1[c], 1);
+            radeon_emit(cs, cntr_sel->sel1);
+         }
       }
    }
 
index 139ce15..0a68d7e 100644 (file)
@@ -757,19 +757,23 @@ si_emit_spm_counters(struct si_context *sctx, struct radeon_cmdbuf *cs)
       struct ac_spm_block_select *block_sel = &spm->block_sel[b];
       struct ac_pc_block_base *regs = block_sel->b->b->b;
 
-      radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_sel->grbm_gfx_index);
+      for (unsigned i = 0; i < block_sel->num_instances; i++) {
+         struct ac_spm_block_instance *block_instance = &block_sel->instances[i];
 
-      for (unsigned c = 0; c < block_sel->num_counters; c++) {
-         const struct ac_spm_counter_select *cntr_sel = &block_sel->counters[c];
+         radeon_set_uconfig_reg(R_030800_GRBM_GFX_INDEX, block_instance->grbm_gfx_index);
 
-         if (!cntr_sel->active)
-            continue;
+         for (unsigned c = 0; c < block_instance->num_counters; c++) {
+            const struct ac_spm_counter_select *cntr_sel = &block_instance->counters[c];
 
-         radeon_set_uconfig_reg_seq(regs->select0[c], 1, false);
-         radeon_emit(cntr_sel->sel0);
+            if (!cntr_sel->active)
+               continue;
 
-         radeon_set_uconfig_reg_seq(regs->select1[c], 1, false);
-         radeon_emit(cntr_sel->sel1);
+            radeon_set_uconfig_reg_seq(regs->select0[c], 1, false);
+            radeon_emit(cntr_sel->sel0);
+
+            radeon_set_uconfig_reg_seq(regs->select1[c], 1, false);
+            radeon_emit(cntr_sel->sel1);
+         }
       }
    }