ac,radv,radeonsi: rework SPM counters configuration and share it
authorSamuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 12 Sep 2023 08:28:21 +0000 (10:28 +0200)
committerMarge Bot <emma+marge@anholt.net>
Thu, 14 Sep 2023 12:30:52 +0000 (12:30 +0000)
This should make it easier to add GFX11 support.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25175>

src/amd/common/ac_spm.c
src/amd/common/ac_spm.h
src/amd/vulkan/radv_spm.c
src/gallium/drivers/radeonsi/si_perfcounter.c

index 883a6c2..7d3c73a 100644 (file)
 #include "util/u_memory.h"
 #include "ac_perfcounter.h"
 
+/* SPM counters definition. Each descriptor is {gpu_block, instance, event_id}. */
+/* GFX10+ (radeonsi's old local table marked the TCP and GL1C entries "XXX: doesn't work"). */
+static struct ac_spm_counter_descr gfx10_num_l2_hits = {TCP, 0, 0x9}; /* Number of L2 requests. NOTE(review): named "hits" -- confirm. */
+static struct ac_spm_counter_descr gfx10_num_l2_misses = {TCP, 0, 0x12}; /* Number of L2 misses. */
+static struct ac_spm_counter_descr gfx10_num_scache_hits = {SQ, 0, 0x14f}; /* Number of SCACHE hits. */
+static struct ac_spm_counter_descr gfx10_num_scache_misses = {SQ, 0, 0x150}; /* Number of SCACHE misses. */
+static struct ac_spm_counter_descr gfx10_num_scache_misses_dup = {SQ, 0, 0x151}; /* Number of SCACHE misses duplicate. */
+static struct ac_spm_counter_descr gfx10_num_icache_hits = {SQ, 0, 0x12c}; /* Number of ICACHE hits. */
+static struct ac_spm_counter_descr gfx10_num_icache_misses = {SQ, 0, 0x12d}; /* Number of ICACHE misses. */
+static struct ac_spm_counter_descr gfx10_num_icache_misses_dup = {SQ, 0, 0x12e}; /* Number of ICACHE misses duplicate. */
+static struct ac_spm_counter_descr gfx10_num_gl1c_hits = {GL1C, 0, 0xe}; /* Number of GL1C requests. NOTE(review): named "hits" -- confirm. */
+static struct ac_spm_counter_descr gfx10_num_gl1c_misses = {GL1C, 0, 0x12}; /* Number of GL1C misses. */
+static struct ac_spm_counter_descr gfx10_num_gl2c_hits = {GL2C, 0, 0x3}; /* Number of GL2C requests. NOTE(review): named "hits" -- confirm. */
+static struct ac_spm_counter_descr gfx10_num_gl2c_misses = {GL2C, 0, 0x23}; /* Number of GL2C misses (GFX10 event ID). */
+
+static struct ac_spm_counter_create_info gfx10_spm_counters[] = { /* Table returned by ac_spm_get_counters() for GFX10. */
+   {&gfx10_num_l2_hits},
+   {&gfx10_num_l2_misses},
+   {&gfx10_num_scache_hits},
+   {&gfx10_num_scache_misses},
+   {&gfx10_num_scache_misses_dup},
+   {&gfx10_num_icache_hits},
+   {&gfx10_num_icache_misses},
+   {&gfx10_num_icache_misses_dup},
+   {&gfx10_num_gl1c_hits},
+   {&gfx10_num_gl1c_misses},
+   {&gfx10_num_gl2c_hits},
+   {&gfx10_num_gl2c_misses},
+};
+
+/* GFX10.3+ */
+static struct ac_spm_counter_descr gfx103_num_gl2c_misses = {GL2C, 0, 0x2b}; /* Number of GL2C misses (event 0x2b instead of GFX10's 0x23). */
+
+static struct ac_spm_counter_create_info gfx103_spm_counters[] = { /* Table returned by ac_spm_get_counters() for GFX10_3. */
+   {&gfx10_num_l2_hits},
+   {&gfx10_num_l2_misses},
+   {&gfx10_num_scache_hits},
+   {&gfx10_num_scache_misses},
+   {&gfx10_num_scache_misses_dup},
+   {&gfx10_num_icache_hits},
+   {&gfx10_num_icache_misses},
+   {&gfx10_num_icache_misses_dup},
+   {&gfx10_num_gl1c_hits},
+   {&gfx10_num_gl1c_misses},
+   {&gfx10_num_gl2c_hits},
+   {&gfx103_num_gl2c_misses}, /* The only entry that differs from gfx10_spm_counters. */
+};
+
+static struct ac_spm_counter_create_info *
+ac_spm_get_counters(const struct radeon_info *info, unsigned *num_counters)
+{ /* Returns the SPM counter table for info->gfx_level and stores its entry count in *num_counters. */
+   switch (info->gfx_level) { /* Exact match only: GFX10 and GFX10_3 are the only levels with a table. */
+   case GFX10:
+      *num_counters = ARRAY_SIZE(gfx10_spm_counters);
+      return gfx10_spm_counters;
+   case GFX10_3:
+      *num_counters = ARRAY_SIZE(gfx103_spm_counters);
+      return gfx103_spm_counters;
+   default: /* NOTE(review): the removed per-driver tables chose 0x2b for gfx_level >= GFX10_3; confirm callers never get here with other levels. */
+      unreachable("invalid gfx_level for SPM counters");
+   }
+}
+
 static struct ac_spm_block_select *
 ac_spm_get_block_select(struct ac_spm *spm, const struct ac_pc_block *block)
 {
@@ -143,20 +206,20 @@ ac_spm_add_counter(const struct ac_perfcounters *pc,
    uint32_t spm_wire;
 
    /* Check if the GPU block is valid. */
-   block = ac_pc_get_block(pc, info->gpu_block);
+   block = ac_pc_get_block(pc, info->b->gpu_block);
    if (!block) {
       fprintf(stderr, "ac/spm: Invalid GPU block.\n");
       return false;
    }
 
    /* Check if the number of instances is valid. */
-   if (info->instance > block->num_instances) {
+   if (info->b->instance > block->num_instances) {
       fprintf(stderr, "ac/spm: Invalid instance ID.\n");
       return false;
    }
 
    /* Check if the event ID is valid. */
-   if (info->event_id > block->b->selectors) {
+   if (info->b->event_id > block->b->selectors) {
       fprintf(stderr, "ac/spm: Invalid event ID.\n");
       return false;
    }
@@ -164,9 +227,9 @@ ac_spm_add_counter(const struct ac_perfcounters *pc,
    counter = &spm->counters[spm->num_counters];
    spm->num_counters++;
 
-   counter->gpu_block = info->gpu_block;
-   counter->instance = info->instance;
-   counter->event_id = info->event_id;
+   counter->gpu_block = info->b->gpu_block;
+   counter->instance = info->b->instance;
+   counter->event_id = info->b->event_id;
 
    /* Get the select block used to configure the counter. */
    block_sel = ac_spm_get_block_select(spm, block);
@@ -194,10 +257,11 @@ ac_spm_add_counter(const struct ac_perfcounters *pc,
 
 bool ac_init_spm(const struct radeon_info *info,
                  const struct ac_perfcounters *pc,
-                 unsigned num_counters,
-                 const struct ac_spm_counter_create_info *counters,
                  struct ac_spm *spm)
 {
+   unsigned num_counters;
+   const struct ac_spm_counter_create_info *counters = ac_spm_get_counters(info, &num_counters);
+
    spm->counters = CALLOC(num_counters, sizeof(*spm->counters));
    if (!spm->counters)
       return false;
index 09a3566..9c9b15f 100644 (file)
@@ -26,12 +26,16 @@ enum ac_spm_segment_type {
    AC_SPM_SEGMENT_TYPE_COUNT,
 };
 
-struct ac_spm_counter_create_info {
+struct ac_spm_counter_descr {
    enum ac_pc_gpu_block gpu_block; /* Block ID; ac_spm_add_counter() resolves it with ac_pc_get_block(). */
    uint32_t instance;              /* Instance index; ac_spm_add_counter() checks it against block->num_instances. */
    uint32_t event_id;              /* Event ID; ac_spm_add_counter() checks it against block->b->selectors. */
 };
 
+struct ac_spm_counter_create_info {
+   struct ac_spm_counter_descr *b; /* Counter descriptor (gpu_block, instance, event_id) for this entry. */
+};
+
 struct ac_spm_muxsel {
    uint16_t counter      : 6;
    uint16_t block        : 4;
@@ -105,8 +109,6 @@ struct ac_spm_trace {
 
 bool ac_init_spm(const struct radeon_info *info,
                  const struct ac_perfcounters *pc,
-                 unsigned num_counters,
-                 const struct ac_spm_counter_create_info *counters,
                  struct ac_spm *spm);
 void ac_destroy_spm(struct ac_spm *spm);
 
index 6c076f1..fcb02fa 100644 (file)
@@ -182,26 +182,12 @@ radv_spm_init(struct radv_device *device)
 {
    const struct radeon_info *info = &device->physical_device->rad_info;
    struct ac_perfcounters *pc = &device->physical_device->ac_perfcounters;
-   struct ac_spm_counter_create_info spm_counters[] = {
-      {TCP, 0, 0x9},                                       /* Number of L2 requests. */
-      {TCP, 0, 0x12},                                      /* Number of L2 misses. */
-      {SQ, 0, 0x14f},                                      /* Number of SCACHE hits. */
-      {SQ, 0, 0x150},                                      /* Number of SCACHE misses. */
-      {SQ, 0, 0x151},                                      /* Number of SCACHE misses duplicate. */
-      {SQ, 0, 0x12c},                                      /* Number of ICACHE hits. */
-      {SQ, 0, 0x12d},                                      /* Number of ICACHE misses. */
-      {SQ, 0, 0x12e},                                      /* Number of ICACHE misses duplicate. */
-      {GL1C, 0, 0xe},                                      /* Number of GL1C requests. */
-      {GL1C, 0, 0x12},                                     /* Number of GL1C misses. */
-      {GL2C, 0, 0x3},                                      /* Number of GL2C requests. */
-      {GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23}, /* Number of GL2C misses. */
-   };
 
    /* We failed to initialize the performance counters. */
    if (!pc->blocks)
       return false;
 
-   if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &device->spm))
+   if (!ac_init_spm(info, pc, &device->spm))
       return false;
 
    if (!radv_spm_init_bo(device))
index cd57dc6..030557b 100644 (file)
@@ -868,35 +868,11 @@ si_spm_init(struct si_context *sctx)
    sctx->screen->perfcounters->num_instance_cs_dwords = 3;
 
    struct ac_perfcounters *pc = &sctx->screen->perfcounters->base;
-   struct ac_spm_counter_create_info spm_counters[] = {
-
-      /* XXX: doesn't work */
-      {TCP, 0, 0x9},    /* Number of L2 requests. */
-      {TCP, 0, 0x12},   /* Number of L2 misses. */
-
-      /* Scalar cache hit */
-      {SQ, 0, 0x14f},   /* Number of SCACHE hits. */
-      {SQ, 0, 0x150},   /* Number of SCACHE misses. */
-      {SQ, 0, 0x151},   /* Number of SCACHE misses duplicate. */
-
-      /* Instruction cache hit */
-      {SQ, 0, 0x12c},   /* Number of ICACHE hits. */
-      {SQ, 0, 0x12d},   /* Number of ICACHE misses. */
-      {SQ, 0, 0x12e},   /* Number of ICACHE misses duplicate. */
-
-      /* XXX: doesn't work */
-      {GL1C, 0, 0xe},   /* Number of GL1C requests. */
-      {GL1C, 0, 0x12},  /* Number of GL1C misses. */
-
-      /* L2 cache hit */
-      {GL2C, 0, 0x3},   /* Number of GL2C requests. */
-      {GL2C, 0, info->gfx_level >= GFX10_3 ? 0x2b : 0x23},  /* Number of GL2C misses. */
-   };
 
    if (!ac_init_perfcounters(info, false, false, pc))
       return false;
 
-   if (!ac_init_spm(info, pc, ARRAY_SIZE(spm_counters), spm_counters, &sctx->spm))
+   if (!ac_init_spm(info, pc, &sctx->spm))
       return false;
 
    if (!si_spm_init_bo(sctx))