radv: add SPM support for GFX11
author Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 12 Sep 2023 15:12:04 +0000 (17:12 +0200)
committer Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 25 Sep 2023 07:05:58 +0000 (09:05 +0200)
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25271>

src/amd/vulkan/radv_perfcounter.c
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_spm.c
src/amd/vulkan/radv_sqtt.c

index c8746ae..884fba6 100644 (file)
 #include "sid.h"
 
 void
-radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders)
+radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders)
 {
-   radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2);
-   radeon_emit(cs, shaders & 0x7f);
-   radeon_emit(cs, 0xffffffff);
+   if (device->physical_device->rad_info.gfx_level >= GFX11) {
+      radeon_set_uconfig_reg(cs, R_036760_SQG_PERFCOUNTER_CTRL, shaders & 0x7f);
+   } else {
+      radeon_set_uconfig_reg_seq(cs, R_036780_SQ_PERFCOUNTER_CTRL, 2);
+      radeon_emit(cs, shaders & 0x7f);
+      radeon_emit(cs, 0xffffffff);
+   }
 }
 
 static void
@@ -644,7 +648,7 @@ radv_pc_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_pc_query_poo
 
    radv_emit_inhibit_clockgating(cmd_buffer->device, cs, true);
    radv_emit_spi_config_cntl(cmd_buffer->device, cs, true);
-   radv_perfcounter_emit_shaders(cs, 0x7f);
+   radv_perfcounter_emit_shaders(cmd_buffer->device, cs, 0x7f);
 
    for (unsigned pass = 0; pass < pool->num_passes; ++pass) {
       uint64_t pred_va = radv_buffer_get_va(cmd_buffer->device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET + 8 * pass;
index fd96218..6266fe3 100644 (file)
@@ -3685,7 +3685,7 @@ radv_has_pops(const struct radv_physical_device *pdevice)
 }
 
 /* radv_perfcounter.c */
-void radv_perfcounter_emit_shaders(struct radeon_cmdbuf *cs, unsigned shaders);
+void radv_perfcounter_emit_shaders(struct radv_device *device, struct radeon_cmdbuf *cs, unsigned shaders);
 void radv_perfcounter_emit_spm_reset(struct radeon_cmdbuf *cs);
 void radv_perfcounter_emit_spm_start(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
 void radv_perfcounter_emit_spm_stop(struct radv_device *device, struct radeon_cmdbuf *cs, int family);
index a1d6357..ba8b705 100644 (file)
@@ -65,6 +65,27 @@ radv_emit_spm_counters(struct radv_device *device, struct radeon_cmdbuf *cs, enu
    const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
    struct ac_spm *spm = &device->spm;
 
+   if (gfx_level >= GFX11) {
+      for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sq_wgp); instance++) {
+         uint32_t num_counters = spm->sq_wgp[instance].num_counters;
+
+         if (!num_counters)
+            continue;
+
+         radeon_check_space(device->ws, cs, 3 + num_counters * 3);
+
+         radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX, spm->sq_wgp[instance].grbm_gfx_index);
+
+         for (uint32_t b = 0; b < num_counters; b++) {
+            const struct ac_spm_counter_select *cntr_sel = &spm->sq_wgp[instance].counters[b];
+            uint32_t reg_base = R_036700_SQ_PERFCOUNTER0_SELECT;
+
+            radeon_set_uconfig_reg_seq_perfctr(gfx_level, qf, cs, reg_base + b * 4, 1);
+            radeon_emit(cs, cntr_sel->sel0);
+         }
+      }
+   }
+
    for (uint32_t instance = 0; instance < ARRAY_SIZE(spm->sqg); instance++) {
       uint32_t num_counters = spm->sqg[instance].num_counters;
 
@@ -148,15 +169,25 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r
    }
 
    radeon_set_uconfig_reg(cs, R_03726C_RLC_SPM_ACCUM_MODE, 0);
-   radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
-   radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
-                          S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
-                             S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
-                             S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
-                             S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
-   radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
-                          S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
-                             S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
+
+   if (device->physical_device->rad_info.gfx_level >= GFX11) {
+      radeon_set_uconfig_reg(cs, R_03721C_RLC_SPM_PERFMON_SEGMENT_SIZE,
+                             S_03721C_TOTAL_NUM_SEGMENT(total_muxsel_lines) |
+                                S_03721C_GLOBAL_NUM_SEGMENT(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]) |
+                                S_03721C_SE_NUM_SEGMENT(spm->max_se_muxsel_lines));
+
+      radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_RING_WRPTR, 0);
+   } else {
+      radeon_set_uconfig_reg(cs, R_037210_RLC_SPM_PERFMON_SEGMENT_SIZE, 0);
+      radeon_set_uconfig_reg(cs, R_03727C_RLC_SPM_PERFMON_SE3TO0_SEGMENT_SIZE,
+                             S_03727C_SE0_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE0]) |
+                                S_03727C_SE1_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE1]) |
+                                S_03727C_SE2_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE2]) |
+                                S_03727C_SE3_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_SE3]));
+      radeon_set_uconfig_reg(cs, R_037280_RLC_SPM_PERFMON_GLB_SEGMENT_SIZE,
+                             S_037280_PERFMON_SEGMENT_SIZE(total_muxsel_lines) |
+                                S_037280_GLOBAL_NUM_LINE(spm->num_muxsel_lines[AC_SPM_SEGMENT_TYPE_GLOBAL]));
+   }
 
    /* Upload each muxsel ram to the RLC. */
    for (unsigned s = 0; s < AC_SPM_SEGMENT_TYPE_COUNT; s++) {
@@ -169,13 +200,15 @@ radv_emit_spm_setup(struct radv_device *device, struct radeon_cmdbuf *cs, enum r
       if (s == AC_SPM_SEGMENT_TYPE_GLOBAL) {
          grbm_gfx_index |= S_030800_SE_BROADCAST_WRITES(1);
 
-         rlc_muxsel_addr = R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
-         rlc_muxsel_data = R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
+         rlc_muxsel_addr =
+            gfx_level >= GFX11 ? R_037220_RLC_SPM_GLOBAL_MUXSEL_ADDR : R_037224_RLC_SPM_GLOBAL_MUXSEL_ADDR;
+         rlc_muxsel_data =
+            gfx_level >= GFX11 ? R_037224_RLC_SPM_GLOBAL_MUXSEL_DATA : R_037228_RLC_SPM_GLOBAL_MUXSEL_DATA;
       } else {
          grbm_gfx_index |= S_030800_SE_INDEX(s);
 
-         rlc_muxsel_addr = R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
-         rlc_muxsel_data = R_037220_RLC_SPM_SE_MUXSEL_DATA;
+         rlc_muxsel_addr = gfx_level >= GFX11 ? R_037228_RLC_SPM_SE_MUXSEL_ADDR : R_03721C_RLC_SPM_SE_MUXSEL_ADDR;
+         rlc_muxsel_data = gfx_level >= GFX11 ? R_03722C_RLC_SPM_SE_MUXSEL_DATA : R_037220_RLC_SPM_SE_MUXSEL_DATA;
       }
 
       radeon_check_space(device->ws, cs, 3 + spm->num_muxsel_lines[s] * (7 + AC_SPM_MUXSEL_LINE_SIZE));
index 4e72271..bdb9ec4 100644 (file)
@@ -685,7 +685,7 @@ radv_begin_sqtt(struct radv_queue *queue)
 
    if (device->spm.bo) {
       /* Enable all shader stages by default. */
-      radv_perfcounter_emit_shaders(cs, ac_sqtt_get_shader_mask(&device->physical_device->rad_info));
+      radv_perfcounter_emit_shaders(device, cs, ac_sqtt_get_shader_mask(&device->physical_device->rad_info));
 
       radv_emit_spm_setup(device, cs, family);
    }