amd: improve the IB parser, parse more packets
author Marek Olšák <marek.olsak@amd.com>
Fri, 2 Jun 2023 18:33:10 +0000 (14:33 -0400)
committer Marge Bot <emma+marge@anholt.net>
Sat, 17 Jun 2023 23:42:20 +0000 (23:42 +0000)
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23687>

src/amd/common/ac_debug.c
src/amd/common/sid.h
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_cs.h

index b24572e..088ab09 100644 (file)
@@ -35,12 +35,14 @@ DEBUG_GET_ONCE_BOOL_OPTION(color, "AMD_COLOR", true);
 #define COLOR_GREEN  "\033[1;32m"
 #define COLOR_YELLOW "\033[1;33m"
 #define COLOR_CYAN   "\033[1;36m"
+#define COLOR_PURPLE "\033[1;35m"
 
 #define O_COLOR_RESET  (debug_get_option_color() ? COLOR_RESET : "")
 #define O_COLOR_RED    (debug_get_option_color() ? COLOR_RED : "")
 #define O_COLOR_GREEN  (debug_get_option_color() ? COLOR_GREEN : "")
 #define O_COLOR_YELLOW (debug_get_option_color() ? COLOR_YELLOW : "")
 #define O_COLOR_CYAN   (debug_get_option_color() ? COLOR_CYAN : "")
+#define O_COLOR_PURPLE (debug_get_option_color() ? COLOR_PURPLE : "")
 
 #define INDENT_PKT 8
 
@@ -84,6 +86,12 @@ static void print_value(FILE *file, uint32_t value, int bits)
    }
 }
 
+static void print_reserved_dword(FILE *file, uint32_t value)
+{ /* Writes one "(reserved)" placeholder line to file; value is accepted but never read in this body. */
+   print_spaces(file, INDENT_PKT); /* same INDENT_PKT argument that print_named_value passes */
+   fprintf(file, "(reserved)\n");
+}
+
 static void print_named_value(FILE *file, const char *name, uint32_t value, int bits)
 {
    print_spaces(file, INDENT_PKT);
@@ -93,6 +101,15 @@ static void print_named_value(FILE *file, const char *name, uint32_t value, int
    print_value(file, value, bits);
 }
 
+static void print_string_value(FILE *file, const char *name, const char *value)
+{ /* Writes "<name> <- <value>" followed by a newline to file, for values that are text rather than numbers. */
+   print_spaces(file, INDENT_PKT);
+   fprintf(file, "%s%s%s <- ", /* name is wrapped in the yellow and reset sequences */
+           O_COLOR_YELLOW, name, /* the O_COLOR_* macros yield "" when debug_get_option_color() is false */
+           O_COLOR_RESET);
+   fprintf(file, "%s\n", value); /* value is written as-is; this function adds no color around it */
+}
+
 static const struct si_reg *find_register(enum amd_gfx_level gfx_level, enum radeon_family family,
                                           unsigned offset)
 {
@@ -242,39 +259,65 @@ static void ac_parse_set_reg_packet(FILE *f, unsigned count, unsigned reg_offset
    unsigned index = reg_dw >> 28;
    int i;
 
-   if (index != 0) {
-      print_spaces(f, INDENT_PKT);
-      fprintf(f, "INDEX = %u\n", index);
-   }
+   if (index != 0)
+      print_named_value(f, "INDEX", index, 32);
 
    for (i = 0; i < count; i++)
       ac_dump_reg(f, ib->gfx_level, ib->family, reg + i * 4, ac_ib_get(ib), ~0);
 }
 
+static void ac_parse_set_reg_pairs_packed_packet(FILE *f, unsigned count, unsigned reg_base,
+                                                 struct ac_ib_parser *ib)
+{ /* Dumps a packed register-pairs body: one REG_COUNT dword, then count dwords in groups of (offsets, value0, value1). */
+   unsigned reg_offset0 = 0, reg_offset1 = 0; /* offsets decoded from the most recent offsets dword */
+
+   print_named_value(f, "REG_COUNT", ac_ib_get(ib), 32); /* printed only; the loop below is bounded by count, not by this value */
+
+   for (unsigned i = 0; i < count; i++) { /* each iteration calls ac_ib_get(ib) exactly once */
+      if (i % 3 == 0) { /* 1st of each group: two 16-bit offsets packed into one dword */
+         unsigned tmp = ac_ib_get(ib);
+         reg_offset0 = ((tmp & 0xffff) << 2) + reg_base; /* low 16 bits, multiplied by 4, added to reg_base */
+         reg_offset1 = ((tmp >> 16) << 2) + reg_base; /* high 16 bits, multiplied by 4, added to reg_base */
+      } else if (i % 3 == 1) { /* 2nd of each group: value dumped against reg_offset0 */
+         ac_dump_reg(f, ib->gfx_level, ib->family, reg_offset0, ac_ib_get(ib), ~0);
+      } else { /* 3rd of each group: value dumped against reg_offset1 */
+         ac_dump_reg(f, ib->gfx_level, ib->family, reg_offset1, ac_ib_get(ib), ~0);
+      }
+   }
+}
+
 static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
                              int *current_trace_id)
 {
    unsigned first_dw = ib->cur_dw;
    int count = PKT_COUNT_G(header);
    unsigned op = PKT3_IT_OPCODE_G(header);
-   const char *predicate = PKT3_PREDICATE(header) ? "(predicate)" : "";
+   const char *shader_type = PKT3_SHADER_TYPE_G(header) ? "(shader_type=compute)" : "";
+   const char *predicated = PKT3_PREDICATE(header) ? "(predicated)" : "";
+   const char *reset_filter_cam = PKT3_RESET_FILTER_CAM_G(header) ? "(reset_filter_cam)" : "";
    int i;
+   unsigned tmp;
 
    /* Print the name first. */
    for (i = 0; i < ARRAY_SIZE(packet3_table); i++)
       if (packet3_table[i].op == op)
          break;
 
-   if (i < ARRAY_SIZE(packet3_table)) {
-      const char *name = sid_strings + packet3_table[i].name_offset;
+   const char *pkt_name = i < ARRAY_SIZE(packet3_table) ? sid_strings + packet3_table[i].name_offset
+                                                        : "UNKNOWN";
+   const char *color;
 
-      if (op == PKT3_SET_CONTEXT_REG || op == PKT3_SET_CONFIG_REG || op == PKT3_SET_UCONFIG_REG ||
-          op == PKT3_SET_UCONFIG_REG_INDEX || op == PKT3_SET_SH_REG || op == PKT3_SET_SH_REG_INDEX)
-         fprintf(f, "%s%s%s%s:\n", O_COLOR_CYAN, name, predicate, O_COLOR_RESET);
-      else
-         fprintf(f, "%s%s%s%s:\n", O_COLOR_GREEN, name, predicate, O_COLOR_RESET);
-   } else
-      fprintf(f, "%sPKT3_UNKNOWN 0x%x%s%s:\n", O_COLOR_RED, op, predicate, O_COLOR_RESET);
+   if (strstr(pkt_name, "DRAW") || strstr(pkt_name, "DISPATCH"))
+      color = O_COLOR_PURPLE;
+   else if (strstr(pkt_name, "SET") == pkt_name && strstr(pkt_name, "REG"))
+      color = O_COLOR_CYAN;
+   else if (i >= ARRAY_SIZE(packet3_table))
+      color = O_COLOR_RED;
+   else
+      color = O_COLOR_GREEN;
+
+   fprintf(f, "%s%s%s%s%s%s:\n", color, pkt_name, O_COLOR_RESET,
+           shader_type, predicated, reset_filter_cam);
 
    /* Print the contents. */
    switch (op) {
@@ -292,25 +335,44 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
    case PKT3_SET_SH_REG_INDEX:
       ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib);
       break;
+   case PKT3_SET_CONTEXT_REG_PAIRS_PACKED:
+      ac_parse_set_reg_pairs_packed_packet(f, count, SI_CONTEXT_REG_OFFSET, ib);
+      break;
+   case PKT3_SET_SH_REG_PAIRS_PACKED:
+   case PKT3_SET_SH_REG_PAIRS_PACKED_N:
+      ac_parse_set_reg_pairs_packed_packet(f, count, SI_SH_REG_OFFSET, ib);
+      break;
    case PKT3_ACQUIRE_MEM:
-      if (ib->gfx_level >= GFX11 && G_585_PWS_ENA(ib->ib[ib->cur_dw + 5])) {
-         ac_dump_reg(f, ib->gfx_level, ib->family, R_580_ACQUIRE_MEM_PWS_2, ac_ib_get(ib), ~0);
-         print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
-         print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
-         print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
-         print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
-         ac_dump_reg(f, ib->gfx_level, ib->family, R_585_ACQUIRE_MEM_PWS_7, ac_ib_get(ib), ~0);
-         ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
-         break;
+      if (ib->gfx_level >= GFX11) {
+         if (G_585_PWS_ENA(ib->ib[ib->cur_dw + 5])) {
+            ac_dump_reg(f, ib->gfx_level, ib->family, R_580_ACQUIRE_MEM_PWS_2, ac_ib_get(ib), ~0);
+            print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
+            print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
+            print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
+            print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
+            ac_dump_reg(f, ib->gfx_level, ib->family, R_585_ACQUIRE_MEM_PWS_7, ac_ib_get(ib), ~0);
+            ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
+         } else {
+            print_string_value(f, "ENGINE_SEL", ac_ib_get(ib) & 0x80000000 ? "ME" : "PFP");
+            print_named_value(f, "GCR_SIZE", ac_ib_get(ib), 32);
+            print_named_value(f, "GCR_SIZE_HI", ac_ib_get(ib), 25);
+            print_named_value(f, "GCR_BASE_LO", ac_ib_get(ib), 32);
+            print_named_value(f, "GCR_BASE_HI", ac_ib_get(ib), 32);
+            print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
+            ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
+         }
+      } else {
+         tmp = ac_ib_get(ib);
+         ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F0_CP_COHER_CNTL, tmp, 0x7fffffff);
+         print_string_value(f, "ENGINE_SEL", tmp & 0x80000000 ? "ME" : "PFP");
+         ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
+         ac_dump_reg(f, ib->gfx_level, ib->family, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
+         ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
+         ac_dump_reg(f, ib->gfx_level, ib->family, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
+         print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
+         if (ib->gfx_level >= GFX10)
+            ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
       }
-      ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
-      ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
-      ac_dump_reg(f, ib->gfx_level, ib->family, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
-      ac_dump_reg(f, ib->gfx_level, ib->family, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
-      ac_dump_reg(f, ib->gfx_level, ib->family, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
-      print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
-      if (ib->gfx_level >= GFX10)
-         ac_dump_reg(f, ib->gfx_level, ib->family, R_586_GCR_CNTL, ac_ib_get(ib), ~0);
       break;
    case PKT3_SURFACE_SYNC:
       if (ib->gfx_level >= GFX7) {
@@ -477,6 +539,7 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
    case PKT3_CLEAR_STATE:
    case PKT3_INCREMENT_DE_COUNTER:
    case PKT3_PFP_SYNC_ME:
+      print_reserved_dword(f, ac_ib_get(ib));
       break;
    case PKT3_NOP:
       if (header == PKT3_NOP_PAD) {
@@ -512,6 +575,22 @@ static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
          break;
       }
       break;
+   case PKT3_DISPATCH_DIRECT:
+      ac_dump_reg(f, ib->gfx_level, ib->family, R_00B804_COMPUTE_DIM_X, ac_ib_get(ib), ~0);
+      ac_dump_reg(f, ib->gfx_level, ib->family, R_00B808_COMPUTE_DIM_Y, ac_ib_get(ib), ~0);
+      ac_dump_reg(f, ib->gfx_level, ib->family, R_00B80C_COMPUTE_DIM_Z, ac_ib_get(ib), ~0);
+      ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR,
+                  ac_ib_get(ib), ~0);
+      break;
+   case PKT3_DISPATCH_INDIRECT:
+      print_named_value(f, "DATA_OFFSET", ac_ib_get(ib), 32);
+      ac_dump_reg(f, ib->gfx_level, ib->family, R_00B800_COMPUTE_DISPATCH_INITIATOR,
+                  ac_ib_get(ib), ~0);
+      break;
+   case PKT3_SET_BASE:
+      tmp = ac_ib_get(ib);
+      print_string_value(f, "BASE_INDEX", tmp == 1 ? "INDIRECT_BASE" : COLOR_RED "UNKNOWN" COLOR_RESET);
+      break;
    }
 
    /* print additional dwords */
index ac579c5..e2a82b1 100644 (file)
 #define PKT3_IT_OPCODE_G(x)   (((x) >> 8) & 0xFF)
 #define PKT3_IT_OPCODE_C      0xFFFF00FF
 #define PKT3_PREDICATE(x)     (((x) >> 0) & 0x1)
-#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x)&0x1) << 1)
-#define PKT3_RESET_FILTER_CAM(x) (((unsigned)(x)&0x1) << 2)
+#define PKT3_SHADER_TYPE_S(x) (((unsigned)(x) & 0x1) << 1) /* setter: places the low bit of x at bit 1 */
+#define PKT3_SHADER_TYPE_G(x) (((x) >> 1) & 0x1) /* getter: extracts bit 1 of x; no unsigned cast, unlike the _S form */
+#define PKT3_RESET_FILTER_CAM_S(x) (((unsigned)(x) & 0x1) << 2) /* setter: places the low bit of x at bit 2 */
+#define PKT3_RESET_FILTER_CAM_G(x) (((unsigned)(x) >> 2) & 0x1) /* getter: extracts bit 2 of x */
 #define PKT3(op, count, predicate)                                                                 \
    (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))
 
index 50ef321..1d1ad47 100644 (file)
@@ -7936,7 +7936,7 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3
    uint32_t xyz_dim_enable = 1; /* TODO: disable XYZ_DIM when unneeded */
    uint32_t mode1_enable = 1;   /* legacy fast launch mode */
 
-   radeon_emit(cs, PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM(1));
+   radeon_emit(cs, PKT3(PKT3_DISPATCH_MESH_INDIRECT_MULTI, 7, predicating) | PKT3_RESET_FILTER_CAM_S(1));
    radeon_emit(cs, 0); /* data_offset */
    radeon_emit(cs, S_4C1_XYZ_DIM_REG(xyz_dim_reg) | S_4C1_DRAW_INDEX_REG(draw_id_reg));
    if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11)
@@ -8036,7 +8036,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer)
    uint32_t mode1_en = 1;   /* legacy fast launch mode */
    uint32_t linear_dispatch_en = cmd_buffer->state.shaders[MESA_SHADER_TASK]->info.cs.linear_taskmesh_dispatch;
 
-   radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating) | PKT3_RESET_FILTER_CAM(1));
+   radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating) | PKT3_RESET_FILTER_CAM_S(1));
    radeon_emit(cs, S_4D0_RING_ENTRY_REG(ring_entry_reg) | S_4D0_XYZ_DIM_REG(xyz_dim_reg));
    if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11)
       radeon_emit(cs, S_4D1_XYZ_DIM_ENABLE(xyz_dim_en) | S_4D1_MODE1_ENABLE(mode1_en) |
index afa7157..f8c69f5 100644 (file)
@@ -179,7 +179,7 @@ radeon_set_perfctr_reg(struct radv_cmd_buffer *cmd_buffer, unsigned reg, unsigne
    bool filter_cam_workaround =
       cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && cmd_buffer->qf == RADV_QUEUE_GENERAL;
 
-   radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM(filter_cam_workaround));
+   radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, 1, 0) | PKT3_RESET_FILTER_CAM_S(filter_cam_workaround));
    radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
    radeon_emit(cs, value);
 }