From 94958e637d11d9dbe9345da1a8ff4048ef441389 Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Wed, 24 May 2023 16:24:35 +0100
Subject: [PATCH] aco: improve printing of s_delay_alu

Signed-off-by: Rhys Perry
Reviewed-by: Georg Lehmann
Part-of:
---
 src/amd/compiler/aco_builder_h.py       | 15 +++++++++++++++
 src/amd/compiler/aco_insert_waitcnt.cpp | 16 +---------------
 src/amd/compiler/aco_print_ir.cpp       | 29 +++++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 15 deletions(-)

diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index 6bc0185..22e17f2 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -136,6 +136,21 @@ enum bperm_swiz {
    bperm_255 = 13,
 };
 
+enum class alu_delay_wait {
+   NO_DEP = 0,
+   VALU_DEP_1 = 1,
+   VALU_DEP_2 = 2,
+   VALU_DEP_3 = 3,
+   VALU_DEP_4 = 4,
+   TRANS32_DEP_1 = 5,
+   TRANS32_DEP_2 = 6,
+   TRANS32_DEP_3 = 7,
+   FMA_ACCUM_CYCLE_1 = 8,
+   SALU_CYCLE_1 = 9,
+   SALU_CYCLE_2 = 10,
+   SALU_CYCLE_3 = 11,
+};
+
 class Builder {
 public:
    struct Result {
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index ad44b9e..9643a9e 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -22,6 +22,7 @@
  *
  */
 
+#include "aco_builder.h"
 #include "aco_ir.h"
 #include "common/sid.h"
 
@@ -167,21 +168,6 @@ struct alu_delay_info {
    }
 };
 
-enum class alu_delay_wait {
-   NO_DEP,
-   VALU_DEP_1,
-   VALU_DEP_2,
-   VALU_DEP_3,
-   VALU_DEP_4,
-   TRANS32_DEP_1,
-   TRANS32_DEP_2,
-   TRANS32_DEP_3,
-   FMA_ACCUM_CYCLE_1,
-   SALU_CYCLE_1,
-   SALU_CYCLE_2,
-   SALU_CYCLE_3
-};
-
 uint8_t
 get_counters_for_event(wait_event ev)
 {
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index c069baa..37db332 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -322,6 +322,35 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
          fprintf(output, " sa_sdst(%d)", sa_sdst);
       break;
    }
+   case aco_opcode::s_delay_alu: {
+      /* imm[3:0] and imm[10:7] are the two dependency codes (alu_delay_wait);
+       * imm[6:4] is the skip field printed between them. */
+      unsigned delay[2] = {imm & 0xfu, (imm >> 7) & 0xfu};
+      /* The skip field fills all three bits between the two codes. */
+      unsigned skip = (imm >> 4) & 0x7;
+      for (unsigned i = 0; i < 2; i++) {
+         if (i == 1 && skip) {
+            if (skip == 1)
+               fprintf(output, " next");
+            else
+               fprintf(output, " skip_%u", skip - 1);
+         }
+
+         /* NO_DEP and codes without an enumerator print nothing. */
+         alu_delay_wait wait = (alu_delay_wait)delay[i];
+         if (wait >= alu_delay_wait::VALU_DEP_1 && wait <= alu_delay_wait::VALU_DEP_4)
+            fprintf(output, " valu_dep_%u", delay[i]);
+         else if (wait >= alu_delay_wait::TRANS32_DEP_1 && wait <= alu_delay_wait::TRANS32_DEP_3)
+            fprintf(output, " trans32_dep_%u",
+                    delay[i] - (unsigned)alu_delay_wait::TRANS32_DEP_1 + 1);
+         else if (wait == alu_delay_wait::FMA_ACCUM_CYCLE_1)
+            fprintf(output, " fma_accum_cycle_1");
+         else if (wait >= alu_delay_wait::SALU_CYCLE_1 && wait <= alu_delay_wait::SALU_CYCLE_3)
+            fprintf(output, " salu_cycle_%u",
+                    delay[i] - (unsigned)alu_delay_wait::SALU_CYCLE_1 + 1);
+      }
+      break;
+   }
    case aco_opcode::s_endpgm:
    case aco_opcode::s_endpgm_saved:
    case aco_opcode::s_endpgm_ordered_ps_done:
-- 
2.7.4