From 23edcf64909c7800c6d52a6bbed9d24945214370 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Timur=20Krist=C3=B3f?=
Date: Tue, 7 Jan 2020 10:12:08 +0100
Subject: [PATCH] aco: Make a better guess at which instructions need the VCC hint.
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Previously, bool_to_vector_condition would always set the VCC hint
on its result. This commit improves it by having the optimizer set
the VCC hint only when the result really needs to be in the VCC.

Signed-off-by: Timur Kristóf
Reviewed-by: Daniel Schürmann
Tested-by: Marge Bot
Part-of:
---
 src/amd/compiler/aco_instruction_selection.cpp |  2 +-
 src/amd/compiler/aco_optimizer.cpp             | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 1cbfed4..897dbcb 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -356,7 +356,7 @@ Temp bool_to_vector_condition(isel_context *ctx, Temp val, Temp dst = Temp(0, s2
    assert(val.regClass() == s1);
    assert(dst.regClass() == bld.lm);
 
-   return bld.sop2(Builder::s_cselect, bld.hint_vcc(Definition(dst)), Operand((uint32_t) -1), Operand(0u), bld.scc(val));
+   return bld.sop2(Builder::s_cselect, Definition(dst), Operand((uint32_t) -1), Operand(0u), bld.scc(val));
 }
 
 Temp bool_to_scalar_condition(isel_context *ctx, Temp val, Temp dst = Temp(0, s1))
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 7e05204..1088d8a 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -85,6 +85,7 @@ enum Label {
    label_constant_64bit = 1 << 22,
    label_uniform_bitwise = 1 << 23,
    label_scc_invert = 1 << 24,
+   label_vcc_hint = 1 << 25,
 };
 
 static constexpr uint32_t instr_labels = label_vec | label_mul | label_mad | label_omod_success | label_clamp_success |
@@ -405,6 +406,15 @@ struct ssa_info {
       return label & label_uniform_bool;
    }
 
+   void set_vcc_hint()
+   {
+      add_label(label_vcc_hint);
+   }
+
+   bool is_vcc_hint()
+   {
+      return label & label_vcc_hint;
+   }
 };
 
 struct opt_ctx {
@@ -1087,6 +1097,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
                instr->operands[1].constantEquals(0x3f800000u) &&
                instr->operands[2].isTemp())
          ctx.info[instr->definitions[0].tempId()].set_b2f(instr->operands[2].getTemp());
+
+      ctx.info[instr->operands[2].tempId()].set_vcc_hint();
       break;
    case aco_opcode::v_cmp_lg_u32:
       if (instr->format == Format::VOPC && /* don't optimize VOP3 / SDWA / DPP */
@@ -2235,6 +2247,10 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
          return;
    }
 
+   if (ctx.info[instr->definitions[0].tempId()].is_vcc_hint()) {
+      instr->definitions[0].setHint(vcc);
+   }
+
    /* TODO: There are still some peephole optimizations that could be done:
     * - abs(a - b) -> s_absdiff_i32
     * - various patterns for s_bitcmp{0,1}_b32 and s_bitset{0,1}_b32
-- 
2.7.4