From: Georg Lehmann Date: Fri, 3 Feb 2023 12:08:14 +0000 (+0100) Subject: aco: treat VINTERP_INREG as VALU X-Git-Tag: upstream/23.3.3~12036 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=77afe7d960429429b72d76ea2f9ae4a2dd2e6d70;p=platform%2Fupstream%2Fmesa.git aco: treat VINTERP_INREG as VALU It's just v_fma with fixed DPP8 and builtin s_waitcnt_expcnt, so it can mostly be handled as a pure VALU instruction. Reviewed-by: Daniel Schürmann Reviewed-by: Timur Kristóf Part-of: --- diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index e9a0c32..243388a 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -1054,7 +1054,7 @@ handle_lds_direct_valu_hazard_instr(LdsDirectVALUHazardGlobalState& global_state LdsDirectVALUHazardBlockState& block_state, aco_ptr<Instruction>& instr) { - if (instr->isVALU() || instr->isVINTERP_INREG()) { + if (instr->isVALU()) { block_state.has_trans |= instr->isTrans(); bool uses_vgpr = false; @@ -1153,7 +1153,7 @@ handle_valu_partial_forwarding_hazard_instr(VALUPartialForwardingHazardGlobalSta if (instr->isSALU() && !instr->definitions.empty()) { if (block_state.state == written_after_exec_write && instr_writes_exec(instr)) block_state.state = exec_written; - } else if (instr->isVALU() || instr->isVINTERP_INREG()) { + } else if (instr->isVALU()) { bool vgpr_write = false; for (Definition& def : instr->definitions) { if (def.physReg().reg() < 256) @@ -1239,7 +1239,7 @@ handle_valu_partial_forwarding_hazard(State& state, aco_ptr<Instruction>& instr) * For the hazard, there must be less than 3 VALU between the first and second VGPR writes. * There also must be less than 5 VALU between the second VGPR write and the current instruction. 
*/ - if (state.program->wave_size != 64 || (!instr->isVALU() && !instr->isVINTERP_INREG())) + if (state.program->wave_size != 64 || !instr->isVALU()) return false; unsigned num_vgprs = 0; @@ -1319,7 +1319,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>& * VALU reads VGPR written by transcendental instruction without 6+ VALU or 2+ transcendental * in-between. */ - if (va_vdst > 0 && (instr->isVALU() || instr->isVINTERP_INREG())) { + if (va_vdst > 0 && instr->isVALU()) { uint8_t num_valu = 15; uint8_t num_trans = 15; for (Operand& op : instr->operands) { @@ -1362,7 +1362,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>& if (sa_sdst == 0) ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu.reset(); - if (instr->isVALU() || instr->isVINTERP_INREG()) { + if (instr->isVALU()) { bool is_trans = instr->isTrans(); ctx.valu_since_wr_by_trans.inc(); @@ -1419,7 +1419,7 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr<Instruction>& for (Operand& op : instr->operands) fill_vgpr_bitset(ctx.vgpr_used_by_ds, op.physReg(), op.bytes()); } - if (instr->isVALU() || instr->isVINTERP_INREG() || instr->isEXP() || vm_vsrc == 0) { + if (instr->isVALU() || instr->isEXP() || vm_vsrc == 0) { ctx.vgpr_used_by_vmem_load.reset(); ctx.vgpr_used_by_vmem_store.reset(); ctx.vgpr_used_by_ds.reset(); diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index b7edf04..948799f 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -372,7 +372,7 @@ check_instr(wait_ctx& ctx, wait_imm& wait, alu_delay_info& delay, Instruction* i continue; wait.combine(it->second.imm); - if (instr->isVALU() || instr->isSALU() || instr->isVINTERP_INREG()) + if (instr->isVALU() || instr->isSALU()) delay.combine(it->second.delay); } } @@ -788,7 +788,7 @@ void gen_alu(Instruction* instr, wait_ctx& ctx) { Instruction_cycle_info cycle_info = get_cycle_info(*ctx.program, *instr); - bool 
is_valu = instr->isVALU() || instr->isVINTERP_INREG(); + bool is_valu = instr->isVALU(); bool is_trans = instr->isTrans(); bool clear = instr->isEXP() || instr->isDS() || instr->isMIMG() || instr->isFlatLike() || instr->isMUBUF() || instr->isMTBUF(); diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 4652af8..c4f3367 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -343,7 +343,7 @@ can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra, bool dpp8) if (instr->operands.size() && instr->operands[0].isLiteral()) return false; - if (instr->isSDWA() || instr->isVOP3P()) + if (instr->isSDWA() || instr->isVINTERP_INREG() || instr->isVOP3P()) return false; if (!pre_ra && (instr->isVOPC() || instr->definitions.size() > 1) && diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index f57ba4a..c3ad28a 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1372,7 +1372,7 @@ struct Instruction { constexpr bool isVALU() const noexcept { - return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P(); + return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P() || isVINTERP_INREG(); } constexpr bool isSALU() const noexcept diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index b0be28a..3477563 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2675,7 +2675,7 @@ lower_to_hw_instr(Program* program) can_remove = false; } else if (inst->isSALU()) { num_scalar++; - } else if (inst->isVALU() || inst->isVINTRP() || inst->isVINTERP_INREG()) { + } else if (inst->isVALU() || inst->isVINTRP()) { num_vector++; /* VALU which writes SGPRs are always executed on GFX10+ */ if (ctx.program->gfx_level >= GFX10) { diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp index 6435a54..b39af7b 100644 --- a/src/amd/compiler/aco_optimizer_postRA.cpp +++ 
b/src/amd/compiler/aco_optimizer_postRA.cpp @@ -568,7 +568,7 @@ num_encoded_alu_operands(const aco_ptr<Instruction>& instr) else if (instr->opcode == aco_opcode::v_writelane_b32_e64 || instr->opcode == aco_opcode::v_writelane_b32) return 2; /* potentially VOP3, but reads VDST as SRC2 */ - else if (instr->isVOP3() || instr->isVOP3P()) + else if (instr->isVOP3() || instr->isVOP3P() || instr->isVINTERP_INREG()) return instr->operands.size(); } diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index e8a1095..54d4174 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -504,7 +504,7 @@ get_subdword_operand_stride(amd_gfx_level gfx_level, const aco_ptr<Instruction>& } assert(rc.bytes() <= 2); - if (instr->isVALU() || instr->isVINTERP_INREG()) { + if (instr->isVALU()) { if (can_use_SDWA(gfx_level, instr, false)) return rc.bytes(); if (can_use_opsel(gfx_level, instr->opcode, idx)) @@ -539,7 +539,7 @@ add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, uns return; assert(rc.bytes() <= 2); - if (instr->isVALU() || instr->isVINTERP_INREG()) { + if (instr->isVALU()) { /* check if we can use opsel */ if (instr->format == Format::VOP3) { assert(byte == 2); @@ -616,7 +616,7 @@ get_subdword_definition_info(Program* program, const aco_ptr<Instruction>& instr return std::make_pair(4, rc.size() * 4u); } - if (instr->isVALU() || instr->isVINTRP() || instr->isVINTERP_INREG()) { + if (instr->isVALU() || instr->isVINTRP()) { assert(rc.bytes() <= 2); if (can_use_SDWA(gfx_level, instr, false)) @@ -684,7 +684,7 @@ add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg r if (instr->isPseudo()) return; - if (instr->isVALU() || instr->isVINTERP_INREG()) { + if (instr->isVALU()) { amd_gfx_level gfx_level = program->gfx_level; assert(instr->definitions[0].bytes() <= 2); diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 0494f4c..599eb31 100644 --- 
a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -284,7 +284,7 @@ validate_ir(Program* program) instr.get()); } - if (instr->isSALU() || instr->isVALU() || instr->isVINTERP_INREG()) { + if (instr->isSALU() || instr->isVALU()) { /* check literals */ Operand literal(s1); for (unsigned i = 0; i < instr->operands.size(); i++) { @@ -306,7 +306,7 @@ validate_ir(Program* program) } /* check num sgprs for VALU */ - if (instr->isVALU() || instr->isVINTERP_INREG()) { + if (instr->isVALU()) { bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 || instr->opcode == aco_opcode::v_lshrrev_b64 || instr->opcode == aco_opcode::v_ashrrev_i64; @@ -929,7 +929,7 @@ get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, if (instr->isPseudo()) return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u; - if (instr->isVALU() || instr->isVINTERP_INREG()) { + if (instr->isVALU()) { assert(def.bytes() <= 2); if (instr->isSDWA()) return instr->sdwa().dst_sel.size();