From 70dbcfa1c9e0b5fe609485b011c3ce9d0819a9ee Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Wed, 20 Jan 2021 14:49:08 +0000 Subject: [PATCH] aco: use instruction cast methods MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_assembler.cpp | 32 ++++++------ src/amd/compiler/aco_insert_NOPs.cpp | 20 ++++---- src/amd/compiler/aco_insert_exec_mask.cpp | 14 ++---- src/amd/compiler/aco_insert_waitcnt.cpp | 31 +++++------- src/amd/compiler/aco_instruction_selection.cpp | 31 ++++++------ src/amd/compiler/aco_ir.cpp | 18 +++---- src/amd/compiler/aco_ir.h | 4 +- src/amd/compiler/aco_lower_to_hw_instr.cpp | 20 ++++---- src/amd/compiler/aco_opt_value_numbering.cpp | 48 +++++++++--------- src/amd/compiler/aco_optimizer.cpp | 68 +++++++++++++------------- src/amd/compiler/aco_print_ir.cpp | 47 +++++++++--------- src/amd/compiler/aco_reduce_assign.cpp | 4 +- src/amd/compiler/aco_register_allocation.cpp | 16 +++--- src/amd/compiler/aco_scheduler.cpp | 12 ++--- src/amd/compiler/aco_spill.cpp | 10 ++-- src/amd/compiler/aco_ssa_elimination.cpp | 4 +- src/amd/compiler/aco_statistics.cpp | 2 +- src/amd/compiler/aco_validate.cpp | 14 +++--- src/amd/compiler/tests/test_optimizer.cpp | 4 +- src/amd/compiler/tests/test_to_hw_instr.cpp | 2 +- 20 files changed, 190 insertions(+), 211 deletions(-) diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 3efdf66..eab8457 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -109,7 +109,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::SOPK: { - SOPK_instruction *sopk = static_cast(instr); + SOPK_instruction *sopk = instr->sopk(); if (instr->opcode == aco_opcode::s_subvector_loop_begin) { assert(ctx.chip_class >= GFX10); @@ -157,7 +157,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, 
Instruction* break; } case Format::SOPP: { - SOPP_instruction* sopp = static_cast(instr); + SOPP_instruction* sopp = instr->sopp(); uint32_t encoding = (0b101111111 << 23); encoding |= opcode << 16; encoding |= (uint16_t) sopp->imm; @@ -169,7 +169,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::SMEM: { - SMEM_instruction* smem = static_cast(instr); + SMEM_instruction* smem = instr->smem(); bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4); bool is_load = !instr->definitions.empty(); uint32_t encoding = 0; @@ -284,7 +284,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::VINTRP: { - Interp_instruction* interp = static_cast(instr); + Interp_instruction* interp = instr->vintrp(); uint32_t encoding = 0; if (instr->opcode == aco_opcode::v_interp_p1ll_f16 || @@ -334,7 +334,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::DS: { - DS_instruction* ds = static_cast(instr); + DS_instruction* ds = instr->ds(); uint32_t encoding = (0b110110 << 26); if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { encoding |= opcode << 17; @@ -358,7 +358,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::MUBUF: { - MUBUF_instruction* mubuf = static_cast(instr); + MUBUF_instruction* mubuf = instr->mubuf(); uint32_t encoding = (0b111000 << 26); encoding |= opcode << 18; encoding |= (mubuf->lds ? 
1 : 0) << 16; @@ -390,7 +390,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::MTBUF: { - MTBUF_instruction* mtbuf = static_cast(instr); + MTBUF_instruction* mtbuf = instr->mtbuf(); uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt); uint32_t encoding = (0b111010 << 26); @@ -437,7 +437,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* assert(!use_nsa || ctx.chip_class >= GFX10); unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0; - MIMG_instruction* mimg = static_cast(instr); + MIMG_instruction* mimg = instr->mimg(); uint32_t encoding = (0b111100 << 26); encoding |= mimg->slc ? 1 << 25 : 0; encoding |= opcode << 18; @@ -487,7 +487,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* case Format::FLAT: case Format::SCRATCH: case Format::GLOBAL: { - FLAT_instruction *flat = static_cast(instr); + FLAT_instruction *flat = instr->flatlike(); uint32_t encoding = (0b110111 << 26); encoding |= opcode << 18; if (ctx.chip_class <= GFX9) { @@ -536,7 +536,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::EXP: { - Export_instruction* exp = static_cast(instr); + Export_instruction* exp = instr->exp(); uint32_t encoding; if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { encoding = (0b110001 << 26); @@ -564,7 +564,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; default: if ((uint16_t) instr->format & (uint16_t) Format::VOP3) { - VOP3_instruction* vop3 = static_cast(instr); + VOP3_instruction* vop3 = instr->vop3(); if ((uint16_t) instr->format & (uint16_t) Format::VOP2) { opcode = opcode + 0x100; @@ -615,7 +615,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* out.push_back(encoding); } else if (instr->format == Format::VOP3P) { - VOP3P_instruction* vop3 = static_cast(instr); + VOP3P_instruction* vop3 = instr->vop3p(); 
uint32_t encoding; if (ctx.chip_class == GFX9) { @@ -644,12 +644,13 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* } else if (instr->isDPP()){ assert(ctx.chip_class >= GFX8); + DPP_instruction* dpp = instr->dpp(); + /* first emit the instruction without the DPP operand */ Operand dpp_op = instr->operands[0]; instr->operands[0] = Operand(PhysReg{250}, v1); instr->format = (Format) ((uint16_t) instr->format & ~(uint16_t)Format::DPP); emit_instruction(ctx, out, instr); - DPP_instruction* dpp = static_cast(instr); uint32_t encoding = (0xF & dpp->row_mask) << 28; encoding |= (0xF & dpp->bank_mask) << 24; encoding |= dpp->abs[1] << 23; @@ -664,13 +665,14 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* out.push_back(encoding); return; } else if (instr->isSDWA()) { + SDWA_instruction* sdwa = instr->sdwa(); + /* first emit the instruction without the SDWA operand */ Operand sdwa_op = instr->operands[0]; instr->operands[0] = Operand(PhysReg{249}, v1); instr->format = (Format) ((uint16_t) instr->format & ~(uint16_t)Format::SDWA); emit_instruction(ctx, out, instr); - SDWA_instruction* sdwa = static_cast(instr); uint32_t encoding = 0; if ((uint16_t)instr->format & (uint16_t)Format::VOPC) { @@ -748,7 +750,7 @@ void fix_exports(asm_context& ctx, std::vector& out, Program* program) while ( it != block.instructions.rend()) { if ((*it)->format == Format::EXP) { - Export_instruction* exp = static_cast((*it).get()); + Export_instruction* exp = (*it)->exp(); if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) { if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) { exp->done = true; diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index a609c18..a5df7bf 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -180,7 +180,7 @@ struct NOP_ctx_gfx10 { int get_wait_states(aco_ptr& instr) { if (instr->opcode == 
aco_opcode::s_nop) - return static_cast<SOPP_instruction*>(instr.get())->imm + 1; + return instr->sopp()->imm + 1; else if (instr->opcode == aco_opcode::p_constaddr) return 3; /* lowered to 3 instructions in the assembler */ else @@ -351,7 +351,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c if (instr->opcode == aco_opcode::s_sendmsg || instr->opcode == aco_opcode::s_ttracedata) NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace); - } else if (instr->format == Format::DS && static_cast<DS_instruction*>(instr.get())->gds) { + } else if (instr->format == Format::DS && instr->ds()->gds) { NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace); } else if (instr->isVALU() || instr->format == Format::VINTRP) { for (Operand op : instr->operands) { @@ -407,7 +407,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c if (program->chip_class == GFX9) { bool lds_scratch_global = (instr->format == Format::SCRATCH || instr->format == Format::GLOBAL) && - static_cast<FLAT_instruction*>(instr.get())->lds; + instr->flatlike()->lds; if (instr->format == Format::VINTRP || instr->opcode == aco_opcode::ds_read_addtid_b32 || instr->opcode == aco_opcode::ds_write_addtid_b32 || @@ -480,7 +480,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c ctx.salu_wr_m0_then_moverel = 1; } } else if (instr->opcode == aco_opcode::s_setreg_b32 || instr->opcode == aco_opcode::s_setreg_imm32_b32) { - SOPK_instruction *sopk = static_cast<SOPK_instruction*>(instr.get()); + SOPK_instruction *sopk = instr->sopk(); unsigned offset = (sopk->imm >> 6) & 0x1f; unsigned size = ((sopk->imm >> 11) & 0x1f) + 1; unsigned reg = sopk->imm & 0x3f; @@ -604,14 +604,13 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10 } else if (instr->isSALU() || instr->format == Format::SMEM) { if (instr->opcode == aco_opcode::s_waitcnt) { /* Hazard is mitigated by "s_waitcnt vmcnt(0)" */ - uint16_t imm = static_cast<SOPP_instruction*>(instr.get())->imm; + uint16_t imm = instr->sopp()->imm;
unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10); if (vmcnt == 0) ctx.sgprs_read_by_VMEM.reset(); } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) { /* Hazard is mitigated by a s_waitcnt_depctr with a magic imm */ - const SOPP_instruction *sopp = static_cast<const SOPP_instruction*>(instr.get()); - if (sopp->imm == 0xffe3) + if (instr->sopp()->imm == 0xffe3) ctx.sgprs_read_by_VMEM.reset(); } @@ -669,8 +668,7 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10 } } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) { /* s_waitcnt_depctr can mitigate the problem if it has a magic imm */ - const SOPP_instruction *sopp = static_cast<const SOPP_instruction*>(instr.get()); - if ((sopp->imm & 0xfffe) == 0xfffe) + if ((instr->sopp()->imm & 0xfffe) == 0xfffe) ctx.has_nonVALU_exec_read = false; } @@ -697,7 +695,7 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10 ctx.sgprs_read_by_SMEM.reset(); } else { /* Reducing lgkmcnt count to 0 always mitigates the hazard.
*/ - const SOPP_instruction *sopp = static_cast<const SOPP_instruction*>(instr.get()); + const SOPP_instruction *sopp = instr->sopp(); if (sopp->opcode == aco_opcode::s_waitcnt_lgkmcnt) { if (sopp->imm == 0 && sopp->definitions[0].physReg() == sgpr_null) ctx.sgprs_read_by_SMEM.reset(); @@ -727,7 +725,7 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10 ctx.has_branch_after_DS = ctx.has_DS; } else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) { /* Only s_waitcnt_vscnt can mitigate the hazard */ - const SOPK_instruction *sopk = static_cast<const SOPK_instruction*>(instr.get()); + const SOPK_instruction *sopk = instr->sopk(); if (sopk->definitions[0].physReg() == sgpr_null && sopk->imm == 0) ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false; } diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index cc6b189..33d7a5a 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -98,17 +98,13 @@ struct exec_ctx { bool needs_exact(aco_ptr<Instruction>& instr) { if (instr->format == Format::MUBUF) { - MUBUF_instruction *mubuf = static_cast<MUBUF_instruction*>(instr.get()); - return mubuf->disable_wqm; + return instr->mubuf()->disable_wqm; } else if (instr->format == Format::MTBUF) { - MTBUF_instruction *mtbuf = static_cast<MTBUF_instruction*>(instr.get()); - return mtbuf->disable_wqm; + return instr->mtbuf()->disable_wqm; } else if (instr->format == Format::MIMG) { - MIMG_instruction *mimg = static_cast<MIMG_instruction*>(instr.get()); - return mimg->disable_wqm; + return instr->mimg()->disable_wqm; } else if (instr->format == Format::FLAT || instr->format == Format::GLOBAL) { - FLAT_instruction *flat = static_cast<FLAT_instruction*>(instr.get()); - return flat->disable_wqm; + return instr->flatlike()->disable_wqm; } else { return instr->format == Format::EXP; } @@ -912,7 +908,7 @@ void add_branch_code(exec_ctx& ctx, Block* block) } if (block->kind & block_kind_uniform) { - Pseudo_branch_instruction* branch = 
static_cast(block->instructions.back().get()); + Pseudo_branch_instruction* branch = block->instructions.back()->branch(); if (branch->opcode == aco_opcode::p_branch) { branch->target[0] = block->linear_succs[0]; } else { diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 765e1f5..2f23fe2 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -423,8 +423,7 @@ wait_imm check_instr(Instruction* instr, wait_ctx& ctx) /* LDS reads and writes return in the order they were issued. same for GDS */ if (instr->format == Format::DS) { - bool gds = static_cast(instr)->gds; - if ((it->second.events & lgkm_events) == (gds ? event_gds : event_lds)) + if ((it->second.events & lgkm_events) == (instr->ds()->gds ? event_gds : event_lds)) continue; } @@ -440,10 +439,10 @@ wait_imm parse_wait_instr(wait_ctx& ctx, Instruction *instr) if (instr->opcode == aco_opcode::s_waitcnt_vscnt && instr->definitions[0].physReg() == sgpr_null) { wait_imm imm; - imm.vs = std::min(imm.vs, static_cast(instr)->imm); + imm.vs = std::min(imm.vs, instr->sopk()->imm); return imm; } else if (instr->opcode == aco_opcode::s_waitcnt) { - return wait_imm(ctx.chip_class, static_cast(instr)->imm); + return wait_imm(ctx.chip_class, instr->sopp()->imm); } return wait_imm(); } @@ -523,20 +522,16 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info) * * TODO: Refine this when we have proper alias analysis. 
*/ - SMEM_instruction *smem = static_cast(instr); if (ctx.pending_s_buffer_store && - !smem->definitions.empty() && - !smem->sync.can_reorder()) { + !instr->smem()->definitions.empty() && + !instr->smem()->sync.can_reorder()) { imm.lgkm = 0; } } - if (ctx.program->early_rast && - instr->opcode == aco_opcode::exp) { - - Export_instruction *exp = static_cast(instr); - if (exp->dest >= V_008DFC_SQ_EXP_POS && - exp->dest < V_008DFC_SQ_EXP_PRIM) { + if (ctx.program->early_rast && instr->opcode == aco_opcode::exp) { + if (instr->exp()->dest >= V_008DFC_SQ_EXP_POS && + instr->exp()->dest < V_008DFC_SQ_EXP_PRIM) { /* With early_rast, the HW will start clipping and rasterization after the 1st DONE pos export. * Wait for all stores (and atomics) to complete, so PS can read them. @@ -550,7 +545,7 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info) } if (instr->opcode == aco_opcode::p_barrier) - imm.combine(perform_barrier(ctx, static_cast(instr)->sync, semantic_acqrel)); + imm.combine(perform_barrier(ctx, instr->barrier()->sync, semantic_acqrel)); else imm.combine(perform_barrier(ctx, sync_info, semantic_release)); @@ -767,7 +762,7 @@ void gen(Instruction* instr, wait_ctx& ctx) { switch (instr->format) { case Format::EXP: { - Export_instruction* exp_instr = static_cast(instr); + Export_instruction* exp_instr = instr->exp(); wait_event ev; if (exp_instr->dest <= 9) @@ -792,7 +787,7 @@ void gen(Instruction* instr, wait_ctx& ctx) break; } case Format::FLAT: { - FLAT_instruction *flat = static_cast(instr); + FLAT_instruction *flat = instr->flat(); if (ctx.chip_class < GFX10 && !instr->definitions.empty()) update_counters_for_flat_load(ctx, flat->sync); else @@ -803,7 +798,7 @@ void gen(Instruction* instr, wait_ctx& ctx) break; } case Format::SMEM: { - SMEM_instruction *smem = static_cast(instr); + SMEM_instruction *smem = instr->smem(); update_counters(ctx, event_smem, smem->sync); if (!instr->definitions.empty()) @@ -815,7 +810,7 @@ void 
gen(Instruction* instr, wait_ctx& ctx) break; } case Format::DS: { - DS_instruction *ds = static_cast(instr); + DS_instruction *ds = instr->ds(); update_counters(ctx, ds->gds ? event_gds : event_lds, ds->sync); if (ds->gds) update_counters(ctx, event_gds_gpr_lock); diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 3aa890f..4c27e58 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1224,7 +1224,7 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val) Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1); Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v); - static_cast(add)->neg[1] = true; + add->vop3()->neg[1] = true; return add->definitions[0].getTemp(); } @@ -1692,7 +1692,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) std::swap(src0, src1); add_instr = bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr; } - static_cast(add_instr)->clamp = 1; + add_instr->vop3()->clamp = 1; } else if (dst.regClass() == v1) { if (ctx->options->chip_class >= GFX9) { aco_ptr add{create_instruction(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)}; @@ -1944,7 +1944,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) case nir_op_fsub: { if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) { Instruction* add = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst); - VOP3P_instruction* sub = static_cast(add); + VOP3P_instruction* sub = add->vop3p(); sub->neg_lo[1] = true; sub->neg_hi[1] = true; break; @@ -1965,8 +1965,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } else if (dst.regClass() == v2) { Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), as_vgpr(ctx, src0), as_vgpr(ctx, src1)); - VOP3_instruction* sub = static_cast(add); - sub->neg[1] = true; + 
add->vop3()->neg[1] = true; } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } @@ -2102,7 +2101,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) Temp src = get_alu_src_vop3p(ctx, instr->src[0]); Instruction* vop3p = bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand(uint16_t(0x3C00)), instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1); - static_cast(vop3p)->clamp = true; + vop3p->vop3p()->clamp = true; emit_split_vector(ctx, dst, 2); break; } @@ -2115,8 +2114,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) // TODO: confirm that this holds under any circumstances } else if (dst.regClass() == v2) { Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u)); - VOP3_instruction* vop3 = static_cast(add); - vop3->clamp = true; + add->vop3()->clamp = true; } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } @@ -2255,12 +2253,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) Temp bfi = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, bld.copy(bld.def(v1), Operand(0x43300000u)), as_vgpr(ctx, src0_hi)); Temp tmp = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi)); Instruction *sub = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi)); - static_cast(sub)->neg[1] = true; + sub->vop3()->neg[1] = true; tmp = sub->definitions[0].getTemp(); Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), Operand(0x432fffffu)); Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.hint_vcc(bld.def(bld.lm)), src0, v); - static_cast(vop3)->abs[0] = true; + vop3->vop3()->abs[0] = true; Temp cond = vop3->definitions[0].getTemp(); Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1); @@ -2926,7 +2924,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) f32 = 
bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16); Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u)); Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), f32, smallest); - static_cast(vop3)->abs[0] = true; + vop3->vop3()->abs[0] = true; cmp_res = vop3->definitions[0].getTemp(); } @@ -3517,7 +3515,7 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info, instr = bld.ds(op, Definition(val), offset, m, const_offset, const_offset + 1); else instr = bld.ds(op, Definition(val), offset, m, const_offset); - static_cast(instr)->sync = info.sync; + instr->ds()->sync = info.sync; if (size < 4) val = bld.pseudo(aco_opcode::p_extract_vector, bld.def(RegClass::get(RegType::vgpr, size)), val, Operand(0u)); @@ -3933,8 +3931,7 @@ void store_lds(isel_context *ctx, unsigned elem_size_bytes, Temp data, uint32_t } else { instr = bld.ds(op, address_offset, split_data, m, inline_offset); } - static_cast(instr)->sync = - memory_sync_info(storage_shared); + instr->ds()->sync = memory_sync_info(storage_shared); } } @@ -4097,7 +4094,7 @@ void emit_single_mubuf_store(isel_context *ctx, Temp descriptor, Temp voffset, T /* idxen*/ false, /* addr64 */ false, /* disable_wqm */ false, /* glc */ true, /* dlc*/ false, /* slc */ slc); - static_cast(r.instr)->sync = sync; + r.instr->mubuf()->sync = sync; } void store_vmem_mubuf(isel_context *ctx, Temp src, Temp descriptor, Temp voffset, Temp soffset, @@ -5506,7 +5503,7 @@ void visit_load_push_constant(isel_context *ctx, nir_intrinsic_instr *instr) unreachable("unimplemented or forbidden load_push_constant."); } - static_cast(bld.smem(op, Definition(vec), ptr, index).instr)->prevent_overflow = true; + bld.smem(op, Definition(vec), ptr, index).instr->smem()->prevent_overflow = true; if (!aligned) { Operand byte_offset = index_cv ? 
Operand((offset + index_cv->u32) % 4) : Operand(index); @@ -7150,7 +7147,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { for (unsigned i = 0; i < write_count; i++) { aco_opcode op = get_buffer_store_op(write_datas[i].bytes()); Instruction *mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_datas[i], offsets[i], true, true); - static_cast<MUBUF_instruction*>(mubuf)->sync = memory_sync_info(storage_scratch, semantic_private); + mubuf->mubuf()->sync = memory_sync_info(storage_scratch, semantic_private); } } diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 5b46e8a..816edff 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -140,19 +140,19 @@ memory_sync_info get_sync_info(const Instruction* instr) { switch (instr->format) { case Format::SMEM: - return static_cast<const SMEM_instruction*>(instr)->sync; + return instr->smem()->sync; case Format::MUBUF: - return static_cast<const MUBUF_instruction*>(instr)->sync; + return instr->mubuf()->sync; case Format::MIMG: - return static_cast<const MIMG_instruction*>(instr)->sync; + return instr->mimg()->sync; case Format::MTBUF: - return static_cast<const MTBUF_instruction*>(instr)->sync; + return instr->mtbuf()->sync; case Format::FLAT: case Format::GLOBAL: case Format::SCRATCH: - return static_cast<const FLAT_instruction*>(instr)->sync; + return instr->flatlike()->sync; case Format::DS: - return static_cast<const DS_instruction*>(instr)->sync; + return instr->ds()->sync; default: return memory_sync_info(); } @@ -170,7 +170,7 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr) return true; if (instr->isVOP3()) { - VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(instr.get()); + VOP3_instruction *vop3 = instr->vop3(); if (instr->format == Format::VOP3) return false; if (vop3->clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8) @@ -232,10 +232,10 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin()); std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(), instr->definitions.begin()); - 
SDWA_instruction *sdwa = static_cast<SDWA_instruction*>(instr.get()); + SDWA_instruction *sdwa = instr->sdwa(); if (tmp->isVOP3()) { - VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(tmp.get()); + VOP3_instruction *vop3 = tmp->vop3(); memcpy(sdwa->neg, vop3->neg, sizeof(sdwa->neg)); memcpy(sdwa->abs, vop3->abs, sizeof(sdwa->abs)); sdwa->omod = vop3->omod; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index bdffbc7..caa0d84 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1539,7 +1539,7 @@ constexpr bool Instruction::usesModifiers() const noexcept return true; if (format == Format::VOP3P) { - const VOP3P_instruction *vop3p = static_cast<const VOP3P_instruction*>(this); + const VOP3P_instruction *vop3p = this->vop3p(); for (unsigned i = 0; i < operands.size(); i++) { if (vop3p->neg_lo[i] || vop3p->neg_hi[i]) return true; @@ -1550,7 +1550,7 @@ constexpr bool Instruction::usesModifiers() const noexcept } return vop3p->opsel_lo || vop3p->clamp; } else if (isVOP3()) { - const VOP3_instruction *vop3 = static_cast<const VOP3_instruction*>(this); + const VOP3_instruction *vop3 = this->vop3(); for (unsigned i = 0; i < operands.size(); i++) { if (vop3->abs[i] || vop3->neg[i]) return true; diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index f9eb932..1ebca2b 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -646,7 +646,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig Definition(PhysReg{vtmp+i}, v1), Operand(PhysReg{tmp+i}, v1), Operand(0xffffffffu), Operand(0xffffffffu)).instr; - static_cast<VOP3_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */ + perm->vop3()->opsel = 1; /* FI (Fetch Inactive) */ } bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX)); @@ -757,7 +757,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig Definition(PhysReg{vtmp+i}, v1), Operand(PhysReg{tmp+i}, v1), Operand(0xffffffffu),
Operand(0xffffffffu)).instr; - static_cast(perm)->opsel = 1; /* FI (Fetch Inactive) */ + perm->vop3()->opsel = 1; /* FI (Fetch Inactive) */ } emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size()); @@ -1052,12 +1052,12 @@ void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op) if (dst.physReg().byte() == 2) { Operand def_lo(dst.physReg().advance(-2), v2b); Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op); - static_cast(instr)->opsel = 0; + instr->vop3()->opsel = 0; } else { assert(dst.physReg().byte() == 0); Operand def_hi(dst.physReg().advance(2), v2b); Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi); - static_cast(instr)->opsel = 2; + instr->vop3()->opsel = 2; } } else { uint32_t offset = dst.physReg().byte() * 8u; @@ -1251,7 +1251,7 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo, if (can_use_pack) { Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi); /* opsel: 0 = select low half, 1 = select high half. 
[0] = src0, [1] = src1 */ - static_cast(instr)->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1); + instr->vop3()->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1); return; } @@ -1810,7 +1810,7 @@ void lower_to_hw_instr(Program* program) aco_ptr& instr = block->instructions[instr_idx]; aco_ptr mov; if (instr->format == Format::PSEUDO && instr->opcode != aco_opcode::p_unit_test) { - Pseudo_instruction *pi = (Pseudo_instruction*)instr.get(); + Pseudo_instruction *pi = instr->pseudo(); switch (instr->opcode) { @@ -1897,7 +1897,7 @@ void lower_to_hw_instr(Program* program) instr2->opcode == aco_opcode::p_logical_end) continue; else if (instr2->opcode == aco_opcode::exp && - static_cast(instr2.get())->dest == null_exp_dest) + instr2->exp()->dest == null_exp_dest) continue; else if (instr2->opcode == aco_opcode::p_parallelcopy && instr2->definitions[0].isFixed() && @@ -1983,7 +1983,7 @@ void lower_to_hw_instr(Program* program) break; } } else if (instr->format == Format::PSEUDO_BRANCH) { - Pseudo_branch_instruction* branch = static_cast(instr.get()); + Pseudo_branch_instruction* branch = instr->branch(); uint32_t target = branch->target[0]; /* check if all blocks from current to target are empty */ @@ -2055,7 +2055,7 @@ void lower_to_hw_instr(Program* program) } } else if (instr->format == Format::PSEUDO_REDUCTION) { - Pseudo_reduction_instruction* reduce = static_cast(instr.get()); + Pseudo_reduction_instruction* reduce = instr->reduction(); emit_reduction(&ctx, reduce->opcode, reduce->reduce_op, reduce->cluster_size, reduce->operands[1].physReg(), // tmp reduce->definitions[1].physReg(), // stmp @@ -2063,7 +2063,7 @@ void lower_to_hw_instr(Program* program) reduce->definitions[2].physReg(), // sitmp reduce->operands[0], reduce->definitions[0]); } else if (instr->format == Format::PSEUDO_BARRIER) { - Pseudo_barrier_instruction* barrier = static_cast(instr.get()); + Pseudo_barrier_instruction* barrier = instr->barrier(); /* Anything larger than a 
workgroup isn't possible. Anything * smaller requires no instructions and this pseudo instruction diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index a5a3a8c..80b0960 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -178,8 +178,8 @@ struct InstrPred { return false; if (a->isVOP3()) { - VOP3_instruction* a3 = static_cast(a); - VOP3_instruction* b3 = static_cast(b); + VOP3_instruction* a3 = a->vop3(); + VOP3_instruction* b3 = b->vop3(); for (unsigned i = 0; i < 3; i++) { if (a3->abs[i] != b3->abs[i] || a3->neg[i] != b3->neg[i]) @@ -190,8 +190,8 @@ struct InstrPred { a3->opsel == b3->opsel; } if (a->isDPP()) { - DPP_instruction* aDPP = static_cast(a); - DPP_instruction* bDPP = static_cast(b); + DPP_instruction* aDPP = a->dpp(); + DPP_instruction* bDPP = b->dpp(); return aDPP->pass_flags == bDPP->pass_flags && aDPP->dpp_ctrl == bDPP->dpp_ctrl && aDPP->bank_mask == bDPP->bank_mask && @@ -203,8 +203,8 @@ struct InstrPred { aDPP->neg[1] == bDPP->neg[1]; } if (a->isSDWA()) { - SDWA_instruction* aSDWA = static_cast(a); - SDWA_instruction* bSDWA = static_cast(b); + SDWA_instruction* aSDWA = a->sdwa(); + SDWA_instruction* bSDWA = b->sdwa(); return aSDWA->sel[0] == bSDWA->sel[0] && aSDWA->sel[1] == bSDWA->sel[1] && aSDWA->dst_sel == bSDWA->dst_sel && @@ -221,13 +221,13 @@ struct InstrPred { case Format::SOPK: { if (a->opcode == aco_opcode::s_getreg_b32) return false; - SOPK_instruction* aK = static_cast(a); - SOPK_instruction* bK = static_cast(b); + SOPK_instruction* aK = a->sopk(); + SOPK_instruction* bK = b->sopk(); return aK->imm == bK->imm; } case Format::SMEM: { - SMEM_instruction* aS = static_cast(a); - SMEM_instruction* bS = static_cast(b); + SMEM_instruction* aS = a->smem(); + SMEM_instruction* bS = b->smem(); /* isel shouldn't be creating situations where this assertion fails */ assert(aS->prevent_overflow == bS->prevent_overflow); return 
aS->sync.can_reorder() && bS->sync.can_reorder() && @@ -236,8 +236,8 @@ struct InstrPred { aS->prevent_overflow == bS->prevent_overflow; } case Format::VINTRP: { - Interp_instruction* aI = static_cast(a); - Interp_instruction* bI = static_cast(b); + Interp_instruction* aI = a->vintrp(); + Interp_instruction* bI = b->vintrp(); if (aI->attribute != bI->attribute) return false; if (aI->component != bI->component) @@ -245,8 +245,8 @@ struct InstrPred { return true; } case Format::VOP3P: { - VOP3P_instruction* a3P = static_cast(a); - VOP3P_instruction* b3P = static_cast(b); + VOP3P_instruction* a3P = a->vop3p(); + VOP3P_instruction* b3P = b->vop3p(); for (unsigned i = 0; i < 3; i++) { if (a3P->neg_lo[i] != b3P->neg_lo[i] || a3P->neg_hi[i] != b3P->neg_hi[i]) @@ -257,15 +257,15 @@ struct InstrPred { a3P->clamp == b3P->clamp; } case Format::PSEUDO_REDUCTION: { - Pseudo_reduction_instruction *aR = static_cast(a); - Pseudo_reduction_instruction *bR = static_cast(b); + Pseudo_reduction_instruction *aR = a->reduction(); + Pseudo_reduction_instruction *bR = b->reduction(); return aR->pass_flags == bR->pass_flags && aR->reduce_op == bR->reduce_op && aR->cluster_size == bR->cluster_size; } case Format::MTBUF: { - MTBUF_instruction* aM = static_cast(a); - MTBUF_instruction* bM = static_cast(b); + MTBUF_instruction* aM = a->mtbuf(); + MTBUF_instruction* bM = b->mtbuf(); return aM->sync.can_reorder() && bM->sync.can_reorder() && aM->sync == bM->sync && aM->dfmt == bM->dfmt && @@ -280,8 +280,8 @@ struct InstrPred { aM->disable_wqm == bM->disable_wqm; } case Format::MUBUF: { - MUBUF_instruction* aM = static_cast(a); - MUBUF_instruction* bM = static_cast(b); + MUBUF_instruction* aM = a->mubuf(); + MUBUF_instruction* bM = b->mubuf(); return aM->sync.can_reorder() && bM->sync.can_reorder() && aM->sync == bM->sync && aM->offset == bM->offset && @@ -308,8 +308,8 @@ struct InstrPred { a->opcode != aco_opcode::ds_permute_b32 && a->opcode != aco_opcode::ds_swizzle_b32) return false; - 
DS_instruction* aD = static_cast(a); - DS_instruction* bD = static_cast(b); + DS_instruction* aD = a->ds(); + DS_instruction* bD = b->ds(); return aD->sync.can_reorder() && bD->sync.can_reorder() && aD->sync == bD->sync && aD->pass_flags == bD->pass_flags && @@ -318,8 +318,8 @@ struct InstrPred { aD->offset1 == bD->offset1; } case Format::MIMG: { - MIMG_instruction* aM = static_cast(a); - MIMG_instruction* bM = static_cast(b); + MIMG_instruction* aM = a->mimg(); + MIMG_instruction* bM = b->mimg(); return aM->sync.can_reorder() && bM->sync.can_reorder() && aM->sync == bM->sync && aM->dmask == bM->dmask && diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 7806c37..de45a98 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -940,7 +940,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) can_use_mod = can_use_mod && instr_info.can_use_input_modifiers[(int)instr->opcode]; if (instr->isSDWA()) - can_use_mod = can_use_mod && (static_cast(instr.get())->sel[i] & sdwa_asuint) == sdwa_udword; + can_use_mod = can_use_mod && (instr->sdwa()->sel[i] & sdwa_asuint) == sdwa_udword; else can_use_mod = can_use_mod && (instr->isDPP() || can_use_VOP3(ctx, instr)); @@ -949,11 +949,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) to_VOP3(ctx, instr); instr->operands[i] = Operand(info.temp); if (instr->isDPP()) - static_cast(instr.get())->abs[i] = true; + instr->dpp()->abs[i] = true; else if (instr->isSDWA()) - static_cast(instr.get())->abs[i] = true; + instr->sdwa()->abs[i] = true; else - static_cast(instr.get())->abs[i] = true; + instr->vop3()->abs[i] = true; } if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) { instr->opcode = i ? 
aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32; @@ -968,11 +968,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) to_VOP3(ctx, instr); instr->operands[i].setTemp(info.temp); if (instr->isDPP()) - static_cast(instr.get())->neg[i] = true; + instr->dpp()->neg[i] = true; else if (instr->isSDWA()) - static_cast(instr.get())->neg[i] = true; + instr->sdwa()->neg[i] = true; else - static_cast(instr.get())->neg[i] = true; + instr->vop3()->neg[i] = true; continue; } unsigned bits = get_operand_size(instr, i); @@ -999,7 +999,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) /* MUBUF: propagate constants and combine additions */ else if (instr->format == Format::MUBUF) { - MUBUF_instruction *mubuf = static_cast(instr.get()); + MUBUF_instruction *mubuf = instr->mubuf(); Temp base; uint32_t offset; while (info.is_temp()) @@ -1041,7 +1041,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) /* DS: combine additions */ else if (instr->format == Format::DS) { - DS_instruction *ds = static_cast(instr.get()); + DS_instruction *ds = instr->ds(); Temp base; uint32_t offset; bool has_usable_ds_offset = ctx.program->chip_class >= GFX7; @@ -1073,7 +1073,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) /* SMEM: propagate constants and combine additions */ else if (instr->format == Format::SMEM) { - SMEM_instruction *smem = static_cast(instr.get()); + SMEM_instruction *smem = instr->smem(); Temp base; uint32_t offset; bool prevent_overflow = smem->operands[0].size() > 2 || smem->prevent_overflow; @@ -1108,7 +1108,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) new_instr->nv = smem->nv; new_instr->disable_wqm = smem->disable_wqm; instr.reset(new_instr); - smem = static_cast(instr.get()); + smem = instr->smem(); } continue; } @@ -1365,7 +1365,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) } case aco_opcode::v_med3_f16: case aco_opcode::v_med3_f32: { /* 
clamp */ - VOP3_instruction* vop3 = static_cast(instr.get()); + VOP3_instruction* vop3 = instr->vop3(); if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] || vop3->neg[0] || vop3->neg[1] || vop3->neg[2] || vop3->omod != 0 || vop3->opsel != 0) @@ -1682,7 +1682,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr& instr) return false; if (op_instr[i]->isVOP3()) { - VOP3_instruction *vop3 = static_cast(op_instr[i]); + VOP3_instruction *vop3 = op_instr[i]->vop3(); if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2) return false; neg[i] = vop3->neg[0]; @@ -1798,7 +1798,7 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr& instr) Instruction *new_instr; if (cmp->isVOP3()) { VOP3_instruction *new_vop3 = create_instruction(new_op, asVOP3(Format::VOPC), 2, 1); - VOP3_instruction *cmp_vop3 = static_cast(cmp); + VOP3_instruction *cmp_vop3 = cmp->vop3(); memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); new_vop3->clamp = cmp_vop3->clamp; @@ -1885,7 +1885,7 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr& in return false; if (nan_test->isVOP3()) { - VOP3_instruction *vop3 = static_cast(nan_test); + VOP3_instruction *vop3 = nan_test->vop3(); if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2) return false; } @@ -1917,7 +1917,7 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr& in Instruction *new_instr; if (cmp->isVOP3()) { VOP3_instruction *new_vop3 = create_instruction(new_op, asVOP3(Format::VOPC), 2, 1); - VOP3_instruction *cmp_vop3 = static_cast(cmp); + VOP3_instruction *cmp_vop3 = cmp->vop3(); memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); new_vop3->clamp = cmp_vop3->clamp; @@ -1966,7 +1966,7 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr& instr) Instruction *new_instr; if 
(cmp->isVOP3()) { VOP3_instruction *new_vop3 = create_instruction(new_opcode, asVOP3(Format::VOPC), 2, 1); - VOP3_instruction *cmp_vop3 = static_cast(cmp); + VOP3_instruction *cmp_vop3 = cmp->vop3(); memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); new_vop3->clamp = cmp_vop3->clamp; @@ -1976,7 +1976,7 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr& instr) } else if (cmp->isSDWA()) { SDWA_instruction *new_sdwa = create_instruction( new_opcode, (Format)((uint16_t)Format::SDWA | (uint16_t)Format::VOPC), 2, 1); - SDWA_instruction *cmp_sdwa = static_cast(cmp); + SDWA_instruction *cmp_sdwa = cmp->sdwa(); memcpy(new_sdwa->abs, cmp_sdwa->abs, sizeof(new_sdwa->abs)); memcpy(new_sdwa->sel, cmp_sdwa->sel, sizeof(new_sdwa->sel)); memcpy(new_sdwa->neg, cmp_sdwa->neg, sizeof(new_sdwa->neg)); @@ -2019,8 +2019,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2, if (fixed_to_exec(op2_instr->operands[0]) || fixed_to_exec(op2_instr->operands[1])) return false; - VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? static_cast(op1_instr) : NULL; - VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? static_cast(op2_instr) : NULL; + VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? op1_instr->vop3() : NULL; + VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? 
op2_instr->vop3() : NULL; if (op1_instr->isSDWA() || op2_instr->isSDWA()) return false; @@ -2641,11 +2641,11 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr& instr) assert(!ctx.info[instr->definitions[0].tempId()].is_mad()); if (instr->isSDWA()) { - if (!apply_omod_clamp_helper(ctx, static_cast(instr.get()), def_info)) + if (!apply_omod_clamp_helper(ctx, instr->sdwa(), def_info)) return false; } else { to_VOP3(ctx, instr); - if (!apply_omod_clamp_helper(ctx, static_cast(instr.get()), def_info)) + if (!apply_omod_clamp_helper(ctx, instr->vop3(), def_info)) return false; } @@ -2767,7 +2767,7 @@ void propagate_swizzles(VOP3P_instruction* instr, uint8_t opsel_lo, uint8_t opse void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr& instr) { - VOP3P_instruction* vop3p = static_cast(instr.get()); + VOP3P_instruction* vop3p = instr->vop3p(); /* apply clamp */ if (instr->opcode == aco_opcode::v_pk_mul_f16 && @@ -2778,7 +2778,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr& instr) ssa_info& info = ctx.info[instr->operands[0].tempId()]; if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) { - VOP3P_instruction* candidate = static_cast(ctx.info[instr->operands[0].tempId()].instr); + VOP3P_instruction* candidate = ctx.info[instr->operands[0].tempId()].instr->vop3p(); candidate->clamp = true; propagate_swizzles(candidate, vop3p->opsel_lo, vop3p->opsel_hi); std::swap(instr->definitions[0], candidate->definitions[0]); @@ -2804,7 +2804,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr& instr) if (!check_vop3_operands(ctx, 2, ops)) continue; - VOP3P_instruction* fneg = static_cast(info.instr); + VOP3P_instruction* fneg = info.instr->vop3p(); if (fneg->clamp) continue; instr->operands[i] = fneg->operands[0]; @@ -2849,7 +2849,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr& instr) continue; /* no clamp allowed between mul and add */ - if (static_cast(info.instr)->clamp) + if (info.instr->vop3p()->clamp) 
continue; mul_instr = info.instr; @@ -2875,7 +2875,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr& instr) /* turn packed mul+add into v_pk_fma_f16 */ assert(mul_instr->format == Format::VOP3P); aco_ptr fma{create_instruction(aco_opcode::v_pk_fma_f16, Format::VOP3P, 3, 1)}; - VOP3P_instruction* mul = static_cast(mul_instr); + VOP3P_instruction* mul = mul_instr->vop3p(); for (unsigned i = 0; i < 2; i++) { fma->operands[i] = op[i]; fma->neg_lo[i] = mul->neg_lo[i]; @@ -2944,7 +2944,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr if (mul_instr->operands[0].isLiteral()) return; - if (mul_instr->isVOP3() && static_cast(mul_instr)->clamp) + if (mul_instr->isVOP3() && mul_instr->vop3()->clamp) return; if (mul_instr->isSDWA()) return; @@ -2958,9 +2958,9 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr instr->operands[0] = mul_instr->operands[0]; instr->operands[1] = mul_instr->operands[1]; instr->definitions[0] = def; - VOP3_instruction* new_mul = static_cast(instr.get()); + VOP3_instruction* new_mul = instr->vop3(); if (mul_instr->isVOP3()) { - VOP3_instruction* mul = static_cast(mul_instr); + VOP3_instruction* mul = mul_instr->vop3(); new_mul->neg[0] = mul->neg[0] && !is_abs; new_mul->neg[1] = mul->neg[1] && !is_abs; new_mul->abs[0] = mul->abs[0] || is_abs; @@ -3002,9 +3002,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr continue; /* no clamp/omod allowed between mul and add */ - if (info.instr->isVOP3() && - (static_cast(info.instr)->clamp || - static_cast(info.instr)->omod)) + if (info.instr->isVOP3() && (info.instr->vop3()->clamp || info.instr->vop3()->omod)) continue; Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]}; @@ -3035,7 +3033,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr bool clamp = false; if (mul_instr->isVOP3()) { - VOP3_instruction* vop3 = static_cast (mul_instr); + VOP3_instruction* vop3 = 
mul_instr->vop3(); neg[0] = vop3->neg[0]; neg[1] = vop3->neg[1]; abs[0] = vop3->abs[0]; @@ -3043,7 +3041,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr } if (instr->isVOP3()) { - VOP3_instruction* vop3 = static_cast (instr.get()); + VOP3_instruction* vop3 = instr->vop3(); neg[2] = vop3->neg[add_op_idx]; abs[2] = vop3->abs[add_op_idx]; omod = vop3->omod; diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index 6324355..a4e3b8f 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -273,13 +273,12 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) { switch (instr->format) { case Format::SOPK: { - const SOPK_instruction* sopk = static_cast(instr); + const SOPK_instruction* sopk = instr->sopk(); fprintf(output, " imm:%d", sopk->imm & 0x8000 ? (sopk->imm - 65536) : sopk->imm); break; } case Format::SOPP: { - const SOPP_instruction* sopp = static_cast(instr); - uint16_t imm = sopp->imm; + uint16_t imm = instr->sopp()->imm; switch (instr->opcode) { case aco_opcode::s_waitcnt: { /* we usually should check the chip class for vmcnt/lgkm, but @@ -341,12 +340,12 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) break; } } - if (sopp->block != -1) - fprintf(output, " block:BB%d", sopp->block); + if (instr->sopp()->block != -1) + fprintf(output, " block:BB%d", instr->sopp()->block); break; } case Format::SMEM: { - const SMEM_instruction* smem = static_cast(instr); + const SMEM_instruction* smem = instr->smem(); if (smem->glc) fprintf(output, " glc"); if (smem->dlc) @@ -357,12 +356,12 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) break; } case Format::VINTRP: { - const Interp_instruction* vintrp = static_cast(instr); + const Interp_instruction* vintrp = instr->vintrp(); fprintf(output, " attr%d.%c", vintrp->attribute, "xyzw"[vintrp->component]); break; } case Format::DS: { - const 
DS_instruction* ds = static_cast(instr); + const DS_instruction* ds = instr->ds(); if (ds->offset0) fprintf(output, " offset0:%u", ds->offset0); if (ds->offset1) @@ -373,7 +372,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) break; } case Format::MUBUF: { - const MUBUF_instruction* mubuf = static_cast(instr); + const MUBUF_instruction* mubuf = instr->mubuf(); if (mubuf->offset) fprintf(output, " offset:%u", mubuf->offset); if (mubuf->offen) @@ -398,7 +397,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) break; } case Format::MIMG: { - const MIMG_instruction* mimg = static_cast(instr); + const MIMG_instruction* mimg = instr->mimg(); unsigned identity_dmask = !instr->definitions.empty() ? (1 << instr->definitions[0].size()) - 1 : 0xf; @@ -458,7 +457,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) break; } case Format::EXP: { - const Export_instruction* exp = static_cast(instr); + const Export_instruction* exp = instr->exp(); unsigned identity_mask = exp->compressed ? 
0x5 : 0xf; if ((exp->enabled_mask & identity_mask) != identity_mask) fprintf(output, " en:%c%c%c%c", @@ -486,7 +485,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) break; } case Format::PSEUDO_BRANCH: { - const Pseudo_branch_instruction* branch = static_cast(instr); + const Pseudo_branch_instruction* branch = instr->branch(); /* Note: BB0 cannot be a branch target */ if (branch->target[0] != 0) fprintf(output, " BB%d", branch->target[0]); @@ -495,14 +494,14 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) break; } case Format::PSEUDO_REDUCTION: { - const Pseudo_reduction_instruction* reduce = static_cast(instr); + const Pseudo_reduction_instruction* reduce = instr->reduction(); fprintf(output, " op:%s", reduce_ops[reduce->reduce_op]); if (reduce->cluster_size) fprintf(output, " cluster_size:%u", reduce->cluster_size); break; } case Format::PSEUDO_BARRIER: { - const Pseudo_barrier_instruction* barrier = static_cast(instr); + const Pseudo_barrier_instruction* barrier = instr->barrier(); print_sync(barrier->sync, output); print_scope(barrier->exec_scope, output, "exec_scope"); break; @@ -510,7 +509,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) case Format::FLAT: case Format::GLOBAL: case Format::SCRATCH: { - const FLAT_instruction* flat = static_cast(instr); + const FLAT_instruction* flat = instr->flatlike(); if (flat->offset) fprintf(output, " offset:%u", flat->offset); if (flat->glc) @@ -529,7 +528,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) break; } case Format::MTBUF: { - const MTBUF_instruction* mtbuf = static_cast(instr); + const MTBUF_instruction* mtbuf = instr->mtbuf(); fprintf(output, " dfmt:"); switch (mtbuf->dfmt) { case V_008F0C_BUF_DATA_FORMAT_8: fprintf(output, "8"); break; @@ -579,7 +578,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) break; } case Format::VOP3P: { - 
if (static_cast(instr)->clamp) + if (instr->vop3p()->clamp) fprintf(output, " clamp"); break; } @@ -588,7 +587,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) } } if (instr->isVOP3()) { - const VOP3_instruction* vop3 = static_cast(instr); + const VOP3_instruction* vop3 = instr->vop3(); switch (vop3->omod) { case 1: fprintf(output, " *2"); @@ -605,7 +604,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) if (vop3->opsel & (1 << 3)) fprintf(output, " opsel_hi"); } else if (instr->isDPP()) { - const DPP_instruction* dpp = static_cast(instr); + const DPP_instruction* dpp = instr->dpp(); if (dpp->dpp_ctrl <= 0xff) { fprintf(output, " quad_perm:[%d,%d,%d,%d]", dpp->dpp_ctrl & 0x3, (dpp->dpp_ctrl >> 2) & 0x3, @@ -642,7 +641,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) if (dpp->bound_ctrl) fprintf(output, " bound_ctrl:1"); } else if ((int)instr->format & (int)Format::SDWA) { - const SDWA_instruction* sdwa = static_cast(instr); + const SDWA_instruction* sdwa = instr->sdwa(); switch (sdwa->omod) { case 1: fprintf(output, " *2"); @@ -694,7 +693,7 @@ void aco_print_instr(const Instruction *instr, FILE *output) bool *const opsel = (bool *)alloca(instr->operands.size() * sizeof(bool)); uint8_t *const sel = (uint8_t *)alloca(instr->operands.size() * sizeof(uint8_t)); if ((int)instr->format & (int)Format::VOP3) { - const VOP3_instruction* vop3 = static_cast(instr); + const VOP3_instruction* vop3 = instr->vop3(); for (unsigned i = 0; i < instr->operands.size(); ++i) { abs[i] = vop3->abs[i]; neg[i] = vop3->neg[i]; @@ -702,7 +701,7 @@ void aco_print_instr(const Instruction *instr, FILE *output) sel[i] = sdwa_udword; } } else if (instr->isDPP()) { - const DPP_instruction* dpp = static_cast(instr); + const DPP_instruction* dpp = instr->dpp(); for (unsigned i = 0; i < instr->operands.size(); ++i) { abs[i] = i < 2 ? dpp->abs[i] : false; neg[i] = i < 2 ? 
dpp->neg[i] : false; @@ -710,7 +709,7 @@ void aco_print_instr(const Instruction *instr, FILE *output) sel[i] = sdwa_udword; } } else if (instr->isSDWA()) { - const SDWA_instruction* sdwa = static_cast(instr); + const SDWA_instruction* sdwa = instr->sdwa(); for (unsigned i = 0; i < instr->operands.size(); ++i) { abs[i] = i < 2 ? sdwa->abs[i] : false; neg[i] = i < 2 ? sdwa->neg[i] : false; @@ -757,7 +756,7 @@ void aco_print_instr(const Instruction *instr, FILE *output) fprintf(output, "|"); if (instr->format == Format::VOP3P) { - const VOP3P_instruction* vop3 = static_cast(instr); + const VOP3P_instruction* vop3 = instr->vop3p(); if ((vop3->opsel_lo & (1 << i)) || !(vop3->opsel_hi & (1 << i))) { fprintf(output, ".%c%c", vop3->opsel_lo & (1 << i) ? 'y' : 'x', diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp index a1e69a6..1c915f2 100644 --- a/src/amd/compiler/aco_reduce_assign.cpp +++ b/src/amd/compiler/aco_reduce_assign.cpp @@ -91,7 +91,7 @@ void setup_reduce_temp(Program* program) if (instr->format != Format::PSEUDO_REDUCTION) continue; - ReduceOp op = static_cast(instr)->reduce_op; + ReduceOp op = instr->reduction()->reduce_op; reduceTmp_in_loop |= block.loop_nest_depth > 0; if ((int)last_top_level_block_idx != inserted_at) { @@ -115,7 +115,7 @@ void setup_reduce_temp(Program* program) } /* same as before, except for the vector temporary instead of the reduce temporary */ - unsigned cluster_size = static_cast(instr)->cluster_size; + unsigned cluster_size = instr->reduction()->cluster_size; bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 || op == fmin64 || op == fmax64 || op == umin64 || op == umax64 || op == imin64 || op == imax64 || diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 4b083ee..c6e482f 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -503,11 +503,10 @@ void 
add_subdword_operand(ra_ctx& ctx, aco_ptr& instr, unsigned idx update_phi_map(ctx, tmp.get(), instr.get()); return; } else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) { - VOP3_instruction* vop3 = static_cast(instr.get()); - vop3->opsel |= (byte / 2) << idx; + instr->vop3()->opsel |= (byte / 2) << idx; return; } else if (instr->format == Format::VOP3P && byte == 2) { - VOP3P_instruction* vop3p = static_cast(instr.get()); + VOP3P_instruction* vop3p = instr->vop3p(); assert(!(vop3p->opsel_lo & (1 << idx))); vop3p->opsel_lo |= 1 << idx; vop3p->opsel_hi |= 1 << idx; @@ -614,7 +613,7 @@ void add_subdword_definition(Program *program, aco_ptr& instr, unsi convert_to_SDWA(chip, instr); return; } else if (reg.byte() && rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, -1, reg.byte() / 2)) { - VOP3_instruction *vop3 = static_cast(instr.get()); + VOP3_instruction *vop3 = instr->vop3(); if (reg.byte() == 2) vop3->opsel |= (1 << 3); /* dst in high half */ return; @@ -1569,9 +1568,8 @@ void handle_pseudo(ra_ctx& ctx, if (!needs_scratch_reg) return; - Pseudo_instruction *pi = (Pseudo_instruction *)instr; if (reg_file[scc]) { - pi->tmp_in_scc = true; + instr->pseudo()->tmp_in_scc = true; int reg = ctx.max_used_sgpr; for (; reg >= 0 && reg_file[PhysReg{(unsigned)reg}]; reg--) @@ -1587,9 +1585,9 @@ void handle_pseudo(ra_ctx& ctx, } adjust_max_used_regs(ctx, s1, reg); - pi->scratch_sgpr = PhysReg{(unsigned)reg}; + instr->pseudo()->scratch_sgpr = PhysReg{(unsigned)reg}; } else { - pi->tmp_in_scc = false; + instr->pseudo()->tmp_in_scc = false; } } @@ -2158,7 +2156,7 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc if (instr->format == Format::EXP || (instr->isVMEM() && i == 3 && ctx.program->chip_class == GFX6) || - (instr->format == Format::DS && static_cast(instr.get())->gds)) { + (instr->format == Format::DS && instr->ds()->gds)) { for (unsigned j = 0; j < operand.size(); j++) ctx.war_hint.set(operand.physReg().reg() + j); 
} diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index fc9be0e..0a97984 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -320,7 +320,7 @@ void MoveState::upwards_skip() bool is_gs_or_done_sendmsg(const Instruction *instr) { if (instr->opcode == aco_opcode::s_sendmsg) { - uint16_t imm = static_cast(instr)->imm; + uint16_t imm = instr->sopp()->imm; return (imm & sendmsg_id_mask) == _sendmsg_gs || (imm & sendmsg_id_mask) == _sendmsg_gs_done; } @@ -329,10 +329,8 @@ bool is_gs_or_done_sendmsg(const Instruction *instr) bool is_done_sendmsg(const Instruction *instr) { - if (instr->opcode == aco_opcode::s_sendmsg) { - uint16_t imm = static_cast(instr)->imm; - return (imm & sendmsg_id_mask) == _sendmsg_gs_done; - } + if (instr->opcode == aco_opcode::s_sendmsg) + return (instr->sopp()->imm & sendmsg_id_mask) == _sendmsg_gs_done; return false; } @@ -382,7 +380,7 @@ void add_memory_event(memory_event_set *set, Instruction *instr, memory_sync_inf { set->has_control_barrier |= is_done_sendmsg(instr); if (instr->opcode == aco_opcode::p_barrier) { - Pseudo_barrier_instruction *bar = static_cast(instr); + Pseudo_barrier_instruction *bar = instr->barrier(); if (bar->sync.semantics & semantic_acquire) set->bar_acquire |= bar->sync.storage; if (bar->sync.semantics & semantic_release) @@ -859,7 +857,7 @@ void schedule_block(sched_ctx& ctx, Program *program, Block* block, live& live_v Instruction* current = block->instructions[idx].get(); if (block->kind & block_kind_export_end && current->format == Format::EXP) { - unsigned target = static_cast(current)->dest; + unsigned target = current->exp()->dest; if (target >= V_008DFC_SQ_EXP_POS && target < V_008DFC_SQ_EXP_PRIM) { ctx.mv.current = current; schedule_position_export(ctx, block, live_vars.register_demand[block->index], current, idx); diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 0a6e8be..be1ed48 100644 --- 
a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -283,7 +283,7 @@ aco_ptr do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t res.reset(create_instruction(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size())); } else if (instr->format == Format::SOPK) { res.reset(create_instruction(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size())); - static_cast(res.get())->imm = static_cast(instr)->imm; + res->sopk()->imm = instr->sopk()->imm; } for (unsigned i = 0; i < instr->operands.size(); i++) { res->operands[i] = instr->operands[i]; @@ -1589,11 +1589,11 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { bld.insert(split); for (unsigned i = 0; i < temp.size(); i++) { Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false, true); - static_cast(instr)->sync = memory_sync_info(storage_vgpr_spill, semantic_private); + instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private); } } else { Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false, true); - static_cast(instr)->sync = memory_sync_info(storage_vgpr_spill, semantic_private); + instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private); } } else { ctx.program->config->spilled_sgprs += (*it)->operands[0].size(); @@ -1658,12 +1658,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { Temp tmp = bld.tmp(v1); vec->operands[i] = Operand(tmp); Instruction *instr = bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), scratch_offset, offset + i * 4, false, true); - static_cast(instr)->sync = memory_sync_info(storage_vgpr_spill, semantic_private); + instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private); } bld.insert(vec); } else { Instruction *instr = bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), 
scratch_offset, offset, false, true); - static_cast(instr)->sync = memory_sync_info(storage_vgpr_spill, semantic_private); + instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private); } } else { uint32_t spill_slot = slots[spill_id]; diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index 76a843a..1b5d2ca 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -178,7 +178,7 @@ void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block) pred->linear_succs[0] = succ_idx; ctx.program->blocks[succ_idx].linear_preds[i] = pred->index; - Pseudo_branch_instruction *branch = static_cast(pred->instructions.back().get()); + Pseudo_branch_instruction *branch = pred->instructions.back()->branch(); assert(branch->format == Format::PSEUDO_BRANCH); branch->target[0] = succ_idx; branch->target[1] = succ_idx; @@ -196,7 +196,7 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block) Block& pred = ctx.program->blocks[block->linear_preds[0]]; Block& succ = ctx.program->blocks[block->linear_succs[0]]; - Pseudo_branch_instruction* branch = static_cast(pred.instructions.back().get()); + Pseudo_branch_instruction* branch = pred.instructions.back()->branch(); if (branch->opcode == aco_opcode::p_branch) { branch->target[0] = succ.index; branch->target[1] = succ.index; diff --git a/src/amd/compiler/aco_statistics.cpp b/src/amd/compiler/aco_statistics.cpp index fc0cf84..b09f9a7 100644 --- a/src/amd/compiler/aco_statistics.cpp +++ b/src/amd/compiler/aco_statistics.cpp @@ -46,7 +46,7 @@ void collect_preasm_stats(Program *program) program->statistics[statistic_instructions] += block.instructions.size(); for (aco_ptr& instr : block.instructions) { - if (instr->format == Format::SOPP && static_cast(instr.get())->block != -1) + if (instr->format == Format::SOPP && instr->sopp()->block != -1) program->statistics[statistic_branches]++; if (instr->opcode == 
aco_opcode::p_constaddr) diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 5b6aa53..c4e1cd4 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -148,7 +148,7 @@ bool validate_ir(Program* program) check(program->chip_class >= GFX8, "SDWA is GFX8+ only", instr.get()); - SDWA_instruction *sdwa = static_cast(instr.get()); + SDWA_instruction *sdwa = instr->sdwa(); check(sdwa->omod == 0 || program->chip_class >= GFX9, "SDWA omod only supported on GFX9+", instr.get()); if (base_format == Format::VOPC) { check(sdwa->clamp == false || program->chip_class == GFX8, "SDWA VOPC clamp only supported on GFX8", instr.get()); @@ -188,7 +188,7 @@ bool validate_ir(Program* program) /* check opsel */ if (instr->isVOP3()) { - VOP3_instruction *vop3 = static_cast(instr.get()); + VOP3_instruction *vop3 = instr->vop3(); check(vop3->opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get()); for (unsigned i = 0; i < 3; i++) { @@ -381,9 +381,7 @@ bool validate_ir(Program* program) for (const Operand &op : instr->operands) check(op.regClass().type() == RegType::vgpr, "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.", instr.get()); - unsigned cluster_size = static_cast(instr.get())->cluster_size; - - if (instr->opcode == aco_opcode::p_reduce && cluster_size == program->wave_size) + if (instr->opcode == aco_opcode::p_reduce && instr->reduction()->cluster_size == program->wave_size) check(instr->definitions[0].regClass().type() == RegType::sgpr, "The result of unclustered reductions must go into an SGPR.", instr.get()); else check(instr->definitions[0].regClass().type() == RegType::vgpr, "The result of scans and clustered reductions must go into a VGPR.", instr.get()); @@ -555,7 +553,7 @@ bool validate_subdword_operand(chip_class chip, const aco_ptr& inst return byte == 0; if (instr->format == Format::PSEUDO && chip >= GFX8) return true; - if (instr->isSDWA() && 
(static_cast(instr.get())->sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes())) + if (instr->isSDWA() && (instr->sdwa()->sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes())) return true; if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1)) return true; @@ -605,7 +603,7 @@ bool validate_subdword_definition(chip_class chip, const aco_ptr& i if (instr->format == Format::PSEUDO && chip >= GFX8) return true; - if (instr->isSDWA() && static_cast(instr.get())->dst_sel == (sdwa_isra | def.bytes())) + if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes())) return true; if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1)) return true; @@ -636,7 +634,7 @@ unsigned get_subdword_bytes_written(Program *program, const aco_ptr if (instr->format == Format::PSEUDO) return chip >= GFX8 ? def.bytes() : def.size() * 4u; - if (instr->isSDWA() && static_cast(instr.get())->dst_sel == (sdwa_isra | def.bytes())) + if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes())) return def.bytes(); switch (instr->opcode) { diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index f914bce..c0c2165 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -735,7 +735,7 @@ BEGIN_TEST(optimize.add3) //! v1: %res1 = v_add_u32 %a, %tmp1 //! p_unit_test 1, %res1 tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); - static_cast(tmp.instr)->clamp = true; + tmp.instr->vop3()->clamp = true; writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); //! v1: %tmp2 = v_add_u32 %b, %c @@ -743,7 +743,7 @@ BEGIN_TEST(optimize.add3) //! 
p_unit_test 2, %res2 tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp); - static_cast(tmp.instr)->clamp = true; + tmp.instr->vop3()->clamp = true; writeout(2, tmp); finish_opt_test(); diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index bf3c498..2c79203 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -197,7 +197,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) Definition(v0_lo, v1), Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b)); - static_cast(pseudo)->scratch_sgpr = m0; + pseudo->pseudo()->scratch_sgpr = m0; //~gfx[67]! p_unit_test 14 //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8] -- 2.7.4