From e115b01948f90dcb922c626438a0e83a796d5ceb Mon Sep 17 00:00:00 2001 From: Rhys Perry Date: Thu, 21 Jan 2021 16:13:34 +0000 Subject: [PATCH] aco: return references in instruction cast methods MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Signed-off-by: Rhys Perry Reviewed-by: Daniel Schürmann Part-of: --- src/amd/compiler/aco_assembler.cpp | 260 ++++++++++---------- src/amd/compiler/aco_insert_NOPs.cpp | 34 +-- src/amd/compiler/aco_insert_exec_mask.cpp | 18 +- src/amd/compiler/aco_insert_waitcnt.cpp | 52 ++-- src/amd/compiler/aco_instruction_selection.cpp | 32 +-- src/amd/compiler/aco_ir.cpp | 46 ++-- src/amd/compiler/aco_ir.h | 126 +++++----- src/amd/compiler/aco_lower_to_hw_instr.cpp | 34 +-- src/amd/compiler/aco_opt_value_numbering.cpp | 212 ++++++++--------- src/amd/compiler/aco_optimizer.cpp | 233 +++++++++--------- src/amd/compiler/aco_print_ir.cpp | 318 ++++++++++++------------- src/amd/compiler/aco_reduce_assign.cpp | 4 +- src/amd/compiler/aco_register_allocation.cpp | 22 +- src/amd/compiler/aco_scheduler.cpp | 22 +- src/amd/compiler/aco_spill.cpp | 10 +- src/amd/compiler/aco_ssa_elimination.cpp | 52 ++-- src/amd/compiler/aco_statistics.cpp | 2 +- src/amd/compiler/aco_validate.cpp | 24 +- src/amd/compiler/tests/test_optimizer.cpp | 4 +- src/amd/compiler/tests/test_to_hw_instr.cpp | 2 +- 20 files changed, 753 insertions(+), 754 deletions(-) diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index c447516..48a2b55 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -109,7 +109,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::SOPK: { - SOPK_instruction *sopk = instr->sopk(); + SOPK_instruction& sopk = instr->sopk(); if (instr->opcode == aco_opcode::s_subvector_loop_begin) { assert(ctx.chip_class >= GFX10); @@ -121,7 +121,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* /* Adjust s_subvector_loop_begin instruction to the address after the end */ out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos); /* Adjust s_subvector_loop_end instruction to the address after the beginning */ - sopk->imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size()); + sopk.imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size()); ctx.subvector_begin_pos = -1; } @@ -132,7 +132,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* instr->definitions[0].physReg() << 16 : !instr->operands.empty() && instr->operands[0].physReg() <= 127 ? instr->operands[0].physReg() << 16 : 0; - encoding |= sopk->imm; + encoding |= sopk.imm; out.push_back(encoding); break; } @@ -157,19 +157,19 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::SOPP: { - SOPP_instruction* sopp = instr->sopp(); + SOPP_instruction& sopp = instr->sopp(); uint32_t encoding = (0b101111111 << 23); encoding |= opcode << 16; - encoding |= (uint16_t) sopp->imm; - if (sopp->block != -1) { - sopp->pass_flags = 0; - ctx.branches.emplace_back(out.size(), sopp); + encoding |= (uint16_t) sopp.imm; + if (sopp.block != -1) { + sopp.pass_flags = 0; + ctx.branches.emplace_back(out.size(), &sopp); } out.push_back(encoding); break; } case Format::SMEM: { - SMEM_instruction* smem = instr->smem(); + SMEM_instruction& smem = instr->smem(); bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4); bool is_load = !instr->definitions.empty(); uint32_t encoding = 0; @@ -196,16 +196,16 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* if (ctx.chip_class <= GFX9) { encoding = (0b110000 << 26); - assert(!smem->dlc); /* Device-level coherent is not supported on GFX9 and lower */ - encoding |= smem->nv ? 1 << 15 : 0; + assert(!smem.dlc); /* Device-level coherent is not supported on GFX9 and lower */ + encoding |= smem.nv ? 1 << 15 : 0; } else { encoding = (0b111101 << 26); - assert(!smem->nv); /* Non-volatile is not supported on GFX10 */ - encoding |= smem->dlc ? 1 << 14 : 0; + assert(!smem.nv); /* Non-volatile is not supported on GFX10 */ + encoding |= smem.dlc ? 1 << 14 : 0; } encoding |= opcode << 18; - encoding |= smem->glc ? 1 << 16 : 0; + encoding |= smem.glc ? 1 << 16 : 0; if (ctx.chip_class <= GFX9) { if (instr->operands.size() >= 2) @@ -284,7 +284,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::VINTRP: { - Interp_instruction* interp = instr->vintrp(); + Interp_instruction& interp = instr->vintrp(); uint32_t encoding = 0; if (instr->opcode == aco_opcode::v_interp_p1ll_f16 || @@ -304,8 +304,8 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* out.push_back(encoding); encoding = 0; - encoding |= interp->attribute; - encoding |= interp->component << 6; + encoding |= interp.attribute; + encoding |= interp.component << 6; encoding |= instr->operands[0].physReg() << 9; if (instr->opcode == aco_opcode::v_interp_p2_f16 || instr->opcode == aco_opcode::v_interp_p2_legacy_f16 || @@ -323,8 +323,8 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* assert(encoding); encoding |= (0xFF & instr->definitions[0].physReg()) << 18; encoding |= opcode << 16; - encoding |= interp->attribute << 10; - encoding |= interp->component << 8; + encoding |= interp.attribute << 10; + encoding |= interp.component << 8; if (instr->opcode == aco_opcode::v_interp_mov_f32) encoding |= (0x3 & instr->operands[0].constantValue()); else @@ -334,17 +334,17 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::DS: { - DS_instruction* ds = instr->ds(); + DS_instruction& ds = instr->ds(); uint32_t encoding = (0b110110 << 26); if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { encoding |= opcode << 17; - encoding |= (ds->gds ? 1 : 0) << 16; + encoding |= (ds.gds ? 1 : 0) << 16; } else { encoding |= opcode << 18; - encoding |= (ds->gds ? 1 : 0) << 17; + encoding |= (ds.gds ? 1 : 0) << 17; } - encoding |= ((0xFF & ds->offset1) << 8); - encoding |= (0xFFFF & ds->offset0); + encoding |= ((0xFF & ds.offset1) << 8); + encoding |= (0xFFFF & ds.offset0); out.push_back(encoding); encoding = 0; unsigned reg = !instr->definitions.empty() ? instr->definitions[0].physReg() : 0; @@ -358,30 +358,30 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::MUBUF: { - MUBUF_instruction* mubuf = instr->mubuf(); + MUBUF_instruction& mubuf = instr->mubuf(); uint32_t encoding = (0b111000 << 26); encoding |= opcode << 18; - encoding |= (mubuf->lds ? 1 : 0) << 16; - encoding |= (mubuf->glc ? 1 : 0) << 14; - encoding |= (mubuf->idxen ? 1 : 0) << 13; - assert(!mubuf->addr64 || ctx.chip_class <= GFX7); + encoding |= (mubuf.lds ? 1 : 0) << 16; + encoding |= (mubuf.glc ? 1 : 0) << 14; + encoding |= (mubuf.idxen ? 1 : 0) << 13; + assert(!mubuf.addr64 || ctx.chip_class <= GFX7); if (ctx.chip_class == GFX6 || ctx.chip_class == GFX7) - encoding |= (mubuf->addr64 ? 1 : 0) << 15; - encoding |= (mubuf->offen ? 1 : 0) << 12; + encoding |= (mubuf.addr64 ? 1 : 0) << 15; + encoding |= (mubuf.offen ? 1 : 0) << 12; if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { - assert(!mubuf->dlc); /* Device-level coherent is not supported on GFX9 and lower */ - encoding |= (mubuf->slc ? 1 : 0) << 17; + assert(!mubuf.dlc); /* Device-level coherent is not supported on GFX9 and lower */ + encoding |= (mubuf.slc ? 1 : 0) << 17; } else if (ctx.chip_class >= GFX10) { - encoding |= (mubuf->dlc ? 1 : 0) << 15; + encoding |= (mubuf.dlc ? 1 : 0) << 15; } - encoding |= 0x0FFF & mubuf->offset; + encoding |= 0x0FFF & mubuf.offset; out.push_back(encoding); encoding = 0; if (ctx.chip_class <= GFX7 || ctx.chip_class >= GFX10) { - encoding |= (mubuf->slc ? 1 : 0) << 22; + encoding |= (mubuf.slc ? 1 : 0) << 22; } encoding |= instr->operands[2].physReg() << 24; - encoding |= (mubuf->tfe ? 1 : 0) << 23; + encoding |= (mubuf.tfe ? 1 : 0) << 23; encoding |= (instr->operands[0].physReg() >> 2) << 16; unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg(); encoding |= (0xFF & reg) << 8; @@ -390,17 +390,17 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; } case Format::MTBUF: { - MTBUF_instruction* mtbuf = instr->mtbuf(); + MTBUF_instruction& mtbuf = instr->mtbuf(); - uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt); + uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf.dfmt, mtbuf.nfmt); uint32_t encoding = (0b111010 << 26); assert(img_format <= 0x7F); - assert(!mtbuf->dlc || ctx.chip_class >= GFX10); - encoding |= (mtbuf->dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */ - encoding |= (mtbuf->glc ? 1 : 0) << 14; - encoding |= (mtbuf->idxen ? 1 : 0) << 13; - encoding |= (mtbuf->offen ? 1 : 0) << 12; - encoding |= 0x0FFF & mtbuf->offset; + assert(!mtbuf.dlc || ctx.chip_class >= GFX10); + encoding |= (mtbuf.dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */ + encoding |= (mtbuf.glc ? 1 : 0) << 14; + encoding |= (mtbuf.idxen ? 1 : 0) << 13; + encoding |= (mtbuf.offen ? 1 : 0) << 12; + encoding |= 0x0FFF & mtbuf.offset; encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */ if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { @@ -413,8 +413,8 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding = 0; encoding |= instr->operands[2].physReg() << 24; - encoding |= (mtbuf->tfe ? 1 : 0) << 23; - encoding |= (mtbuf->slc ? 1 : 0) << 22; + encoding |= (mtbuf.tfe ? 1 : 0) << 23; + encoding |= (mtbuf.slc ? 1 : 0) << 22; encoding |= (instr->operands[0].physReg() >> 2) << 16; unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg(); encoding |= (0xFF & reg) << 8; @@ -437,26 +437,26 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* assert(!use_nsa || ctx.chip_class >= GFX10); unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0; - MIMG_instruction* mimg = instr->mimg(); + MIMG_instruction& mimg = instr->mimg(); uint32_t encoding = (0b111100 << 26); - encoding |= mimg->slc ? 1 << 25 : 0; + encoding |= mimg.slc ? 1 << 25 : 0; encoding |= opcode << 18; - encoding |= mimg->lwe ? 1 << 17 : 0; - encoding |= mimg->tfe ? 1 << 16 : 0; - encoding |= mimg->glc ? 1 << 13 : 0; - encoding |= mimg->unrm ? 1 << 12 : 0; + encoding |= mimg.lwe ? 1 << 17 : 0; + encoding |= mimg.tfe ? 1 << 16 : 0; + encoding |= mimg.glc ? 1 << 13 : 0; + encoding |= mimg.unrm ? 1 << 12 : 0; if (ctx.chip_class <= GFX9) { - assert(!mimg->dlc); /* Device-level coherent is not supported on GFX9 and lower */ - assert(!mimg->r128); - encoding |= mimg->a16 ? 1 << 15 : 0; - encoding |= mimg->da ? 1 << 14 : 0; + assert(!mimg.dlc); /* Device-level coherent is not supported on GFX9 and lower */ + assert(!mimg.r128); + encoding |= mimg.a16 ? 1 << 15 : 0; + encoding |= mimg.da ? 1 << 14 : 0; } else { - encoding |= mimg->r128 ? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */ + encoding |= mimg.r128 ? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */ encoding |= nsa_dwords << 1; - encoding |= mimg->dim << 3; /* GFX10: dimensionality instead of declare array */ - encoding |= mimg->dlc ? 1 << 7 : 0; + encoding |= mimg.dim << 3; /* GFX10: dimensionality instead of declare array */ + encoding |= mimg.dlc ? 1 << 7 : 0; } - encoding |= (0xF & mimg->dmask) << 8; + encoding |= (0xF & mimg.dmask) << 8; out.push_back(encoding); encoding = (0xFF & instr->operands[3].physReg()); /* VADDR */ if (!instr->definitions.empty()) { @@ -468,10 +468,10 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* if (!instr->operands[1].isUndefined()) encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 21; /* sampler */ - assert(!mimg->d16 || ctx.chip_class >= GFX9); - encoding |= mimg->d16 ? 1 << 15 : 0; + assert(!mimg.d16 || ctx.chip_class >= GFX9); + encoding |= mimg.d16 ? 1 << 15 : 0; if (ctx.chip_class >= GFX10) { - encoding |= mimg->a16 ? 1 << 14 : 0; /* GFX10: A16 still exists, but is in a different place */ + encoding |= mimg.a16 ? 1 << 14 : 0; /* GFX10: A16 still exists, but is in a different place */ } out.push_back(encoding); @@ -487,33 +487,33 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* case Format::FLAT: case Format::SCRATCH: case Format::GLOBAL: { - FLAT_instruction *flat = instr->flatlike(); + FLAT_instruction& flat = instr->flatlike(); uint32_t encoding = (0b110111 << 26); encoding |= opcode << 18; if (ctx.chip_class <= GFX9) { - assert(flat->offset <= 0x1fff); - encoding |= flat->offset & 0x1fff; + assert(flat.offset <= 0x1fff); + encoding |= flat.offset & 0x1fff; } else if (instr->isFlat()) { /* GFX10 has a 12-bit immediate OFFSET field, * but it has a hw bug: it ignores the offset, called FlatSegmentOffsetBug */ - assert(flat->offset == 0); + assert(flat.offset == 0); } else { - assert(flat->offset <= 0xfff); - encoding |= flat->offset & 0xfff; + assert(flat.offset <= 0xfff); + encoding |= flat.offset & 0xfff; } if (instr->isScratch()) encoding |= 1 << 14; else if (instr->isGlobal()) encoding |= 2 << 14; - encoding |= flat->lds ? 1 << 13 : 0; - encoding |= flat->glc ? 1 << 16 : 0; - encoding |= flat->slc ? 1 << 17 : 0; + encoding |= flat.lds ? 1 << 13 : 0; + encoding |= flat.glc ? 1 << 16 : 0; + encoding |= flat.slc ? 1 << 17 : 0; if (ctx.chip_class >= GFX10) { - assert(!flat->nv); - encoding |= flat->dlc ? 1 << 12 : 0; + assert(!flat.nv); + encoding |= flat.dlc ? 1 << 12 : 0; } else { - assert(!flat->dlc); + assert(!flat.dlc); } out.push_back(encoding); encoding = (0xFF & instr->operands[0].physReg()); @@ -531,12 +531,12 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* else encoding |= sgpr_null << 16; } - encoding |= flat->nv ? 1 << 23 : 0; + encoding |= flat.nv ? 1 << 23 : 0; out.push_back(encoding); break; } case Format::EXP: { - Export_instruction* exp = instr->exp(); + Export_instruction& exp = instr->exp(); uint32_t encoding; if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) { encoding = (0b110001 << 26); @@ -544,16 +544,16 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding = (0b111110 << 26); } - encoding |= exp->valid_mask ? 0b1 << 12 : 0; - encoding |= exp->done ? 0b1 << 11 : 0; - encoding |= exp->compressed ? 0b1 << 10 : 0; - encoding |= exp->dest << 4; - encoding |= exp->enabled_mask; + encoding |= exp.valid_mask ? 0b1 << 12 : 0; + encoding |= exp.done ? 0b1 << 11 : 0; + encoding |= exp.compressed ? 0b1 << 10 : 0; + encoding |= exp.dest << 4; + encoding |= exp.enabled_mask; out.push_back(encoding); - encoding = 0xFF & exp->operands[0].physReg(); - encoding |= (0xFF & exp->operands[1].physReg()) << 8; - encoding |= (0xFF & exp->operands[2].physReg()) << 16; - encoding |= (0xFF & exp->operands[3].physReg()) << 24; + encoding = 0xFF & exp.operands[0].physReg(); + encoding |= (0xFF & exp.operands[1].physReg()) << 8; + encoding |= (0xFF & exp.operands[2].physReg()) << 16; + encoding |= (0xFF & exp.operands[3].physReg()) << 24; out.push_back(encoding); break; } @@ -564,7 +564,7 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* break; default: if (instr->isVOP3()) { - VOP3_instruction* vop3 = instr->vop3(); + VOP3_instruction& vop3 = instr->vop3(); if (instr->isVOP2()) { opcode = opcode + 0x100; @@ -590,14 +590,14 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* if (ctx.chip_class <= GFX7) { encoding |= opcode << 17; - encoding |= (vop3->clamp ? 1 : 0) << 11; + encoding |= (vop3.clamp ? 1 : 0) << 11; } else { encoding |= opcode << 16; - encoding |= (vop3->clamp ? 1 : 0) << 15; + encoding |= (vop3.clamp ? 1 : 0) << 15; } - encoding |= vop3->opsel << 11; + encoding |= vop3.opsel << 11; for (unsigned i = 0; i < 3; i++) - encoding |= vop3->abs[i] << (8+i); + encoding |= vop3.abs[i] << (8+i); if (instr->definitions.size() == 2) encoding |= instr->definitions[1].physReg() << 8; encoding |= (0xFF & instr->definitions[0].physReg()); @@ -609,13 +609,13 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* for (unsigned i = 0; i < instr->operands.size(); i++) encoding |= instr->operands[i].physReg() << (i * 9); } - encoding |= vop3->omod << 27; + encoding |= vop3.omod << 27; for (unsigned i = 0; i < 3; i++) - encoding |= vop3->neg[i] << (29+i); + encoding |= vop3.neg[i] << (29+i); out.push_back(encoding); } else if (instr->isVOP3P()) { - VOP3P_instruction* vop3 = instr->vop3p(); + VOP3P_instruction& vop3 = instr->vop3p(); uint32_t encoding; if (ctx.chip_class == GFX9) { @@ -627,45 +627,45 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* } encoding |= opcode << 16; - encoding |= (vop3->clamp ? 1 : 0) << 15; - encoding |= vop3->opsel_lo << 11; - encoding |= ((vop3->opsel_hi & 0x4) ? 1 : 0) << 14; + encoding |= (vop3.clamp ? 1 : 0) << 15; + encoding |= vop3.opsel_lo << 11; + encoding |= ((vop3.opsel_hi & 0x4) ? 1 : 0) << 14; for (unsigned i = 0; i < 3; i++) - encoding |= vop3->neg_hi[i] << (8+i); + encoding |= vop3.neg_hi[i] << (8+i); encoding |= (0xFF & instr->definitions[0].physReg()); out.push_back(encoding); encoding = 0; for (unsigned i = 0; i < instr->operands.size(); i++) encoding |= instr->operands[i].physReg() << (i * 9); - encoding |= (vop3->opsel_hi & 0x3) << 27; + encoding |= (vop3.opsel_hi & 0x3) << 27; for (unsigned i = 0; i < 3; i++) - encoding |= vop3->neg_lo[i] << (29+i); + encoding |= vop3.neg_lo[i] << (29+i); out.push_back(encoding); } else if (instr->isDPP()){ assert(ctx.chip_class >= GFX8); - DPP_instruction* dpp = instr->dpp(); + DPP_instruction& dpp = instr->dpp(); /* first emit the instruction without the DPP operand */ Operand dpp_op = instr->operands[0]; instr->operands[0] = Operand(PhysReg{250}, v1); instr->format = (Format) ((uint16_t) instr->format & ~(uint16_t)Format::DPP); emit_instruction(ctx, out, instr); - uint32_t encoding = (0xF & dpp->row_mask) << 28; - encoding |= (0xF & dpp->bank_mask) << 24; - encoding |= dpp->abs[1] << 23; - encoding |= dpp->neg[1] << 22; - encoding |= dpp->abs[0] << 21; - encoding |= dpp->neg[0] << 20; + uint32_t encoding = (0xF & dpp.row_mask) << 28; + encoding |= (0xF & dpp.bank_mask) << 24; + encoding |= dpp.abs[1] << 23; + encoding |= dpp.neg[1] << 22; + encoding |= dpp.abs[0] << 21; + encoding |= dpp.neg[0] << 20; if (ctx.chip_class >= GFX10) encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */ - encoding |= dpp->bound_ctrl << 19; - encoding |= dpp->dpp_ctrl << 8; + encoding |= dpp.bound_ctrl << 19; + encoding |= dpp.dpp_ctrl << 8; encoding |= (0xFF) & dpp_op.physReg(); out.push_back(encoding); return; } else if (instr->isSDWA()) { - SDWA_instruction* sdwa = instr->sdwa(); + SDWA_instruction& sdwa = instr->sdwa(); /* first emit the instruction without the SDWA operand */ Operand sdwa_op = instr->operands[0]; @@ -680,27 +680,27 @@ void emit_instruction(asm_context& ctx, std::vector& out, Instruction* encoding |= instr->definitions[0].physReg() << 8; encoding |= 1 << 15; } - encoding |= (sdwa->clamp ? 1 : 0) << 13; + encoding |= (sdwa.clamp ? 1 : 0) << 13; } else { - encoding |= get_sdwa_sel(sdwa->dst_sel, instr->definitions[0].physReg()) << 8; - uint32_t dst_u = sdwa->dst_sel & sdwa_sext ? 1 : 0; - if (sdwa->dst_preserve || (sdwa->dst_sel & sdwa_isra)) + encoding |= get_sdwa_sel(sdwa.dst_sel, instr->definitions[0].physReg()) << 8; + uint32_t dst_u = sdwa.dst_sel & sdwa_sext ? 1 : 0; + if (sdwa.dst_preserve || (sdwa.dst_sel & sdwa_isra)) dst_u = 2; encoding |= dst_u << 11; - encoding |= (sdwa->clamp ? 1 : 0) << 13; - encoding |= sdwa->omod << 14; + encoding |= (sdwa.clamp ? 1 : 0) << 13; + encoding |= sdwa.omod << 14; } - encoding |= get_sdwa_sel(sdwa->sel[0], sdwa_op.physReg()) << 16; - encoding |= sdwa->sel[0] & sdwa_sext ? 1 << 19 : 0; - encoding |= sdwa->abs[0] << 21; - encoding |= sdwa->neg[0] << 20; + encoding |= get_sdwa_sel(sdwa.sel[0], sdwa_op.physReg()) << 16; + encoding |= sdwa.sel[0] & sdwa_sext ? 1 << 19 : 0; + encoding |= sdwa.abs[0] << 21; + encoding |= sdwa.neg[0] << 20; if (instr->operands.size() >= 2) { - encoding |= get_sdwa_sel(sdwa->sel[1], instr->operands[1].physReg()) << 24; - encoding |= sdwa->sel[1] & sdwa_sext ? 1 << 27 : 0; - encoding |= sdwa->abs[1] << 29; - encoding |= sdwa->neg[1] << 28; + encoding |= get_sdwa_sel(sdwa.sel[1], instr->operands[1].physReg()) << 24; + encoding |= sdwa.sel[1] & sdwa_sext ? 1 << 27 : 0; + encoding |= sdwa.abs[1] << 29; + encoding |= sdwa.neg[1] << 28; } encoding |= 0xFF & sdwa_op.physReg(); @@ -750,16 +750,16 @@ void fix_exports(asm_context& ctx, std::vector& out, Program* program) while ( it != block.instructions.rend()) { if ((*it)->isEXP()) { - Export_instruction* exp = (*it)->exp(); + Export_instruction& exp = (*it)->exp(); if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) { - if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) { - exp->done = true; + if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= (V_008DFC_SQ_EXP_POS + 3)) { + exp.done = true; exported = true; break; } } else { - exp->done = true; - exp->valid_mask = true; + exp.done = true; + exp.valid_mask = true; exported = true; break; } diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp index 5c59e6a..57b6f1b 100644 --- a/src/amd/compiler/aco_insert_NOPs.cpp +++ b/src/amd/compiler/aco_insert_NOPs.cpp @@ -180,7 +180,7 @@ struct NOP_ctx_gfx10 { int get_wait_states(aco_ptr& instr) { if (instr->opcode == aco_opcode::s_nop) - return instr->sopp()->imm + 1; + return instr->sopp().imm + 1; else if (instr->opcode == aco_opcode::p_constaddr) return 3; /* lowered to 3 instructions in the assembler */ else @@ -351,7 +351,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c if (instr->opcode == aco_opcode::s_sendmsg || instr->opcode == aco_opcode::s_ttracedata) NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace); - } else if (instr->isDS() && instr->ds()->gds) { + } else if (instr->isDS() && instr->ds().gds) { NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace); } else if (instr->isVALU() || instr->isVINTRP()) { for (Operand op : instr->operands) { @@ -407,7 +407,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c if (program->chip_class == GFX9) { bool lds_scratch_global = (instr->isScratch() || instr->isGlobal()) && - instr->flatlike()->lds; + instr->flatlike().lds; if (instr->isVINTRP() || instr->opcode == aco_opcode::ds_read_addtid_b32 || instr->opcode == aco_opcode::ds_write_addtid_b32 || @@ -480,10 +480,10 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c ctx.salu_wr_m0_then_moverel = 1; } } else if (instr->opcode == aco_opcode::s_setreg_b32 || instr->opcode == aco_opcode::s_setreg_imm32_b32) { - SOPK_instruction *sopk = instr->sopk(); - unsigned offset = (sopk->imm >> 6) & 0x1f; - unsigned size = ((sopk->imm >> 11) & 0x1f) + 1; - unsigned reg = sopk->imm & 0x3f; + SOPK_instruction& sopk = instr->sopk(); + unsigned offset = (sopk.imm >> 6) & 0x1f; + unsigned size = ((sopk.imm >> 11) & 0x1f) + 1; + unsigned reg = sopk.imm & 0x3f; ctx.setreg_then_getsetreg = 2; if (reg == 1 && offset >= 28 && size > (28 - offset)) @@ -603,13 +603,13 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10 } else if (instr->isSALU() || instr->isSMEM()) { if (instr->opcode == aco_opcode::s_waitcnt) { /* Hazard is mitigated by "s_waitcnt vmcnt(0)" */ - uint16_t imm = instr->sopp()->imm; + uint16_t imm = instr->sopp().imm; unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10); if (vmcnt == 0) ctx.sgprs_read_by_VMEM.reset(); } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) { /* Hazard is mitigated by a s_waitcnt_depctr with a magic imm */ - if (instr->sopp()->imm == 0xffe3) + if (instr->sopp().imm == 0xffe3) ctx.sgprs_read_by_VMEM.reset(); } @@ -667,7 +667,7 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10 } } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) { /* s_waitcnt_depctr can mitigate the problem if it has a magic imm */ - if ((instr->sopp()->imm & 0xfffe) == 0xfffe) + if ((instr->sopp().imm & 0xfffe) == 0xfffe) ctx.has_nonVALU_exec_read = false; } @@ -694,12 +694,12 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10 ctx.sgprs_read_by_SMEM.reset(); } else { /* Reducing lgkmcnt count to 0 always mitigates the hazard. */ - const SOPP_instruction *sopp = instr->sopp(); - if (sopp->opcode == aco_opcode::s_waitcnt_lgkmcnt) { - if (sopp->imm == 0 && sopp->definitions[0].physReg() == sgpr_null) + const SOPP_instruction& sopp = instr->sopp(); + if (sopp.opcode == aco_opcode::s_waitcnt_lgkmcnt) { + if (sopp.imm == 0 && sopp.definitions[0].physReg() == sgpr_null) ctx.sgprs_read_by_SMEM.reset(); - } else if (sopp->opcode == aco_opcode::s_waitcnt) { - unsigned lgkm = (sopp->imm >> 8) & 0x3f; + } else if (sopp.opcode == aco_opcode::s_waitcnt) { + unsigned lgkm = (sopp.imm >> 8) & 0x3f; if (lgkm == 0) ctx.sgprs_read_by_SMEM.reset(); } @@ -724,8 +724,8 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10 ctx.has_branch_after_DS = ctx.has_DS; } else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) { /* Only s_waitcnt_vscnt can mitigate the hazard */ - const SOPK_instruction *sopk = instr->sopk(); - if (sopk->definitions[0].physReg() == sgpr_null && sopk->imm == 0) + const SOPK_instruction& sopk = instr->sopk(); + if (sopk.definitions[0].physReg() == sgpr_null && sopk.imm == 0) ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false; } if ((ctx.has_VMEM && ctx.has_branch_after_DS) || (ctx.has_DS && ctx.has_branch_after_VMEM)) { diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp index d5153a1..cdc7a70 100644 --- a/src/amd/compiler/aco_insert_exec_mask.cpp +++ b/src/amd/compiler/aco_insert_exec_mask.cpp @@ -98,13 +98,13 @@ struct exec_ctx { bool needs_exact(aco_ptr& instr) { if (instr->isMUBUF()) { - return instr->mubuf()->disable_wqm; + return instr->mubuf().disable_wqm; } else if (instr->isMTBUF()) { - return instr->mtbuf()->disable_wqm; + return instr->mtbuf().disable_wqm; } else if (instr->isMIMG()) { - return instr->mimg()->disable_wqm; + return instr->mimg().disable_wqm; } else if (instr->isFlatLike()) { - return instr->flatlike()->disable_wqm; + return instr->flatlike().disable_wqm; } else { return instr->isEXP(); } @@ -908,12 +908,12 @@ void add_branch_code(exec_ctx& ctx, Block* block) } if (block->kind & block_kind_uniform) { - Pseudo_branch_instruction* branch = block->instructions.back()->branch(); - if (branch->opcode == aco_opcode::p_branch) { - branch->target[0] = block->linear_succs[0]; + Pseudo_branch_instruction& branch = block->instructions.back()->branch(); + if (branch.opcode == aco_opcode::p_branch) { + branch.target[0] = block->linear_succs[0]; } else { - branch->target[0] = block->linear_succs[1]; - branch->target[1] = block->linear_succs[0]; + branch.target[0] = block->linear_succs[1]; + branch.target[1] = block->linear_succs[0]; } return; } diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 2c5c115..30155e3 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -422,7 +422,7 @@ wait_imm check_instr(Instruction* instr, wait_ctx& ctx) continue; /* LDS reads and writes return in the order they were issued. same for GDS */ - if (instr->isDS() && (it->second.events & lgkm_events) == (instr->ds()->gds ? event_gds : event_lds)) + if (instr->isDS() && (it->second.events & lgkm_events) == (instr->ds().gds ? event_gds : event_lds)) continue; wait.combine(it->second.imm); @@ -437,10 +437,10 @@ wait_imm parse_wait_instr(wait_ctx& ctx, Instruction *instr) if (instr->opcode == aco_opcode::s_waitcnt_vscnt && instr->definitions[0].physReg() == sgpr_null) { wait_imm imm; - imm.vs = std::min(imm.vs, instr->sopk()->imm); + imm.vs = std::min(imm.vs, instr->sopk().imm); return imm; } else if (instr->opcode == aco_opcode::s_waitcnt) { - return wait_imm(ctx.chip_class, instr->sopp()->imm); + return wait_imm(ctx.chip_class, instr->sopp().imm); } return wait_imm(); } @@ -521,15 +521,15 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info) * TODO: Refine this when we have proper alias analysis. */ if (ctx.pending_s_buffer_store && - !instr->smem()->definitions.empty() && - !instr->smem()->sync.can_reorder()) { + !instr->smem().definitions.empty() && + !instr->smem().sync.can_reorder()) { imm.lgkm = 0; } } if (ctx.program->early_rast && instr->opcode == aco_opcode::exp) { - if (instr->exp()->dest >= V_008DFC_SQ_EXP_POS && - instr->exp()->dest < V_008DFC_SQ_EXP_PRIM) { + if (instr->exp().dest >= V_008DFC_SQ_EXP_POS && + instr->exp().dest < V_008DFC_SQ_EXP_PRIM) { /* With early_rast, the HW will start clipping and rasterization after the 1st DONE pos export. * Wait for all stores (and atomics) to complete, so PS can read them. @@ -543,7 +543,7 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info) } if (instr->opcode == aco_opcode::p_barrier) - imm.combine(perform_barrier(ctx, instr->barrier()->sync, semantic_acqrel)); + imm.combine(perform_barrier(ctx, instr->barrier().sync, semantic_acqrel)); else imm.combine(perform_barrier(ctx, sync_info, semantic_release)); @@ -760,12 +760,12 @@ void gen(Instruction* instr, wait_ctx& ctx) { switch (instr->format) { case Format::EXP: { - Export_instruction* exp_instr = instr->exp(); + Export_instruction& exp_instr = instr->exp(); wait_event ev; - if (exp_instr->dest <= 9) + if (exp_instr.dest <= 9) ev = event_exp_mrt_null; - else if (exp_instr->dest <= 15) + else if (exp_instr.dest <= 15) ev = event_exp_pos; else ev = event_exp_param; @@ -774,10 +774,10 @@ void gen(Instruction* instr, wait_ctx& ctx) /* insert new entries for exported vgprs */ for (unsigned i = 0; i < 4; i++) { - if (exp_instr->enabled_mask & (1 << i)) { - unsigned idx = exp_instr->compressed ? i >> 1 : i; - assert(idx < exp_instr->operands.size()); - insert_wait_entry(ctx, exp_instr->operands[idx], ev); + if (exp_instr.enabled_mask & (1 << i)) { + unsigned idx = exp_instr.compressed ? i >> 1 : i; + assert(idx < exp_instr.operands.size()); + insert_wait_entry(ctx, exp_instr.operands[idx], ev); } } @@ -785,38 +785,38 @@ void gen(Instruction* instr, wait_ctx& ctx) break; } case Format::FLAT: { - FLAT_instruction *flat = instr->flat(); + FLAT_instruction& flat = instr->flat(); if (ctx.chip_class < GFX10 && !instr->definitions.empty()) - update_counters_for_flat_load(ctx, flat->sync); + update_counters_for_flat_load(ctx, flat.sync); else - update_counters(ctx, event_flat, flat->sync); + update_counters(ctx, event_flat, flat.sync); if (!instr->definitions.empty()) insert_wait_entry(ctx, instr->definitions[0], event_flat); break; } case Format::SMEM: { - SMEM_instruction *smem = instr->smem(); - update_counters(ctx, event_smem, smem->sync); + SMEM_instruction& smem = instr->smem(); + update_counters(ctx, event_smem, smem.sync); if (!instr->definitions.empty()) insert_wait_entry(ctx, instr->definitions[0], event_smem); else if (ctx.chip_class >= GFX10 && - !smem->sync.can_reorder()) + !smem.sync.can_reorder()) ctx.pending_s_buffer_store = true; break; } case Format::DS: { - DS_instruction *ds = instr->ds(); - update_counters(ctx, ds->gds ? event_gds : event_lds, ds->sync); - if (ds->gds) + DS_instruction& ds = instr->ds(); + update_counters(ctx, ds.gds ? event_gds : event_lds, ds.sync); + if (ds.gds) update_counters(ctx, event_gds_gpr_lock); if (!instr->definitions.empty()) - insert_wait_entry(ctx, instr->definitions[0], ds->gds ? event_gds : event_lds); + insert_wait_entry(ctx, instr->definitions[0], ds.gds ? event_gds : event_lds); - if (ds->gds) { + if (ds.gds) { for (const Operand& op : instr->operands) insert_wait_entry(ctx, op, event_gds_gpr_lock); insert_wait_entry(ctx, exec, s2, event_gds_gpr_lock, false); diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 4c27e58..0d3e50f 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -1224,7 +1224,7 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val) Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1); Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v); - add->vop3()->neg[1] = true; + add->vop3().neg[1] = true; return add->definitions[0].getTemp(); } @@ -1692,7 +1692,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) std::swap(src0, src1); add_instr = bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr; } - add_instr->vop3()->clamp = 1; + add_instr->vop3().clamp = 1; } else if (dst.regClass() == v1) { if (ctx->options->chip_class >= GFX9) { aco_ptr add{create_instruction(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)}; @@ -1944,9 +1944,9 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) case nir_op_fsub: { if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) { Instruction* add = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst); - VOP3P_instruction* sub = add->vop3p(); - sub->neg_lo[1] = true; - sub->neg_hi[1] = true; + VOP3P_instruction& sub = add->vop3p(); + sub.neg_lo[1] = true; + sub.neg_hi[1] = true; break; } @@ -1965,7 +1965,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) } else if (dst.regClass() == v2) { Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), as_vgpr(ctx, src0), as_vgpr(ctx, src1)); - add->vop3()->neg[1] = true; + add->vop3().neg[1] = true; } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } @@ -2101,7 +2101,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) Temp src = get_alu_src_vop3p(ctx, instr->src[0]); Instruction* vop3p = bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand(uint16_t(0x3C00)), instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1); - vop3p->vop3p()->clamp = true; + vop3p->vop3p().clamp = true; emit_split_vector(ctx, dst, 2); break; } @@ -2114,7 +2114,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) // TODO: confirm that this holds under any circumstances } else if (dst.regClass() == v2) { Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u)); - add->vop3()->clamp = true; + add->vop3().clamp = true; } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } @@ -2253,12 +2253,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) Temp bfi = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, bld.copy(bld.def(v1), Operand(0x43300000u)), as_vgpr(ctx, src0_hi)); Temp tmp = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi)); Instruction *sub = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi)); - sub->vop3()->neg[1] = true; + sub->vop3().neg[1] = true; tmp = sub->definitions[0].getTemp(); Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), Operand(0x432fffffu)); Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.hint_vcc(bld.def(bld.lm)), src0, v); - vop3->vop3()->abs[0] = true; + vop3->vop3().abs[0] = true; Temp cond = vop3->definitions[0].getTemp(); Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1); @@ -2924,7 +2924,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr) f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16); Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u)); Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), f32, smallest); - vop3->vop3()->abs[0] = true; + vop3->vop3().abs[0] = true; cmp_res = vop3->definitions[0].getTemp(); } @@ -3515,7 +3515,7 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info, instr = bld.ds(op, Definition(val), offset, m, const_offset, const_offset + 1); else instr = bld.ds(op, Definition(val), offset, m, const_offset); - instr->ds()->sync = info.sync; + instr->ds().sync = info.sync; if (size < 4) val = bld.pseudo(aco_opcode::p_extract_vector, bld.def(RegClass::get(RegType::vgpr, size)), val, Operand(0u)); @@ -3931,7 +3931,7 @@ void store_lds(isel_context *ctx, unsigned elem_size_bytes, Temp data, uint32_t } else { instr = bld.ds(op, address_offset, split_data, m, inline_offset); } - instr->ds()->sync = memory_sync_info(storage_shared); + instr->ds().sync = memory_sync_info(storage_shared); } } @@ -4094,7 +4094,7 @@ void emit_single_mubuf_store(isel_context *ctx, Temp descriptor, Temp voffset, T /* idxen*/ false, /* addr64 */ false, /* disable_wqm */ false, /* glc */ true, /* dlc*/ false, /* slc */ slc); - r.instr->mubuf()->sync = sync; + r.instr->mubuf().sync = sync; } void store_vmem_mubuf(isel_context *ctx, Temp src, Temp descriptor, Temp voffset, Temp soffset, @@ -5503,7 +5503,7 @@ void visit_load_push_constant(isel_context *ctx, nir_intrinsic_instr *instr) unreachable("unimplemented or forbidden load_push_constant."); } - bld.smem(op, Definition(vec), ptr, index).instr->smem()->prevent_overflow = true; + bld.smem(op, Definition(vec), ptr, index).instr->smem().prevent_overflow = true; if (!aligned) { Operand byte_offset = index_cv ? Operand((offset + index_cv->u32) % 4) : Operand(index); @@ -7147,7 +7147,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) { for (unsigned i = 0; i < write_count; i++) { aco_opcode op = get_buffer_store_op(write_datas[i].bytes()); Instruction *mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_datas[i], offsets[i], true, true); - mubuf->mubuf()->sync = memory_sync_info(storage_scratch, semantic_private); + mubuf->mubuf().sync = memory_sync_info(storage_scratch, semantic_private); } } diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index ed3a43a..90d0f5e 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -140,19 +140,19 @@ memory_sync_info get_sync_info(const Instruction* instr) { switch (instr->format) { case Format::SMEM: - return instr->smem()->sync; + return instr->smem().sync; case Format::MUBUF: - return instr->mubuf()->sync; + return instr->mubuf().sync; case Format::MIMG: - return instr->mimg()->sync; + return instr->mimg().sync; case Format::MTBUF: - return instr->mtbuf()->sync; + return instr->mtbuf().sync; case Format::FLAT: case Format::GLOBAL: case Format::SCRATCH: - return instr->flatlike()->sync; + return instr->flatlike().sync; case Format::DS: - return instr->ds()->sync; + return instr->ds().sync; default: return memory_sync_info(); } @@ -170,12 +170,12 @@ bool can_use_SDWA(chip_class chip, const aco_ptr& instr) return true; if (instr->isVOP3()) { - VOP3_instruction *vop3 = instr->vop3(); + VOP3_instruction& vop3 = instr->vop3(); if (instr->format == Format::VOP3) return false; - if (vop3->clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8) + if (vop3.clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8) return false; - if (vop3->omod && chip < GFX9) + if (vop3.omod && chip < GFX9) return false; //TODO: return true if we know we will use vcc @@ -232,14 +232,14 @@ aco_ptr convert_to_SDWA(chip_class chip, aco_ptr& inst std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin()); std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(), instr->definitions.begin()); - SDWA_instruction *sdwa = instr->sdwa(); + SDWA_instruction& sdwa = instr->sdwa(); if (tmp->isVOP3()) { - VOP3_instruction *vop3 = tmp->vop3(); - memcpy(sdwa->neg, vop3->neg, sizeof(sdwa->neg)); - memcpy(sdwa->abs, vop3->abs, sizeof(sdwa->abs)); - sdwa->omod = vop3->omod; - sdwa->clamp = vop3->clamp; + VOP3_instruction& vop3 = tmp->vop3(); + memcpy(sdwa.neg, vop3.neg, sizeof(sdwa.neg)); + memcpy(sdwa.abs, vop3.abs, sizeof(sdwa.abs)); + sdwa.omod = vop3.omod; + sdwa.clamp = vop3.clamp; } for (unsigned i = 0; i < instr->operands.size(); i++) { @@ -249,27 +249,27 @@ aco_ptr convert_to_SDWA(chip_class chip, aco_ptr& inst switch (instr->operands[i].bytes()) { case 1: - sdwa->sel[i] = sdwa_ubyte; + sdwa.sel[i] = sdwa_ubyte; break; case 2: - sdwa->sel[i] = sdwa_uword; + sdwa.sel[i] = sdwa_uword; break; case 4: - sdwa->sel[i] = sdwa_udword; + sdwa.sel[i] = sdwa_udword; break; } } switch (instr->definitions[0].bytes()) { case 1: - sdwa->dst_sel = sdwa_ubyte; - sdwa->dst_preserve = true; + sdwa.dst_sel = sdwa_ubyte; + sdwa.dst_preserve = true; break; case 2: - sdwa->dst_sel = sdwa_uword; - sdwa->dst_preserve = true; + sdwa.dst_sel = sdwa_uword; + sdwa.dst_preserve = true; break; case 4: - sdwa->dst_sel = sdwa_udword; + sdwa.dst_sel = sdwa_udword; break; } diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 62e4f0d..94564ff 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1030,93 +1030,93 @@ struct Instruction { return false; } - Pseudo_instruction *pseudo() noexcept {assert(isPseudo()); return (Pseudo_instruction *)this;} - const Pseudo_instruction *pseudo() const noexcept {assert(isPseudo()); return (Pseudo_instruction *)this;} + Pseudo_instruction& pseudo() noexcept {assert(isPseudo()); return *(Pseudo_instruction *)this;} + const Pseudo_instruction& pseudo() const noexcept {assert(isPseudo()); return *(Pseudo_instruction *)this;} constexpr bool isPseudo() const noexcept {return format == Format::PSEUDO;} - SOP1_instruction *sop1() noexcept {assert(isSOP1()); return (SOP1_instruction *)this;} - const SOP1_instruction *sop1() const noexcept {assert(isSOP1()); return (SOP1_instruction *)this;} + SOP1_instruction& sop1() noexcept {assert(isSOP1()); return *(SOP1_instruction *)this;} + const SOP1_instruction& sop1() const noexcept {assert(isSOP1()); return *(SOP1_instruction *)this;} constexpr bool isSOP1() const noexcept {return format == Format::SOP1;} - SOP2_instruction *sop2() noexcept {assert(isSOP2()); return (SOP2_instruction *)this;} - const SOP2_instruction *sop2() const noexcept {assert(isSOP2()); return (SOP2_instruction *)this;} + SOP2_instruction& sop2() noexcept {assert(isSOP2()); return *(SOP2_instruction *)this;} + const SOP2_instruction& sop2() const noexcept {assert(isSOP2()); return *(SOP2_instruction *)this;} constexpr bool isSOP2() const noexcept {return format == Format::SOP2;} - SOPK_instruction *sopk() noexcept {assert(isSOPK()); return (SOPK_instruction *)this;} - const SOPK_instruction *sopk() const noexcept {assert(isSOPK()); return (SOPK_instruction *)this;} + SOPK_instruction& sopk() noexcept {assert(isSOPK()); return *(SOPK_instruction *)this;} + const SOPK_instruction& sopk() const noexcept {assert(isSOPK()); return *(SOPK_instruction *)this;} constexpr bool isSOPK() const noexcept {return format == Format::SOPK;} - SOPP_instruction *sopp() noexcept {assert(isSOPP()); return (SOPP_instruction *)this;} - const SOPP_instruction *sopp() const noexcept {assert(isSOPP()); return (SOPP_instruction *)this;} + SOPP_instruction& sopp() noexcept {assert(isSOPP()); return *(SOPP_instruction *)this;} + const SOPP_instruction& sopp() const noexcept {assert(isSOPP()); return *(SOPP_instruction *)this;} constexpr bool isSOPP() const noexcept {return format == Format::SOPP;} - SOPC_instruction *sopc() noexcept {assert(isSOPC()); return (SOPC_instruction *)this;} - const SOPC_instruction *sopc() const noexcept {assert(isSOPC()); return (SOPC_instruction *)this;} + SOPC_instruction& sopc() noexcept {assert(isSOPC()); return *(SOPC_instruction *)this;} + const SOPC_instruction& sopc() const noexcept {assert(isSOPC()); return *(SOPC_instruction *)this;} constexpr bool isSOPC() const noexcept {return format == Format::SOPC;} - SMEM_instruction *smem() noexcept {assert(isSMEM()); return (SMEM_instruction *)this;} - const SMEM_instruction *smem() const noexcept {assert(isSMEM()); return (SMEM_instruction *)this;} + SMEM_instruction& smem() noexcept {assert(isSMEM()); return *(SMEM_instruction *)this;} + const SMEM_instruction& smem() const noexcept {assert(isSMEM()); return *(SMEM_instruction *)this;} constexpr bool isSMEM() const noexcept {return format == Format::SMEM;} - DS_instruction *ds() noexcept {assert(isDS()); return (DS_instruction *)this;} - const DS_instruction *ds() const noexcept {assert(isDS()); return (DS_instruction *)this;} + DS_instruction& ds() noexcept {assert(isDS()); return *(DS_instruction *)this;} + const DS_instruction& ds() const noexcept {assert(isDS()); return *(DS_instruction *)this;} constexpr bool isDS() const noexcept {return format == Format::DS;} - MTBUF_instruction *mtbuf() noexcept {assert(isMTBUF()); return (MTBUF_instruction *)this;} - const MTBUF_instruction *mtbuf() const noexcept {assert(isMTBUF()); return (MTBUF_instruction *)this;} + MTBUF_instruction& mtbuf() noexcept {assert(isMTBUF()); return *(MTBUF_instruction *)this;} + const MTBUF_instruction& mtbuf() const noexcept {assert(isMTBUF()); return *(MTBUF_instruction *)this;} constexpr bool isMTBUF() const noexcept {return format == Format::MTBUF;} - MUBUF_instruction *mubuf() noexcept {assert(isMUBUF()); return (MUBUF_instruction *)this;} - const MUBUF_instruction *mubuf() const noexcept {assert(isMUBUF()); return (MUBUF_instruction *)this;} + MUBUF_instruction& mubuf() noexcept {assert(isMUBUF()); return *(MUBUF_instruction *)this;} + const MUBUF_instruction& mubuf() const noexcept {assert(isMUBUF()); return *(MUBUF_instruction *)this;} constexpr bool isMUBUF() const noexcept {return format == Format::MUBUF;} - MIMG_instruction *mimg() noexcept {assert(isMIMG()); return (MIMG_instruction *)this;} - const MIMG_instruction *mimg() const noexcept {assert(isMIMG()); return (MIMG_instruction *)this;} + MIMG_instruction& mimg() noexcept {assert(isMIMG()); return *(MIMG_instruction *)this;} + const MIMG_instruction& mimg() const noexcept {assert(isMIMG()); return *(MIMG_instruction *)this;} constexpr bool isMIMG() const noexcept {return format == Format::MIMG;} - Export_instruction *exp() noexcept {assert(isEXP()); return (Export_instruction *)this;} - const Export_instruction *exp() const noexcept {assert(isEXP()); return (Export_instruction *)this;} + Export_instruction& exp() noexcept {assert(isEXP()); return *(Export_instruction *)this;} + const Export_instruction& exp() const noexcept {assert(isEXP()); return *(Export_instruction *)this;} constexpr bool isEXP() const noexcept {return format == Format::EXP;} - FLAT_instruction *flat() noexcept {assert(isFlat()); return (FLAT_instruction *)this;} - const FLAT_instruction *flat() const noexcept {assert(isFlat()); return (FLAT_instruction *)this;} + FLAT_instruction& flat() noexcept {assert(isFlat()); return *(FLAT_instruction *)this;} + const FLAT_instruction& flat() const noexcept {assert(isFlat()); return *(FLAT_instruction *)this;} constexpr bool isFlat() const noexcept {return format == Format::FLAT;} - FLAT_instruction *global() noexcept {assert(isGlobal()); return (FLAT_instruction *)this;} - const FLAT_instruction *global() const noexcept {assert(isGlobal()); return (FLAT_instruction *)this;} + FLAT_instruction& global() noexcept {assert(isGlobal()); return *(FLAT_instruction *)this;} + const FLAT_instruction& global() const noexcept {assert(isGlobal()); return *(FLAT_instruction *)this;} constexpr bool isGlobal() const noexcept {return format == Format::GLOBAL;} - FLAT_instruction *scratch() noexcept {assert(isScratch()); return (FLAT_instruction *)this;} - const FLAT_instruction *scratch() const noexcept {assert(isScratch()); return (FLAT_instruction *)this;} + FLAT_instruction& scratch() noexcept {assert(isScratch()); return *(FLAT_instruction *)this;} + const FLAT_instruction& scratch() const noexcept {assert(isScratch()); return *(FLAT_instruction *)this;} constexpr bool isScratch() const noexcept {return format == Format::SCRATCH;} - Pseudo_branch_instruction *branch() noexcept {assert(isBranch()); return (Pseudo_branch_instruction *)this;} - const Pseudo_branch_instruction *branch() const noexcept {assert(isBranch()); return (Pseudo_branch_instruction *)this;} + Pseudo_branch_instruction& branch() noexcept {assert(isBranch()); return *(Pseudo_branch_instruction *)this;} + const Pseudo_branch_instruction& branch() const noexcept {assert(isBranch()); return *(Pseudo_branch_instruction *)this;} constexpr bool isBranch() const noexcept {return format == Format::PSEUDO_BRANCH;} - Pseudo_barrier_instruction *barrier() noexcept {assert(isBarrier()); return (Pseudo_barrier_instruction *)this;} - const Pseudo_barrier_instruction *barrier() const noexcept {assert(isBarrier()); return (Pseudo_barrier_instruction *)this;} + Pseudo_barrier_instruction& barrier() noexcept {assert(isBarrier()); return *(Pseudo_barrier_instruction *)this;} + const Pseudo_barrier_instruction& barrier() const noexcept {assert(isBarrier()); return *(Pseudo_barrier_instruction *)this;} constexpr bool isBarrier() const noexcept {return format == Format::PSEUDO_BARRIER;} - Pseudo_reduction_instruction *reduction() noexcept {assert(isReduction()); return (Pseudo_reduction_instruction *)this;} - const Pseudo_reduction_instruction *reduction() const noexcept {assert(isReduction()); return (Pseudo_reduction_instruction *)this;} + Pseudo_reduction_instruction& reduction() noexcept {assert(isReduction()); return *(Pseudo_reduction_instruction *)this;} + const Pseudo_reduction_instruction& reduction() const noexcept {assert(isReduction()); return *(Pseudo_reduction_instruction *)this;} constexpr bool isReduction() const noexcept {return format == Format::PSEUDO_REDUCTION;} - VOP3P_instruction *vop3p() noexcept {assert(isVOP3P()); return (VOP3P_instruction *)this;} - const VOP3P_instruction *vop3p() const noexcept {assert(isVOP3P()); return (VOP3P_instruction *)this;} + VOP3P_instruction& vop3p() noexcept {assert(isVOP3P()); return *(VOP3P_instruction *)this;} + const VOP3P_instruction& vop3p() const noexcept {assert(isVOP3P()); return *(VOP3P_instruction *)this;} constexpr bool isVOP3P() const noexcept {return format == Format::VOP3P;} - VOP1_instruction *vop1() noexcept {assert(isVOP1()); return (VOP1_instruction *)this;} - const VOP1_instruction *vop1() const noexcept {assert(isVOP1()); return (VOP1_instruction *)this;} + VOP1_instruction& vop1() noexcept {assert(isVOP1()); return *(VOP1_instruction *)this;} + const VOP1_instruction& vop1() const noexcept {assert(isVOP1()); return *(VOP1_instruction *)this;} constexpr bool isVOP1() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP1;} - VOP2_instruction *vop2() noexcept {assert(isVOP2()); return (VOP2_instruction *)this;} - const VOP2_instruction *vop2() const noexcept {assert(isVOP2()); return (VOP2_instruction *)this;} + VOP2_instruction& vop2() noexcept {assert(isVOP2()); return *(VOP2_instruction *)this;} + const VOP2_instruction& vop2() const noexcept {assert(isVOP2()); return *(VOP2_instruction *)this;} constexpr bool isVOP2() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP2;} - VOPC_instruction *vopc() noexcept {assert(isVOPC()); return (VOPC_instruction *)this;} - const VOPC_instruction *vopc() const noexcept {assert(isVOPC()); return (VOPC_instruction *)this;} + VOPC_instruction& vopc() noexcept {assert(isVOPC()); return *(VOPC_instruction *)this;} + const VOPC_instruction& vopc() const noexcept {assert(isVOPC()); return *(VOPC_instruction *)this;} constexpr bool isVOPC() const noexcept {return (uint16_t)format & (uint16_t)Format::VOPC;} - VOP3_instruction *vop3() noexcept {assert(isVOP3()); return (VOP3_instruction *)this;} - const VOP3_instruction *vop3() const noexcept {assert(isVOP3()); return (VOP3_instruction *)this;} + VOP3_instruction& vop3() noexcept {assert(isVOP3()); return *(VOP3_instruction *)this;} + const VOP3_instruction& vop3() const noexcept {assert(isVOP3()); return *(VOP3_instruction *)this;} constexpr bool isVOP3() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP3;} - Interp_instruction *vintrp() noexcept {assert(isVINTRP()); return (Interp_instruction *)this;} - const Interp_instruction *vintrp() const noexcept {assert(isVINTRP()); return (Interp_instruction *)this;} + Interp_instruction& vintrp() noexcept {assert(isVINTRP()); return *(Interp_instruction *)this;} + const Interp_instruction& vintrp() const noexcept {assert(isVINTRP()); return *(Interp_instruction *)this;} constexpr bool isVINTRP() const noexcept {return (uint16_t)format & (uint16_t)Format::VINTRP;} - DPP_instruction *dpp() noexcept {assert(isDPP()); return (DPP_instruction *)this;} - const DPP_instruction *dpp() const noexcept {assert(isDPP()); return (DPP_instruction *)this;} + DPP_instruction& dpp() noexcept {assert(isDPP()); return *(DPP_instruction *)this;} + const DPP_instruction& dpp() const noexcept {assert(isDPP()); return *(DPP_instruction *)this;} constexpr bool isDPP() const noexcept {return (uint16_t)format & (uint16_t)Format::DPP;} - SDWA_instruction *sdwa() noexcept {assert(isSDWA()); return (SDWA_instruction *)this;} - const SDWA_instruction *sdwa() const noexcept {assert(isSDWA()); return (SDWA_instruction *)this;} + SDWA_instruction& sdwa() noexcept {assert(isSDWA()); return *(SDWA_instruction *)this;} + const SDWA_instruction& sdwa() const noexcept {assert(isSDWA()); return *(SDWA_instruction *)this;} constexpr bool isSDWA() const noexcept {return (uint16_t)format & (uint16_t)Format::SDWA;} - FLAT_instruction *flatlike() + FLAT_instruction& flatlike() { - return (FLAT_instruction *)this; + return *(FLAT_instruction *)this; } - const FLAT_instruction *flatlike() const + const FLAT_instruction& flatlike() const { - return (FLAT_instruction *)this; + return *(FLAT_instruction *)this; } constexpr bool isFlatLike() const noexcept @@ -1524,23 +1524,23 @@ constexpr bool Instruction::usesModifiers() const noexcept return true; if (isVOP3P()) { - const VOP3P_instruction *vop3p = this->vop3p(); + const VOP3P_instruction& vop3p = this->vop3p(); for (unsigned i = 0; i < operands.size(); i++) { - if (vop3p->neg_lo[i] || vop3p->neg_hi[i]) + if (vop3p.neg_lo[i] || vop3p.neg_hi[i]) return true; /* opsel_hi must be 1 to not be considered a modifier - even for constants */ - if (!(vop3p->opsel_hi & (1 << i))) + if (!(vop3p.opsel_hi & (1 << i))) return true; } - return vop3p->opsel_lo || vop3p->clamp; + return vop3p.opsel_lo || vop3p.clamp; } else if (isVOP3()) { - const VOP3_instruction *vop3 = this->vop3(); + const VOP3_instruction& vop3 = this->vop3(); for (unsigned i = 0; i < operands.size(); i++) { - if (vop3->abs[i] || vop3->neg[i]) + if (vop3.abs[i] || vop3.neg[i]) return true; } - return vop3->opsel || vop3->clamp || vop3->omod; + return vop3.opsel || vop3.clamp || vop3.omod; } return false; } diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 180b38e..a212972 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -646,7 +646,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig Definition(PhysReg{vtmp+i}, v1), Operand(PhysReg{tmp+i}, v1), Operand(0xffffffffu), Operand(0xffffffffu)).instr; - perm->vop3()->opsel = 1; /* FI (Fetch Inactive) */ + perm->vop3().opsel = 1; /* FI (Fetch Inactive) */ } bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX)); @@ -757,7 +757,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig Definition(PhysReg{vtmp+i}, v1), Operand(PhysReg{tmp+i}, v1), Operand(0xffffffffu), Operand(0xffffffffu)).instr; - perm->vop3()->opsel = 1; /* FI (Fetch Inactive) */ + perm->vop3().opsel = 1; /* FI (Fetch Inactive) */ } emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size()); @@ -1052,12 +1052,12 @@ void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op) if (dst.physReg().byte() == 2) { Operand def_lo(dst.physReg().advance(-2), v2b); Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op); - instr->vop3()->opsel = 0; + instr->vop3().opsel = 0; } else { assert(dst.physReg().byte() == 0); Operand def_hi(dst.physReg().advance(2), v2b); Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi); - instr->vop3()->opsel = 2; + instr->vop3().opsel = 2; } } else { uint32_t offset = dst.physReg().byte() * 8u; @@ -1251,7 +1251,7 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo, if (can_use_pack) { Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi); /* opsel: 0 = select low half, 1 = select high half. [0] = src0, [1] = src1 */ - instr->vop3()->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1); + instr->vop3().opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1); return; } @@ -1810,7 +1810,7 @@ void lower_to_hw_instr(Program* program) aco_ptr& instr = block->instructions[instr_idx]; aco_ptr mov; if (instr->isPseudo() && instr->opcode != aco_opcode::p_unit_test) { - Pseudo_instruction *pi = instr->pseudo(); + Pseudo_instruction *pi = &instr->pseudo(); switch (instr->opcode) { @@ -1897,7 +1897,7 @@ void lower_to_hw_instr(Program* program) instr2->opcode == aco_opcode::p_logical_end) continue; else if (instr2->opcode == aco_opcode::exp && - instr2->exp()->dest == null_exp_dest) + instr2->exp().dest == null_exp_dest) continue; else if (instr2->opcode == aco_opcode::p_parallelcopy && instr2->definitions[0].isFixed() && @@ -1983,7 +1983,7 @@ void lower_to_hw_instr(Program* program) break; } } else if (instr->isBranch()) { - Pseudo_branch_instruction* branch = instr->branch(); + Pseudo_branch_instruction* branch = &instr->branch(); uint32_t target = branch->target[0]; /* check if all blocks from current to target are empty */ @@ -2055,20 +2055,20 @@ void lower_to_hw_instr(Program* program) } } else if (instr->isReduction()) { - Pseudo_reduction_instruction* reduce = instr->reduction(); - emit_reduction(&ctx, reduce->opcode, reduce->reduce_op, reduce->cluster_size, - reduce->operands[1].physReg(), // tmp - reduce->definitions[1].physReg(), // stmp - reduce->operands[2].physReg(), // vtmp - reduce->definitions[2].physReg(), // sitmp - reduce->operands[0], reduce->definitions[0]); + Pseudo_reduction_instruction& reduce = instr->reduction(); + emit_reduction(&ctx, reduce.opcode, reduce.reduce_op, reduce.cluster_size, + reduce.operands[1].physReg(), // tmp + reduce.definitions[1].physReg(), // stmp + reduce.operands[2].physReg(), // vtmp + reduce.definitions[2].physReg(), // sitmp + reduce.operands[0], reduce.definitions[0]); } else if (instr->isBarrier()) { - Pseudo_barrier_instruction* barrier = instr->barrier(); + Pseudo_barrier_instruction& barrier = instr->barrier(); /* Anything larger than a workgroup isn't possible. Anything * smaller requires no instructions and this pseudo instruction * would only be included to control optimizations. */ - bool emit_s_barrier = barrier->exec_scope == scope_workgroup && + bool emit_s_barrier = barrier.exec_scope == scope_workgroup && program->workgroup_size > program->wave_size; bld.insert(std::move(instr)); diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index de0eb94..42aa701 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -178,121 +178,121 @@ struct InstrPred { return false; if (a->isVOP3()) { - VOP3_instruction* a3 = a->vop3(); - VOP3_instruction* b3 = b->vop3(); + VOP3_instruction& a3 = a->vop3(); + VOP3_instruction& b3 = b->vop3(); for (unsigned i = 0; i < 3; i++) { - if (a3->abs[i] != b3->abs[i] || - a3->neg[i] != b3->neg[i]) + if (a3.abs[i] != b3.abs[i] || + a3.neg[i] != b3.neg[i]) return false; } - return a3->clamp == b3->clamp && - a3->omod == b3->omod && - a3->opsel == b3->opsel; + return a3.clamp == b3.clamp && + a3.omod == b3.omod && + a3.opsel == b3.opsel; } if (a->isDPP()) { - DPP_instruction* aDPP = a->dpp(); - DPP_instruction* bDPP = b->dpp(); - return aDPP->pass_flags == bDPP->pass_flags && - aDPP->dpp_ctrl == bDPP->dpp_ctrl && - aDPP->bank_mask == bDPP->bank_mask && - aDPP->row_mask == bDPP->row_mask && - aDPP->bound_ctrl == bDPP->bound_ctrl && - aDPP->abs[0] == bDPP->abs[0] && - aDPP->abs[1] == bDPP->abs[1] && - aDPP->neg[0] == bDPP->neg[0] && - aDPP->neg[1] == bDPP->neg[1]; + DPP_instruction& aDPP = a->dpp(); + DPP_instruction& bDPP = b->dpp(); + return aDPP.pass_flags == bDPP.pass_flags && + aDPP.dpp_ctrl == bDPP.dpp_ctrl && + aDPP.bank_mask == bDPP.bank_mask && + aDPP.row_mask == bDPP.row_mask && + aDPP.bound_ctrl == bDPP.bound_ctrl && + aDPP.abs[0] == bDPP.abs[0] && + aDPP.abs[1] == bDPP.abs[1] && + aDPP.neg[0] == bDPP.neg[0] && + aDPP.neg[1] == bDPP.neg[1]; } if (a->isSDWA()) { - SDWA_instruction* aSDWA = a->sdwa(); - SDWA_instruction* bSDWA = b->sdwa(); - return aSDWA->sel[0] == bSDWA->sel[0] && - aSDWA->sel[1] == bSDWA->sel[1] && - aSDWA->dst_sel == bSDWA->dst_sel && - aSDWA->abs[0] == bSDWA->abs[0] && - aSDWA->abs[1] == bSDWA->abs[1] && - aSDWA->neg[0] == bSDWA->neg[0] && - aSDWA->neg[1] == bSDWA->neg[1] && - aSDWA->dst_preserve == bSDWA->dst_preserve && - aSDWA->clamp == bSDWA->clamp && - aSDWA->omod == bSDWA->omod; + SDWA_instruction& aSDWA = a->sdwa(); + SDWA_instruction& bSDWA = b->sdwa(); + return aSDWA.sel[0] == bSDWA.sel[0] && + aSDWA.sel[1] == bSDWA.sel[1] && + aSDWA.dst_sel == bSDWA.dst_sel && + aSDWA.abs[0] == bSDWA.abs[0] && + aSDWA.abs[1] == bSDWA.abs[1] && + aSDWA.neg[0] == bSDWA.neg[0] && + aSDWA.neg[1] == bSDWA.neg[1] && + aSDWA.dst_preserve == bSDWA.dst_preserve && + aSDWA.clamp == bSDWA.clamp && + aSDWA.omod == bSDWA.omod; } switch (a->format) { case Format::SOPK: { if (a->opcode == aco_opcode::s_getreg_b32) return false; - SOPK_instruction* aK = a->sopk(); - SOPK_instruction* bK = b->sopk(); - return aK->imm == bK->imm; + SOPK_instruction& aK = a->sopk(); + SOPK_instruction& bK = b->sopk(); + return aK.imm == bK.imm; } case Format::SMEM: { - SMEM_instruction* aS = a->smem(); - SMEM_instruction* bS = b->smem(); + SMEM_instruction& aS = a->smem(); + SMEM_instruction& bS = b->smem(); /* isel shouldn't be creating situations where this assertion fails */ - assert(aS->prevent_overflow == bS->prevent_overflow); - return aS->sync.can_reorder() && bS->sync.can_reorder() && - aS->sync == bS->sync && aS->glc == bS->glc && aS->dlc == bS->dlc && - aS->nv == bS->nv && aS->disable_wqm == bS->disable_wqm && - aS->prevent_overflow == bS->prevent_overflow; + assert(aS.prevent_overflow == bS.prevent_overflow); + return aS.sync.can_reorder() && bS.sync.can_reorder() && + aS.sync == bS.sync && aS.glc == bS.glc && aS.dlc == bS.dlc && + aS.nv == bS.nv && aS.disable_wqm == bS.disable_wqm && + aS.prevent_overflow == bS.prevent_overflow; } case Format::VINTRP: { - Interp_instruction* aI = a->vintrp(); - Interp_instruction* bI = b->vintrp(); - if (aI->attribute != bI->attribute) + Interp_instruction& aI = a->vintrp(); + Interp_instruction& bI = b->vintrp(); + if (aI.attribute != bI.attribute) return false; - if (aI->component != bI->component) + if (aI.component != bI.component) return false; return true; } case Format::VOP3P: { - VOP3P_instruction* a3P = a->vop3p(); - VOP3P_instruction* b3P = b->vop3p(); + VOP3P_instruction& a3P = a->vop3p(); + VOP3P_instruction& b3P = b->vop3p(); for (unsigned i = 0; i < 3; i++) { - if (a3P->neg_lo[i] != b3P->neg_lo[i] || - a3P->neg_hi[i] != b3P->neg_hi[i]) + if (a3P.neg_lo[i] != b3P.neg_lo[i] || + a3P.neg_hi[i] != b3P.neg_hi[i]) return false; } - return a3P->opsel_lo == b3P->opsel_lo && - a3P->opsel_hi == b3P->opsel_hi && - a3P->clamp == b3P->clamp; + return a3P.opsel_lo == b3P.opsel_lo && + a3P.opsel_hi == b3P.opsel_hi && + a3P.clamp == b3P.clamp; } case Format::PSEUDO_REDUCTION: { - Pseudo_reduction_instruction *aR = a->reduction(); - Pseudo_reduction_instruction *bR = b->reduction(); - return aR->pass_flags == bR->pass_flags && - aR->reduce_op == bR->reduce_op && - aR->cluster_size == bR->cluster_size; + Pseudo_reduction_instruction& aR = a->reduction(); + Pseudo_reduction_instruction& bR = b->reduction(); + return aR.pass_flags == bR.pass_flags && + aR.reduce_op == bR.reduce_op && + aR.cluster_size == bR.cluster_size; } case Format::MTBUF: { - MTBUF_instruction* aM = a->mtbuf(); - MTBUF_instruction* bM = b->mtbuf(); - return aM->sync.can_reorder() && bM->sync.can_reorder() && - aM->sync == bM->sync && - aM->dfmt == bM->dfmt && - aM->nfmt == bM->nfmt && - aM->offset == bM->offset && - aM->offen == bM->offen && - aM->idxen == bM->idxen && - aM->glc == bM->glc && - aM->dlc == bM->dlc && - aM->slc == bM->slc && - aM->tfe == bM->tfe && - aM->disable_wqm == bM->disable_wqm; + MTBUF_instruction& aM = a->mtbuf(); + MTBUF_instruction& bM = b->mtbuf(); + return aM.sync.can_reorder() && bM.sync.can_reorder() && + aM.sync == bM.sync && + aM.dfmt == bM.dfmt && + aM.nfmt == bM.nfmt && + aM.offset == bM.offset && + aM.offen == bM.offen && + aM.idxen == bM.idxen && + aM.glc == bM.glc && + aM.dlc == bM.dlc && + aM.slc == bM.slc && + aM.tfe == bM.tfe && + aM.disable_wqm == bM.disable_wqm; } case Format::MUBUF: { - MUBUF_instruction* aM = a->mubuf(); - MUBUF_instruction* bM = b->mubuf(); - return aM->sync.can_reorder() && bM->sync.can_reorder() && - aM->sync == bM->sync && - aM->offset == bM->offset && - aM->offen == bM->offen && - aM->idxen == bM->idxen && - aM->glc == bM->glc && - aM->dlc == bM->dlc && - aM->slc == bM->slc && - aM->tfe == bM->tfe && - aM->lds == bM->lds && - aM->disable_wqm == bM->disable_wqm; + MUBUF_instruction& aM = a->mubuf(); + MUBUF_instruction& bM = b->mubuf(); + return aM.sync.can_reorder() && bM.sync.can_reorder() && + aM.sync == bM.sync && + aM.offset == bM.offset && + aM.offen == bM.offen && + aM.idxen == bM.idxen && + aM.glc == bM.glc && + aM.dlc == bM.dlc && + aM.slc == bM.slc && + aM.tfe == bM.tfe && + aM.lds == bM.lds && + aM.disable_wqm == bM.disable_wqm; } /* we want to optimize these in NIR and don't hassle with load-store dependencies */ case Format::FLAT: @@ -308,31 +308,31 @@ struct InstrPred { a->opcode != aco_opcode::ds_permute_b32 && a->opcode != aco_opcode::ds_swizzle_b32) return false; - DS_instruction* aD = a->ds(); - DS_instruction* bD = b->ds(); - return aD->sync.can_reorder() && bD->sync.can_reorder() && - aD->sync == bD->sync && - aD->pass_flags == bD->pass_flags && - aD->gds == bD->gds && - aD->offset0 == bD->offset0 && - aD->offset1 == bD->offset1; + DS_instruction& aD = a->ds(); + DS_instruction& bD = b->ds(); + return aD.sync.can_reorder() && bD.sync.can_reorder() && + aD.sync == bD.sync && + aD.pass_flags == bD.pass_flags && + aD.gds == bD.gds && + aD.offset0 == bD.offset0 && + aD.offset1 == bD.offset1; } case Format::MIMG: { - MIMG_instruction* aM = a->mimg(); - MIMG_instruction* bM = b->mimg(); - return aM->sync.can_reorder() && bM->sync.can_reorder() && - aM->sync == bM->sync && - aM->dmask == bM->dmask && - aM->unrm == bM->unrm && - aM->glc == bM->glc && - aM->slc == bM->slc && - aM->tfe == bM->tfe && - aM->da == bM->da && - aM->lwe == bM->lwe && - aM->r128 == bM->r128 && - aM->a16 == bM->a16 && - aM->d16 == bM->d16 && - aM->disable_wqm == bM->disable_wqm; + MIMG_instruction& aM = a->mimg(); + MIMG_instruction& bM = b->mimg(); + return aM.sync.can_reorder() && bM.sync.can_reorder() && + aM.sync == bM.sync && + aM.dmask == bM.dmask && + aM.unrm == bM.unrm && + aM.glc == bM.glc && + aM.slc == bM.slc && + aM.tfe == bM.tfe && + aM.da == bM.da && + aM.lwe == bM.lwe && + aM.r128 == bM.r128 && + aM.a16 == bM.a16 && + aM.d16 == bM.d16 && + aM.disable_wqm == bM.disable_wqm; } default: return true; diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index 63bb9b1..3cffcb1 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -940,7 +940,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) can_use_mod = can_use_mod && instr_info.can_use_input_modifiers[(int)instr->opcode]; if (instr->isSDWA()) - can_use_mod = can_use_mod && (instr->sdwa()->sel[i] & sdwa_asuint) == sdwa_udword; + can_use_mod = can_use_mod && (instr->sdwa().sel[i] & sdwa_asuint) == sdwa_udword; else can_use_mod = can_use_mod && (instr->isDPP() || can_use_VOP3(ctx, instr)); @@ -949,11 +949,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) to_VOP3(ctx, instr); instr->operands[i] = Operand(info.temp); if (instr->isDPP()) - instr->dpp()->abs[i] = true; + instr->dpp().abs[i] = true; else if (instr->isSDWA()) - instr->sdwa()->abs[i] = true; + instr->sdwa().abs[i] = true; else - instr->vop3()->abs[i] = true; + instr->vop3().abs[i] = true; } if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) { instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32; @@ -968,11 +968,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) to_VOP3(ctx, instr); instr->operands[i].setTemp(info.temp); if (instr->isDPP()) - instr->dpp()->neg[i] = true; + instr->dpp().neg[i] = true; else if (instr->isSDWA()) - instr->sdwa()->neg[i] = true; + instr->sdwa().neg[i] = true; else - instr->vop3()->neg[i] = true; + instr->vop3().neg[i] = true; continue; } unsigned bits = get_operand_size(instr, i); @@ -999,7 +999,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) /* MUBUF: propagate constants and combine additions */ else if (instr->isMUBUF()) { - MUBUF_instruction *mubuf = instr->mubuf(); + MUBUF_instruction& mubuf = instr->mubuf(); Temp base; uint32_t offset; while (info.is_temp()) @@ -1011,29 +1011,29 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) * scratch accesses and other accesses and swizzling changing how * addressing works significantly, this probably applies to swizzled * MUBUF accesses. */ - bool vaddr_prevent_overflow = mubuf->swizzled && ctx.program->chip_class < GFX9; - bool saddr_prevent_overflow = mubuf->swizzled; + bool vaddr_prevent_overflow = mubuf.swizzled && ctx.program->chip_class < GFX9; + bool saddr_prevent_overflow = mubuf.swizzled; - if (mubuf->offen && i == 1 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) { - assert(!mubuf->idxen); + if (mubuf.offen && i == 1 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) { + assert(!mubuf.idxen); instr->operands[1] = Operand(v1); - mubuf->offset += info.val; - mubuf->offen = false; + mubuf.offset += info.val; + mubuf.offen = false; continue; - } else if (i == 2 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) { + } else if (i == 2 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) { instr->operands[2] = Operand((uint32_t) 0); - mubuf->offset += info.val; + mubuf.offset += info.val; continue; - } else if (mubuf->offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, vaddr_prevent_overflow) && - base.regClass() == v1 && mubuf->offset + offset < 4096) { - assert(!mubuf->idxen); + } else if (mubuf.offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, vaddr_prevent_overflow) && + base.regClass() == v1 && mubuf.offset + offset < 4096) { + assert(!mubuf.idxen); instr->operands[1].setTemp(base); - mubuf->offset += offset; + mubuf.offset += offset; continue; } else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, saddr_prevent_overflow) && - base.regClass() == s1 && mubuf->offset + offset < 4096) { + base.regClass() == s1 && mubuf.offset + offset < 4096) { instr->operands[i].setTemp(base); - mubuf->offset += offset; + mubuf.offset += offset; continue; } } @@ -1041,7 +1041,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) /* DS: combine additions */ else if (instr->isDS()) { - DS_instruction *ds = instr->ds(); + DS_instruction& ds = instr->ds(); Temp base; uint32_t offset; bool has_usable_ds_offset = ctx.program->chip_class >= GFX7; @@ -1055,16 +1055,16 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) unsigned shifts = (instr->opcode == aco_opcode::ds_write2_b64 || instr->opcode == aco_opcode::ds_read2_b64) ? 3 : 2; if ((offset & mask) == 0 && - ds->offset0 + (offset >> shifts) <= 255 && - ds->offset1 + (offset >> shifts) <= 255) { + ds.offset0 + (offset >> shifts) <= 255 && + ds.offset1 + (offset >> shifts) <= 255) { instr->operands[i].setTemp(base); - ds->offset0 += offset >> shifts; - ds->offset1 += offset >> shifts; + ds.offset0 += offset >> shifts; + ds.offset1 += offset >> shifts; } } else { - if (ds->offset0 + offset <= 65535) { + if (ds.offset0 + offset <= 65535) { instr->operands[i].setTemp(base); - ds->offset0 += offset; + ds.offset0 += offset; } } } @@ -1073,10 +1073,10 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) /* SMEM: propagate constants and combine additions */ else if (instr->isSMEM()) { - SMEM_instruction *smem = instr->smem(); + SMEM_instruction& smem = instr->smem(); Temp base; uint32_t offset; - bool prevent_overflow = smem->operands[0].size() > 2 || smem->prevent_overflow; + bool prevent_overflow = smem.operands[0].size() > 2 || smem.prevent_overflow; if (i == 1 && info.is_constant_or_literal(32) && ((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) || (ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) || @@ -1084,31 +1084,30 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) instr->operands[i] = Operand(info.val); continue; } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) { - bool soe = smem->operands.size() >= (!smem->definitions.empty() ? 3 : 4); + bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4); if (soe && - (!ctx.info[smem->operands.back().tempId()].is_constant_or_literal(32) || - ctx.info[smem->operands.back().tempId()].val != 0)) { + (!ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) || + ctx.info[smem.operands.back().tempId()].val != 0)) { continue; } if (soe) { - smem->operands[1] = Operand(offset); - smem->operands.back() = Operand(base); + smem.operands[1] = Operand(offset); + smem.operands.back() = Operand(base); } else { - SMEM_instruction *new_instr = create_instruction(smem->opcode, Format::SMEM, smem->operands.size() + 1, smem->definitions.size()); - new_instr->operands[0] = smem->operands[0]; + SMEM_instruction *new_instr = create_instruction(smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size()); + new_instr->operands[0] = smem.operands[0]; new_instr->operands[1] = Operand(offset); - if (smem->definitions.empty()) - new_instr->operands[2] = smem->operands[2]; + if (smem.definitions.empty()) + new_instr->operands[2] = smem.operands[2]; new_instr->operands.back() = Operand(base); - if (!smem->definitions.empty()) - new_instr->definitions[0] = smem->definitions[0]; - new_instr->sync = smem->sync; - new_instr->glc = smem->glc; - new_instr->dlc = smem->dlc; - new_instr->nv = smem->nv; - new_instr->disable_wqm = smem->disable_wqm; + if (!smem.definitions.empty()) + new_instr->definitions[0] = smem.definitions[0]; + new_instr->sync = smem.sync; + new_instr->glc = smem.glc; + new_instr->dlc = smem.dlc; + new_instr->nv = smem.nv; + new_instr->disable_wqm = smem.disable_wqm; instr.reset(new_instr); - smem = instr->smem(); } continue; } @@ -1365,10 +1364,10 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr) } case aco_opcode::v_med3_f16: case aco_opcode::v_med3_f32: { /* clamp */ - VOP3_instruction* vop3 = instr->vop3(); - if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] || - vop3->neg[0] || vop3->neg[1] || vop3->neg[2] || - vop3->omod != 0 || vop3->opsel != 0) + VOP3_instruction& vop3 = instr->vop3(); + if (vop3.abs[0] || vop3.abs[1] || vop3.abs[2] || + vop3.neg[0] || vop3.neg[1] || vop3.neg[2] || + vop3.omod != 0 || vop3.opsel != 0) break; unsigned idx = 0; @@ -1682,12 +1681,12 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr& instr) return false; if (op_instr[i]->isVOP3()) { - VOP3_instruction *vop3 = op_instr[i]->vop3(); - if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2) + VOP3_instruction& vop3 = op_instr[i]->vop3(); + if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 || vop3.opsel == 2) return false; - neg[i] = vop3->neg[0]; - abs[i] = vop3->abs[0]; - opsel |= (vop3->opsel & 1) << i; + neg[i] = vop3.neg[0]; + abs[i] = vop3.abs[0]; + opsel |= (vop3.opsel & 1) << i; } else if (op_instr[i]->isSDWA()) { return false; } @@ -1798,12 +1797,12 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr& instr) Instruction *new_instr; if (cmp->isVOP3()) { VOP3_instruction *new_vop3 = create_instruction(new_op, asVOP3(Format::VOPC), 2, 1); - VOP3_instruction *cmp_vop3 = cmp->vop3(); - memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); - memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); - new_vop3->clamp = cmp_vop3->clamp; - new_vop3->omod = cmp_vop3->omod; - new_vop3->opsel = cmp_vop3->opsel; + VOP3_instruction& cmp_vop3 = cmp->vop3(); + memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs)); + memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg)); + new_vop3->clamp = cmp_vop3.clamp; + new_vop3->omod = cmp_vop3.omod; + new_vop3->opsel = cmp_vop3.opsel; new_instr = new_vop3; } else { new_instr = create_instruction(new_op, Format::VOPC, 2, 1); @@ -1885,8 +1884,8 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr& in return false; if (nan_test->isVOP3()) { - VOP3_instruction *vop3 = nan_test->vop3(); - if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2) + VOP3_instruction& vop3 = nan_test->vop3(); + if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 || vop3.opsel == 2) return false; } @@ -1917,12 +1916,12 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr& in Instruction *new_instr; if (cmp->isVOP3()) { VOP3_instruction *new_vop3 = create_instruction(new_op, asVOP3(Format::VOPC), 2, 1); - VOP3_instruction *cmp_vop3 = cmp->vop3(); - memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); - memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); - new_vop3->clamp = cmp_vop3->clamp; - new_vop3->omod = cmp_vop3->omod; - new_vop3->opsel = cmp_vop3->opsel; + VOP3_instruction& cmp_vop3 = cmp->vop3(); + memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs)); + memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg)); + new_vop3->clamp = cmp_vop3.clamp; + new_vop3->omod = cmp_vop3.omod; + new_vop3->opsel = cmp_vop3.opsel; new_instr = new_vop3; } else { new_instr = create_instruction(new_op, Format::VOPC, 2, 1); @@ -1966,24 +1965,24 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr& instr) Instruction *new_instr; if (cmp->isVOP3()) { VOP3_instruction *new_vop3 = create_instruction(new_opcode, asVOP3(Format::VOPC), 2, 1); - VOP3_instruction *cmp_vop3 = cmp->vop3(); - memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs)); - memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg)); - new_vop3->clamp = cmp_vop3->clamp; - new_vop3->omod = cmp_vop3->omod; - new_vop3->opsel = cmp_vop3->opsel; + VOP3_instruction& cmp_vop3 = cmp->vop3(); + memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs)); + memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg)); + new_vop3->clamp = cmp_vop3.clamp; + new_vop3->omod = cmp_vop3.omod; + new_vop3->opsel = cmp_vop3.opsel; new_instr = new_vop3; } else if (cmp->isSDWA()) { SDWA_instruction *new_sdwa = create_instruction( new_opcode, (Format)((uint16_t)Format::SDWA | (uint16_t)Format::VOPC), 2, 1); - SDWA_instruction *cmp_sdwa = cmp->sdwa(); - memcpy(new_sdwa->abs, cmp_sdwa->abs, sizeof(new_sdwa->abs)); - memcpy(new_sdwa->sel, cmp_sdwa->sel, sizeof(new_sdwa->sel)); - memcpy(new_sdwa->neg, cmp_sdwa->neg, sizeof(new_sdwa->neg)); - new_sdwa->dst_sel = cmp_sdwa->dst_sel; - new_sdwa->dst_preserve = cmp_sdwa->dst_preserve; - new_sdwa->clamp = cmp_sdwa->clamp; - new_sdwa->omod = cmp_sdwa->omod; + SDWA_instruction& cmp_sdwa = cmp->sdwa(); + memcpy(new_sdwa->abs, cmp_sdwa.abs, sizeof(new_sdwa->abs)); + memcpy(new_sdwa->sel, cmp_sdwa.sel, sizeof(new_sdwa->sel)); + memcpy(new_sdwa->neg, cmp_sdwa.neg, sizeof(new_sdwa->neg)); + new_sdwa->dst_sel = cmp_sdwa.dst_sel; + new_sdwa->dst_preserve = cmp_sdwa.dst_preserve; + new_sdwa->clamp = cmp_sdwa.clamp; + new_sdwa->omod = cmp_sdwa.omod; new_instr = new_sdwa; } else { new_instr = create_instruction(new_opcode, Format::VOPC, 2, 1); @@ -2019,8 +2018,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2, if (fixed_to_exec(op2_instr->operands[0]) || fixed_to_exec(op2_instr->operands[1])) return false; - VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? op1_instr->vop3() : NULL; - VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? op2_instr->vop3() : NULL; + VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? &op1_instr->vop3() : NULL; + VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? &op2_instr->vop3() : NULL; if (op1_instr->isSDWA() || op2_instr->isSDWA()) return false; @@ -2641,11 +2640,11 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr& instr) assert(!ctx.info[instr->definitions[0].tempId()].is_mad()); if (instr->isSDWA()) { - if (!apply_omod_clamp_helper(ctx, instr->sdwa(), def_info)) + if (!apply_omod_clamp_helper(ctx, &instr->sdwa(), def_info)) return false; } else { to_VOP3(ctx, instr); - if (!apply_omod_clamp_helper(ctx, instr->vop3(), def_info)) + if (!apply_omod_clamp_helper(ctx, &instr->vop3(), def_info)) return false; } @@ -2767,7 +2766,7 @@ void propagate_swizzles(VOP3P_instruction* instr, uint8_t opsel_lo, uint8_t opse void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr& instr) { - VOP3P_instruction* vop3p = instr->vop3p(); + VOP3P_instruction* vop3p = &instr->vop3p(); /* apply clamp */ if (instr->opcode == aco_opcode::v_pk_mul_f16 && @@ -2778,7 +2777,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr& instr) ssa_info& info = ctx.info[instr->operands[0].tempId()]; if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) { - VOP3P_instruction* candidate = ctx.info[instr->operands[0].tempId()].instr->vop3p(); + VOP3P_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->vop3p(); candidate->clamp = true; propagate_swizzles(candidate, vop3p->opsel_lo, vop3p->opsel_hi); std::swap(instr->definitions[0], candidate->definitions[0]); @@ -2804,7 +2803,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr& instr) if (!check_vop3_operands(ctx, 2, ops)) continue; - VOP3P_instruction* fneg = info.instr->vop3p(); + VOP3P_instruction* fneg = &info.instr->vop3p(); if (fneg->clamp) continue; instr->operands[i] = fneg->operands[0]; @@ -2849,7 +2848,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr& instr) continue; /* no clamp allowed between mul and add */ - if (info.instr->vop3p()->clamp) + if (info.instr->vop3p().clamp) continue; mul_instr = info.instr; @@ -2875,7 +2874,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr& instr) /* turn packed mul+add into v_pk_fma_f16 */ assert(mul_instr->isVOP3P()); aco_ptr fma{create_instruction(aco_opcode::v_pk_fma_f16, Format::VOP3P, 3, 1)}; - VOP3P_instruction* mul = mul_instr->vop3p(); + VOP3P_instruction* mul = &mul_instr->vop3p(); for (unsigned i = 0; i < 2; i++) { fma->operands[i] = op[i]; fma->neg_lo[i] = mul->neg_lo[i]; @@ -2944,7 +2943,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr if (mul_instr->operands[0].isLiteral()) return; - if (mul_instr->isVOP3() && mul_instr->vop3()->clamp) + if (mul_instr->isVOP3() && mul_instr->vop3().clamp) return; if (mul_instr->isSDWA()) return; @@ -2958,17 +2957,17 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr instr->operands[0] = mul_instr->operands[0]; instr->operands[1] = mul_instr->operands[1]; instr->definitions[0] = def; - VOP3_instruction* new_mul = instr->vop3(); + VOP3_instruction& new_mul = instr->vop3(); if (mul_instr->isVOP3()) { - VOP3_instruction* mul = mul_instr->vop3(); - new_mul->neg[0] = mul->neg[0] && !is_abs; - new_mul->neg[1] = mul->neg[1] && !is_abs; - new_mul->abs[0] = mul->abs[0] || is_abs; - new_mul->abs[1] = mul->abs[1] || is_abs; - new_mul->omod = mul->omod; + VOP3_instruction& mul = mul_instr->vop3(); + new_mul.neg[0] = mul.neg[0] && !is_abs; + new_mul.neg[1] = mul.neg[1] && !is_abs; + new_mul.abs[0] = mul.abs[0] || is_abs; + new_mul.abs[1] = mul.abs[1] || is_abs; + new_mul.omod = mul.omod; } - new_mul->neg[0] ^= true; - new_mul->clamp = false; + new_mul.neg[0] ^= true; + new_mul.clamp = false; ctx.info[instr->definitions[0].tempId()].set_mul(instr.get()); return; @@ -3002,7 +3001,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr continue; /* no clamp/omod allowed between mul and add */ - if (info.instr->isVOP3() && (info.instr->vop3()->clamp || info.instr->vop3()->omod)) + if (info.instr->isVOP3() && (info.instr->vop3().clamp || info.instr->vop3().omod)) continue; Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]}; @@ -3033,28 +3032,28 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr& instr bool clamp = false; if (mul_instr->isVOP3()) { - VOP3_instruction* vop3 = mul_instr->vop3(); - neg[0] = vop3->neg[0]; - neg[1] = vop3->neg[1]; - abs[0] = vop3->abs[0]; - abs[1] = vop3->abs[1]; + VOP3_instruction& vop3 = mul_instr->vop3(); + neg[0] = vop3.neg[0]; + neg[1] = vop3.neg[1]; + abs[0] = vop3.abs[0]; + abs[1] = vop3.abs[1]; } if (instr->isVOP3()) { - VOP3_instruction* vop3 = instr->vop3(); - neg[2] = vop3->neg[add_op_idx]; - abs[2] = vop3->abs[add_op_idx]; - omod = vop3->omod; - clamp = vop3->clamp; + VOP3_instruction& vop3 = instr->vop3(); + neg[2] = vop3.neg[add_op_idx]; + abs[2] = vop3.abs[add_op_idx]; + omod = vop3.omod; + clamp = vop3.clamp; /* abs of the multiplication result */ - if (vop3->abs[1 - add_op_idx]) { + if (vop3.abs[1 - add_op_idx]) { neg[0] = false; neg[1] = false; abs[0] = true; abs[1] = true; } /* neg of the multiplication result */ - neg[1] = neg[1] ^ vop3->neg[1 - add_op_idx]; + neg[1] = neg[1] ^ vop3.neg[1 - add_op_idx]; } if (instr->opcode == aco_opcode::v_sub_f32 || instr->opcode == aco_opcode::v_sub_f16) neg[1 + add_op_idx] = neg[1 + add_op_idx] ^ true; diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index 16d198f..86abb47 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -273,12 +273,12 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) { switch (instr->format) { case Format::SOPK: { - const SOPK_instruction* sopk = instr->sopk(); - fprintf(output, " imm:%d", sopk->imm & 0x8000 ? (sopk->imm - 65536) : sopk->imm); + const SOPK_instruction& sopk = instr->sopk(); + fprintf(output, " imm:%d", sopk.imm & 0x8000 ? (sopk.imm - 65536) : sopk.imm); break; } case Format::SOPP: { - uint16_t imm = instr->sopp()->imm; + uint16_t imm = instr->sopp().imm; switch (instr->opcode) { case aco_opcode::s_waitcnt: { /* we usually should check the chip class for vmcnt/lgkm, but @@ -340,74 +340,74 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) break; } } - if (instr->sopp()->block != -1) - fprintf(output, " block:BB%d", instr->sopp()->block); + if (instr->sopp().block != -1) + fprintf(output, " block:BB%d", instr->sopp().block); break; } case Format::SMEM: { - const SMEM_instruction* smem = instr->smem(); - if (smem->glc) + const SMEM_instruction& smem = instr->smem(); + if (smem.glc) fprintf(output, " glc"); - if (smem->dlc) + if (smem.dlc) fprintf(output, " dlc"); - if (smem->nv) + if (smem.nv) fprintf(output, " nv"); - print_sync(smem->sync, output); + print_sync(smem.sync, output); break; } case Format::VINTRP: { - const Interp_instruction* vintrp = instr->vintrp(); - fprintf(output, " attr%d.%c", vintrp->attribute, "xyzw"[vintrp->component]); + const Interp_instruction& vintrp = instr->vintrp(); + fprintf(output, " attr%d.%c", vintrp.attribute, "xyzw"[vintrp.component]); break; } case Format::DS: { - const DS_instruction* ds = instr->ds(); - if (ds->offset0) - fprintf(output, " offset0:%u", ds->offset0); - if (ds->offset1) - fprintf(output, " offset1:%u", ds->offset1); - if (ds->gds) + const DS_instruction& ds = instr->ds(); + if (ds.offset0) + fprintf(output, " offset0:%u", ds.offset0); + if (ds.offset1) + fprintf(output, " offset1:%u", ds.offset1); + if (ds.gds) fprintf(output, " gds"); - print_sync(ds->sync, output); + print_sync(ds.sync, output); break; } case Format::MUBUF: { - const MUBUF_instruction* mubuf = instr->mubuf(); - if (mubuf->offset) - fprintf(output, " offset:%u", mubuf->offset); - if (mubuf->offen) + const MUBUF_instruction& mubuf = instr->mubuf(); + if (mubuf.offset) + fprintf(output, " offset:%u", mubuf.offset); + if (mubuf.offen) fprintf(output, " offen"); - if (mubuf->idxen) + if (mubuf.idxen) fprintf(output, " idxen"); - if (mubuf->addr64) + if (mubuf.addr64) fprintf(output, " addr64"); - if (mubuf->glc) + if (mubuf.glc) fprintf(output, " glc"); - if (mubuf->dlc) + if (mubuf.dlc) fprintf(output, " dlc"); - if (mubuf->slc) + if (mubuf.slc) fprintf(output, " slc"); - if (mubuf->tfe) + if (mubuf.tfe) fprintf(output, " tfe"); - if (mubuf->lds) + if (mubuf.lds) fprintf(output, " lds"); - if (mubuf->disable_wqm) + if (mubuf.disable_wqm) fprintf(output, " disable_wqm"); - print_sync(mubuf->sync, output); + print_sync(mubuf.sync, output); break; } case Format::MIMG: { - const MIMG_instruction* mimg = instr->mimg(); + const MIMG_instruction& mimg = instr->mimg(); unsigned identity_dmask = !instr->definitions.empty() ? (1 << instr->definitions[0].size()) - 1 : 0xf; - if ((mimg->dmask & identity_dmask) != identity_dmask) + if ((mimg.dmask & identity_dmask) != identity_dmask) fprintf(output, " dmask:%s%s%s%s", - mimg->dmask & 0x1 ? "x" : "", - mimg->dmask & 0x2 ? "y" : "", - mimg->dmask & 0x4 ? "z" : "", - mimg->dmask & 0x8 ? "w" : ""); - switch (mimg->dim) { + mimg.dmask & 0x1 ? "x" : "", + mimg.dmask & 0x2 ? "y" : "", + mimg.dmask & 0x4 ? "z" : "", + mimg.dmask & 0x8 ? "w" : ""); + switch (mimg.dim) { case ac_image_1d: fprintf(output, " 1d"); break; @@ -433,104 +433,104 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) fprintf(output, " 2darraymsaa"); break; } - if (mimg->unrm) + if (mimg.unrm) fprintf(output, " unrm"); - if (mimg->glc) + if (mimg.glc) fprintf(output, " glc"); - if (mimg->dlc) + if (mimg.dlc) fprintf(output, " dlc"); - if (mimg->slc) + if (mimg.slc) fprintf(output, " slc"); - if (mimg->tfe) + if (mimg.tfe) fprintf(output, " tfe"); - if (mimg->da) + if (mimg.da) fprintf(output, " da"); - if (mimg->lwe) + if (mimg.lwe) fprintf(output, " lwe"); - if (mimg->r128 || mimg->a16) + if (mimg.r128 || mimg.a16) fprintf(output, " r128/a16"); - if (mimg->d16) + if (mimg.d16) fprintf(output, " d16"); - if (mimg->disable_wqm) + if (mimg.disable_wqm) fprintf(output, " disable_wqm"); - print_sync(mimg->sync, output); + print_sync(mimg.sync, output); break; } case Format::EXP: { - const Export_instruction* exp = instr->exp(); - unsigned identity_mask = exp->compressed ? 0x5 : 0xf; - if ((exp->enabled_mask & identity_mask) != identity_mask) + const Export_instruction& exp = instr->exp(); + unsigned identity_mask = exp.compressed ? 0x5 : 0xf; + if ((exp.enabled_mask & identity_mask) != identity_mask) fprintf(output, " en:%c%c%c%c", - exp->enabled_mask & 0x1 ? 'r' : '*', - exp->enabled_mask & 0x2 ? 'g' : '*', - exp->enabled_mask & 0x4 ? 'b' : '*', - exp->enabled_mask & 0x8 ? 'a' : '*'); - if (exp->compressed) + exp.enabled_mask & 0x1 ? 'r' : '*', + exp.enabled_mask & 0x2 ? 'g' : '*', + exp.enabled_mask & 0x4 ? 'b' : '*', + exp.enabled_mask & 0x8 ? 'a' : '*'); + if (exp.compressed) fprintf(output, " compr"); - if (exp->done) + if (exp.done) fprintf(output, " done"); - if (exp->valid_mask) + if (exp.valid_mask) fprintf(output, " vm"); - if (exp->dest <= V_008DFC_SQ_EXP_MRT + 7) - fprintf(output, " mrt%d", exp->dest - V_008DFC_SQ_EXP_MRT); - else if (exp->dest == V_008DFC_SQ_EXP_MRTZ) + if (exp.dest <= V_008DFC_SQ_EXP_MRT + 7) + fprintf(output, " mrt%d", exp.dest - V_008DFC_SQ_EXP_MRT); + else if (exp.dest == V_008DFC_SQ_EXP_MRTZ) fprintf(output, " mrtz"); - else if (exp->dest == V_008DFC_SQ_EXP_NULL) + else if (exp.dest == V_008DFC_SQ_EXP_NULL) fprintf(output, " null"); - else if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= V_008DFC_SQ_EXP_POS + 3) - fprintf(output, " pos%d", exp->dest - V_008DFC_SQ_EXP_POS); - else if (exp->dest >= V_008DFC_SQ_EXP_PARAM && exp->dest <= V_008DFC_SQ_EXP_PARAM + 31) - fprintf(output, " param%d", exp->dest - V_008DFC_SQ_EXP_PARAM); + else if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= V_008DFC_SQ_EXP_POS + 3) + fprintf(output, " pos%d", exp.dest - V_008DFC_SQ_EXP_POS); + else if (exp.dest >= V_008DFC_SQ_EXP_PARAM && exp.dest <= V_008DFC_SQ_EXP_PARAM + 31) + fprintf(output, " param%d", exp.dest - V_008DFC_SQ_EXP_PARAM); break; } case Format::PSEUDO_BRANCH: { - const Pseudo_branch_instruction* branch = instr->branch(); + const Pseudo_branch_instruction& branch = instr->branch(); /* Note: BB0 cannot be a branch target */ - if (branch->target[0] != 0) - fprintf(output, " BB%d", branch->target[0]); - if (branch->target[1] != 0) - fprintf(output, ", BB%d", branch->target[1]); + if (branch.target[0] != 0) + fprintf(output, " BB%d", branch.target[0]); + if (branch.target[1] != 0) + fprintf(output, ", BB%d", branch.target[1]); break; } case Format::PSEUDO_REDUCTION: { - const Pseudo_reduction_instruction* reduce = instr->reduction(); - fprintf(output, " op:%s", reduce_ops[reduce->reduce_op]); - if (reduce->cluster_size) - fprintf(output, " cluster_size:%u", reduce->cluster_size); + const Pseudo_reduction_instruction& reduce = instr->reduction(); + fprintf(output, " op:%s", reduce_ops[reduce.reduce_op]); + if (reduce.cluster_size) + fprintf(output, " cluster_size:%u", reduce.cluster_size); break; } case Format::PSEUDO_BARRIER: { - const Pseudo_barrier_instruction* barrier = instr->barrier(); - print_sync(barrier->sync, output); - print_scope(barrier->exec_scope, output, "exec_scope"); + const Pseudo_barrier_instruction& barrier = instr->barrier(); + print_sync(barrier.sync, output); + print_scope(barrier.exec_scope, output, "exec_scope"); break; } case Format::FLAT: case Format::GLOBAL: case Format::SCRATCH: { - const FLAT_instruction* flat = instr->flatlike(); - if (flat->offset) - fprintf(output, " offset:%u", flat->offset); - if (flat->glc) + const FLAT_instruction& flat = instr->flatlike(); + if (flat.offset) + fprintf(output, " offset:%u", flat.offset); + if (flat.glc) fprintf(output, " glc"); - if (flat->dlc) + if (flat.dlc) fprintf(output, " dlc"); - if (flat->slc) + if (flat.slc) fprintf(output, " slc"); - if (flat->lds) + if (flat.lds) fprintf(output, " lds"); - if (flat->nv) + if (flat.nv) fprintf(output, " nv"); - if (flat->disable_wqm) + if (flat.disable_wqm) fprintf(output, " disable_wqm"); - print_sync(flat->sync, output); + print_sync(flat.sync, output); break; } case Format::MTBUF: { - const MTBUF_instruction* mtbuf = instr->mtbuf(); + const MTBUF_instruction& mtbuf = instr->mtbuf(); fprintf(output, " dfmt:"); - switch (mtbuf->dfmt) { + switch (mtbuf.dfmt) { case V_008F0C_BUF_DATA_FORMAT_8: fprintf(output, "8"); break; case V_008F0C_BUF_DATA_FORMAT_16: fprintf(output, "16"); break; case V_008F0C_BUF_DATA_FORMAT_8_8: fprintf(output, "8_8"); break; @@ -548,7 +548,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) case V_008F0C_BUF_DATA_FORMAT_RESERVED_15: fprintf(output, "reserved15"); break; } fprintf(output, " nfmt:"); - switch (mtbuf->nfmt) { + switch (mtbuf.nfmt) { case V_008F0C_BUF_NUM_FORMAT_UNORM: fprintf(output, "unorm"); break; case V_008F0C_BUF_NUM_FORMAT_SNORM: fprintf(output, "snorm"); break; case V_008F0C_BUF_NUM_FORMAT_USCALED: fprintf(output, "uscaled"); break; @@ -558,27 +558,27 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) case V_008F0C_BUF_NUM_FORMAT_SNORM_OGL: fprintf(output, "snorm"); break; case V_008F0C_BUF_NUM_FORMAT_FLOAT: fprintf(output, "float"); break; } - if (mtbuf->offset) - fprintf(output, " offset:%u", mtbuf->offset); - if (mtbuf->offen) + if (mtbuf.offset) + fprintf(output, " offset:%u", mtbuf.offset); + if (mtbuf.offen) fprintf(output, " offen"); - if (mtbuf->idxen) + if (mtbuf.idxen) fprintf(output, " idxen"); - if (mtbuf->glc) + if (mtbuf.glc) fprintf(output, " glc"); - if (mtbuf->dlc) + if (mtbuf.dlc) fprintf(output, " dlc"); - if (mtbuf->slc) + if (mtbuf.slc) fprintf(output, " slc"); - if (mtbuf->tfe) + if (mtbuf.tfe) fprintf(output, " tfe"); - if (mtbuf->disable_wqm) + if (mtbuf.disable_wqm) fprintf(output, " disable_wqm"); - print_sync(mtbuf->sync, output); + print_sync(mtbuf.sync, output); break; } case Format::VOP3P: { - if (instr->vop3p()->clamp) + if (instr->vop3p().clamp) fprintf(output, " clamp"); break; } @@ -587,8 +587,8 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) } } if (instr->isVOP3()) { - const VOP3_instruction* vop3 = instr->vop3(); - switch (vop3->omod) { + const VOP3_instruction& vop3 = instr->vop3(); + switch (vop3.omod) { case 1: fprintf(output, " *2"); break; @@ -599,50 +599,50 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) fprintf(output, " *0.5"); break; } - if (vop3->clamp) + if (vop3.clamp) fprintf(output, " clamp"); - if (vop3->opsel & (1 << 3)) + if (vop3.opsel & (1 << 3)) fprintf(output, " opsel_hi"); } else if (instr->isDPP()) { - const DPP_instruction* dpp = instr->dpp(); - if (dpp->dpp_ctrl <= 0xff) { + const DPP_instruction& dpp = instr->dpp(); + if (dpp.dpp_ctrl <= 0xff) { fprintf(output, " quad_perm:[%d,%d,%d,%d]", - dpp->dpp_ctrl & 0x3, (dpp->dpp_ctrl >> 2) & 0x3, - (dpp->dpp_ctrl >> 4) & 0x3, (dpp->dpp_ctrl >> 6) & 0x3); - } else if (dpp->dpp_ctrl >= 0x101 && dpp->dpp_ctrl <= 0x10f) { - fprintf(output, " row_shl:%d", dpp->dpp_ctrl & 0xf); - } else if (dpp->dpp_ctrl >= 0x111 && dpp->dpp_ctrl <= 0x11f) { - fprintf(output, " row_shr:%d", dpp->dpp_ctrl & 0xf); - } else if (dpp->dpp_ctrl >= 0x121 && dpp->dpp_ctrl <= 0x12f) { - fprintf(output, " row_ror:%d", dpp->dpp_ctrl & 0xf); - } else if (dpp->dpp_ctrl == dpp_wf_sl1) { + dpp.dpp_ctrl & 0x3, (dpp.dpp_ctrl >> 2) & 0x3, + (dpp.dpp_ctrl >> 4) & 0x3, (dpp.dpp_ctrl >> 6) & 0x3); + } else if (dpp.dpp_ctrl >= 0x101 && dpp.dpp_ctrl <= 0x10f) { + fprintf(output, " row_shl:%d", dpp.dpp_ctrl & 0xf); + } else if (dpp.dpp_ctrl >= 0x111 && dpp.dpp_ctrl <= 0x11f) { + fprintf(output, " row_shr:%d", dpp.dpp_ctrl & 0xf); + } else if (dpp.dpp_ctrl >= 0x121 && dpp.dpp_ctrl <= 0x12f) { + fprintf(output, " row_ror:%d", dpp.dpp_ctrl & 0xf); + } else if (dpp.dpp_ctrl == dpp_wf_sl1) { fprintf(output, " wave_shl:1"); - } else if (dpp->dpp_ctrl == dpp_wf_rl1) { + } else if (dpp.dpp_ctrl == dpp_wf_rl1) { fprintf(output, " wave_rol:1"); - } else if (dpp->dpp_ctrl == dpp_wf_sr1) { + } else if (dpp.dpp_ctrl == dpp_wf_sr1) { fprintf(output, " wave_shr:1"); - } else if (dpp->dpp_ctrl == dpp_wf_rr1) { + } else if (dpp.dpp_ctrl == dpp_wf_rr1) { fprintf(output, " wave_ror:1"); - } else if (dpp->dpp_ctrl == dpp_row_mirror) { + } else if (dpp.dpp_ctrl == dpp_row_mirror) { fprintf(output, " row_mirror"); - } else if (dpp->dpp_ctrl == dpp_row_half_mirror) { + } else if (dpp.dpp_ctrl == dpp_row_half_mirror) { fprintf(output, " row_half_mirror"); - } else if (dpp->dpp_ctrl == dpp_row_bcast15) { + } else if (dpp.dpp_ctrl == dpp_row_bcast15) { fprintf(output, " row_bcast:15"); - } else if (dpp->dpp_ctrl == dpp_row_bcast31) { + } else if (dpp.dpp_ctrl == dpp_row_bcast31) { fprintf(output, " row_bcast:31"); } else { - fprintf(output, " dpp_ctrl:0x%.3x", dpp->dpp_ctrl); + fprintf(output, " dpp_ctrl:0x%.3x", dpp.dpp_ctrl); } - if (dpp->row_mask != 0xf) - fprintf(output, " row_mask:0x%.1x", dpp->row_mask); - if (dpp->bank_mask != 0xf) - fprintf(output, " bank_mask:0x%.1x", dpp->bank_mask); - if (dpp->bound_ctrl) + if (dpp.row_mask != 0xf) + fprintf(output, " row_mask:0x%.1x", dpp.row_mask); + if (dpp.bank_mask != 0xf) + fprintf(output, " bank_mask:0x%.1x", dpp.bank_mask); + if (dpp.bound_ctrl) fprintf(output, " bound_ctrl:1"); } else if (instr->isSDWA()) { - const SDWA_instruction* sdwa = instr->sdwa(); - switch (sdwa->omod) { + const SDWA_instruction& sdwa = instr->sdwa(); + switch (sdwa.omod) { case 1: fprintf(output, " *2"); break; @@ -653,25 +653,25 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output) fprintf(output, " *0.5"); break; } - if (sdwa->clamp) + if (sdwa.clamp) fprintf(output, " clamp"); - switch (sdwa->dst_sel & sdwa_asuint) { + switch (sdwa.dst_sel & sdwa_asuint) { case sdwa_udword: break; case sdwa_ubyte0: case sdwa_ubyte1: case sdwa_ubyte2: case sdwa_ubyte3: - fprintf(output, " dst_sel:%sbyte%u", sdwa->dst_sel & sdwa_sext ? "s" : "u", - sdwa->dst_sel & sdwa_bytenum); + fprintf(output, " dst_sel:%sbyte%u", sdwa.dst_sel & sdwa_sext ? "s" : "u", + sdwa.dst_sel & sdwa_bytenum); break; case sdwa_uword0: case sdwa_uword1: - fprintf(output, " dst_sel:%sword%u", sdwa->dst_sel & sdwa_sext ? "s" : "u", - sdwa->dst_sel & sdwa_wordnum); + fprintf(output, " dst_sel:%sword%u", sdwa.dst_sel & sdwa_sext ? "s" : "u", + sdwa.dst_sel & sdwa_wordnum); break; } - if (sdwa->dst_preserve) + if (sdwa.dst_preserve) fprintf(output, " dst_preserve"); } } @@ -693,28 +693,28 @@ void aco_print_instr(const Instruction *instr, FILE *output) bool *const opsel = (bool *)alloca(instr->operands.size() * sizeof(bool)); uint8_t *const sel = (uint8_t *)alloca(instr->operands.size() * sizeof(uint8_t)); if (instr->isVOP3()) { - const VOP3_instruction* vop3 = instr->vop3(); + const VOP3_instruction& vop3 = instr->vop3(); for (unsigned i = 0; i < instr->operands.size(); ++i) { - abs[i] = vop3->abs[i]; - neg[i] = vop3->neg[i]; - opsel[i] = vop3->opsel & (1 << i); + abs[i] = vop3.abs[i]; + neg[i] = vop3.neg[i]; + opsel[i] = vop3.opsel & (1 << i); sel[i] = sdwa_udword; } } else if (instr->isDPP()) { - const DPP_instruction* dpp = instr->dpp(); + const DPP_instruction& dpp = instr->dpp(); for (unsigned i = 0; i < instr->operands.size(); ++i) { - abs[i] = i < 2 ? dpp->abs[i] : false; - neg[i] = i < 2 ? dpp->neg[i] : false; + abs[i] = i < 2 ? dpp.abs[i] : false; + neg[i] = i < 2 ? dpp.neg[i] : false; opsel[i] = false; sel[i] = sdwa_udword; } } else if (instr->isSDWA()) { - const SDWA_instruction* sdwa = instr->sdwa(); + const SDWA_instruction& sdwa = instr->sdwa(); for (unsigned i = 0; i < instr->operands.size(); ++i) { - abs[i] = i < 2 ? sdwa->abs[i] : false; - neg[i] = i < 2 ? sdwa->neg[i] : false; + abs[i] = i < 2 ? sdwa.abs[i] : false; + neg[i] = i < 2 ? sdwa.neg[i] : false; opsel[i] = false; - sel[i] = i < 2 ? sdwa->sel[i] : sdwa_udword; + sel[i] = i < 2 ? sdwa.sel[i] : sdwa_udword; } } else { for (unsigned i = 0; i < instr->operands.size(); ++i) { @@ -756,17 +756,17 @@ void aco_print_instr(const Instruction *instr, FILE *output) fprintf(output, "|"); if (instr->isVOP3P()) { - const VOP3P_instruction* vop3 = instr->vop3p(); - if ((vop3->opsel_lo & (1 << i)) || !(vop3->opsel_hi & (1 << i))) { + const VOP3P_instruction& vop3 = instr->vop3p(); + if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) { fprintf(output, ".%c%c", - vop3->opsel_lo & (1 << i) ? 'y' : 'x', - vop3->opsel_hi & (1 << i) ? 'y' : 'x'); + vop3.opsel_lo & (1 << i) ? 'y' : 'x', + vop3.opsel_hi & (1 << i) ? 'y' : 'x'); } - if (vop3->neg_lo[i] && vop3->neg_hi[i]) + if (vop3.neg_lo[i] && vop3.neg_hi[i]) fprintf(output, "*[-1,-1]"); - else if (vop3->neg_lo[i]) + else if (vop3.neg_lo[i]) fprintf(output, "*[-1,1]"); - else if (vop3->neg_hi[i]) + else if (vop3.neg_hi[i]) fprintf(output, "*[1,-1]"); } } diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp index 1c915f2..1d1a025 100644 --- a/src/amd/compiler/aco_reduce_assign.cpp +++ b/src/amd/compiler/aco_reduce_assign.cpp @@ -91,7 +91,7 @@ void setup_reduce_temp(Program* program) if (instr->format != Format::PSEUDO_REDUCTION) continue; - ReduceOp op = instr->reduction()->reduce_op; + ReduceOp op = instr->reduction().reduce_op; reduceTmp_in_loop |= block.loop_nest_depth > 0; if ((int)last_top_level_block_idx != inserted_at) { @@ -115,7 +115,7 @@ void setup_reduce_temp(Program* program) } /* same as before, except for the vector temporary instead of the reduce temporary */ - unsigned cluster_size = instr->reduction()->cluster_size; + unsigned cluster_size = instr->reduction().cluster_size; bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 || op == fmin64 || op == fmax64 || op == umin64 || op == umax64 || op == imin64 || op == imax64 || diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp index 7808935..84e172f 100644 --- a/src/amd/compiler/aco_register_allocation.cpp +++ b/src/amd/compiler/aco_register_allocation.cpp @@ -503,13 +503,13 @@ void add_subdword_operand(ra_ctx& ctx, aco_ptr& instr, unsigned idx update_phi_map(ctx, tmp.get(), instr.get()); return; } else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) { - instr->vop3()->opsel |= (byte / 2) << idx; + instr->vop3().opsel |= (byte / 2) << idx; return; } else if (instr->isVOP3P() && byte == 2) { - VOP3P_instruction* vop3p = instr->vop3p(); - assert(!(vop3p->opsel_lo & (1 << idx))); - vop3p->opsel_lo |= 1 << idx; - vop3p->opsel_hi |= 1 << idx; + VOP3P_instruction& vop3p = instr->vop3p(); + assert(!(vop3p.opsel_lo & (1 << idx))); + vop3p.opsel_lo |= 1 << idx; + vop3p.opsel_hi |= 1 << idx; return; } @@ -613,9 +613,9 @@ void add_subdword_definition(Program *program, aco_ptr& instr, unsi convert_to_SDWA(chip, instr); return; } else if (reg.byte() && rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, -1, reg.byte() / 2)) { - VOP3_instruction *vop3 = instr->vop3(); + VOP3_instruction& vop3 = instr->vop3(); if (reg.byte() == 2) - vop3->opsel |= (1 << 3); /* dst in high half */ + vop3.opsel |= (1 << 3); /* dst in high half */ return; } @@ -1569,7 +1569,7 @@ void handle_pseudo(ra_ctx& ctx, return; if (reg_file[scc]) { - instr->pseudo()->tmp_in_scc = true; + instr->pseudo().tmp_in_scc = true; int reg = ctx.max_used_sgpr; for (; reg >= 0 && reg_file[PhysReg{(unsigned)reg}]; reg--) @@ -1585,9 +1585,9 @@ void handle_pseudo(ra_ctx& ctx, } adjust_max_used_regs(ctx, s1, reg); - instr->pseudo()->scratch_sgpr = PhysReg{(unsigned)reg}; + instr->pseudo().scratch_sgpr = PhysReg{(unsigned)reg}; } else { - instr->pseudo()->tmp_in_scc = false; + instr->pseudo().tmp_in_scc = false; } } @@ -2156,7 +2156,7 @@ void register_allocation(Program *program, std::vector& live_out_per_bloc if (instr->isEXP() || (instr->isVMEM() && i == 3 && ctx.program->chip_class == GFX6) || - (instr->isDS() && instr->ds()->gds)) { + (instr->isDS() && instr->ds().gds)) { for (unsigned j = 0; j < operand.size(); j++) ctx.war_hint.set(operand.physReg().reg() + j); } diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp index 25aee52..ad85259 100644 --- a/src/amd/compiler/aco_scheduler.cpp +++ b/src/amd/compiler/aco_scheduler.cpp @@ -320,7 +320,7 @@ void MoveState::upwards_skip() bool is_gs_or_done_sendmsg(const Instruction *instr) { if (instr->opcode == aco_opcode::s_sendmsg) { - uint16_t imm = instr->sopp()->imm; + uint16_t imm = instr->sopp().imm; return (imm & sendmsg_id_mask) == _sendmsg_gs || (imm & sendmsg_id_mask) == _sendmsg_gs_done; } @@ -330,7 +330,7 @@ bool is_gs_or_done_sendmsg(const Instruction *instr) bool is_done_sendmsg(const Instruction *instr) { if (instr->opcode == aco_opcode::s_sendmsg) - return (instr->sopp()->imm & sendmsg_id_mask) == _sendmsg_gs_done; + return (instr->sopp().imm & sendmsg_id_mask) == _sendmsg_gs_done; return false; } @@ -380,14 +380,14 @@ void add_memory_event(memory_event_set *set, Instruction *instr, memory_sync_inf { set->has_control_barrier |= is_done_sendmsg(instr); if (instr->opcode == aco_opcode::p_barrier) { - Pseudo_barrier_instruction *bar = instr->barrier(); - if (bar->sync.semantics & semantic_acquire) - set->bar_acquire |= bar->sync.storage; - if (bar->sync.semantics & semantic_release) - set->bar_release |= bar->sync.storage; - set->bar_classes |= bar->sync.storage; - - set->has_control_barrier |= bar->exec_scope > scope_invocation; + Pseudo_barrier_instruction& bar = instr->barrier(); + if (bar.sync.semantics & semantic_acquire) + set->bar_acquire |= bar.sync.storage; + if (bar.sync.semantics & semantic_release) + set->bar_release |= bar.sync.storage; + set->bar_classes |= bar.sync.storage; + + set->has_control_barrier |= bar.exec_scope > scope_invocation; } if (!sync->storage) @@ -857,7 +857,7 @@ void schedule_block(sched_ctx& ctx, Program *program, Block* block, live& live_v Instruction* current = block->instructions[idx].get(); if (block->kind & block_kind_export_end && current->isEXP()) { - unsigned target = current->exp()->dest; + unsigned target = current->exp().dest; if (target >= V_008DFC_SQ_EXP_POS && target < V_008DFC_SQ_EXP_PRIM) { ctx.mv.current = current; schedule_position_export(ctx, block, live_vars.register_demand[block->index], current, idx); diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp index 5b6339e..02e16c0 100644 --- a/src/amd/compiler/aco_spill.cpp +++ b/src/amd/compiler/aco_spill.cpp @@ -283,7 +283,7 @@ aco_ptr do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t res.reset(create_instruction(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size())); } else if (instr->isSOPK()) { res.reset(create_instruction(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size())); - res->sopk()->imm = instr->sopk()->imm; + res->sopk().imm = instr->sopk().imm; } for (unsigned i = 0; i < instr->operands.size(); i++) { res->operands[i] = instr->operands[i]; @@ -1589,11 +1589,11 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { bld.insert(split); for (unsigned i = 0; i < temp.size(); i++) { Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false, true); - instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private); + instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); } } else { Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false, true); - instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private); + instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); } } else { ctx.program->config->spilled_sgprs += (*it)->operands[0].size(); @@ -1658,12 +1658,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) { Temp tmp = bld.tmp(v1); vec->operands[i] = Operand(tmp); Instruction *instr = bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), scratch_offset, offset + i * 4, false, true); - instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private); + instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); } bld.insert(vec); } else { Instruction *instr = bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset, false, true); - instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private); + instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private); } } else { uint32_t spill_slot = slots[spill_id]; diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp index b3d6db7..54f7405 100644 --- a/src/amd/compiler/aco_ssa_elimination.cpp +++ b/src/amd/compiler/aco_ssa_elimination.cpp @@ -178,10 +178,10 @@ void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block) pred->linear_succs[0] = succ_idx; ctx.program->blocks[succ_idx].linear_preds[i] = pred->index; - Pseudo_branch_instruction *branch = pred->instructions.back()->branch(); - assert(branch->isBranch()); - branch->target[0] = succ_idx; - branch->target[1] = succ_idx; + Pseudo_branch_instruction& branch = pred->instructions.back()->branch(); + assert(branch.isBranch()); + branch.target[0] = succ_idx; + branch.target[1] = succ_idx; } block->instructions.clear(); @@ -196,17 +196,17 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block) Block& pred = ctx.program->blocks[block->linear_preds[0]]; Block& succ = ctx.program->blocks[block->linear_succs[0]]; - Pseudo_branch_instruction* branch = pred.instructions.back()->branch(); - if (branch->opcode == aco_opcode::p_branch) { - branch->target[0] = succ.index; - branch->target[1] = succ.index; - } else if (branch->target[0] == block->index) { - branch->target[0] = succ.index; - } else if (branch->target[0] == succ.index) { - assert(branch->target[1] == block->index); - branch->target[1] = succ.index; - branch->opcode = aco_opcode::p_branch; - } else if (branch->target[1] == block->index) { + Pseudo_branch_instruction& branch = pred.instructions.back()->branch(); + if (branch.opcode == aco_opcode::p_branch) { + branch.target[0] = succ.index; + branch.target[1] = succ.index; + } else if (branch.target[0] == block->index) { + branch.target[0] = succ.index; + } else if (branch.target[0] == succ.index) { + assert(branch.target[1] == block->index); + branch.target[1] = succ.index; + branch.opcode = aco_opcode::p_branch; + } else if (branch.target[1] == block->index) { /* check if there is a fall-through path from block to succ */ bool falls_through = block->index < succ.index; for (unsigned j = block->index + 1; falls_through && j < succ.index; j++) { @@ -215,35 +215,35 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block) falls_through = false; } if (falls_through) { - branch->target[1] = succ.index; + branch.target[1] = succ.index; } else { /* check if there is a fall-through path for the alternative target */ - if (block->index >= branch->target[0]) + if (block->index >= branch.target[0]) return; - for (unsigned j = block->index + 1; j < branch->target[0]; j++) { + for (unsigned j = block->index + 1; j < branch.target[0]; j++) { if (!ctx.program->blocks[j].instructions.empty()) return; } /* This is a (uniform) break or continue block. The branch condition has to be inverted. */ - if (branch->opcode == aco_opcode::p_cbranch_z) - branch->opcode = aco_opcode::p_cbranch_nz; - else if (branch->opcode == aco_opcode::p_cbranch_nz) - branch->opcode = aco_opcode::p_cbranch_z; + if (branch.opcode == aco_opcode::p_cbranch_z) + branch.opcode = aco_opcode::p_cbranch_nz; + else if (branch.opcode == aco_opcode::p_cbranch_nz) + branch.opcode = aco_opcode::p_cbranch_z; else assert(false); /* also invert the linear successors */ pred.linear_succs[0] = pred.linear_succs[1]; pred.linear_succs[1] = succ.index; - branch->target[1] = branch->target[0]; - branch->target[0] = succ.index; + branch.target[1] = branch.target[0]; + branch.target[0] = succ.index; } } else { assert(false); } - if (branch->target[0] == branch->target[1]) - branch->opcode = aco_opcode::p_branch; + if (branch.target[0] == branch.target[1]) + branch.opcode = aco_opcode::p_branch; for (unsigned i = 0; i < pred.linear_succs.size(); i++) if (pred.linear_succs[i] == block->index) diff --git a/src/amd/compiler/aco_statistics.cpp b/src/amd/compiler/aco_statistics.cpp index 7e9c825..15baa26 100644 --- a/src/amd/compiler/aco_statistics.cpp +++ b/src/amd/compiler/aco_statistics.cpp @@ -46,7 +46,7 @@ void collect_preasm_stats(Program *program) program->statistics[statistic_instructions] += block.instructions.size(); for (aco_ptr& instr : block.instructions) { - if (instr->isSOPP() && instr->sopp()->block != -1) + if (instr->isSOPP() && instr->sopp().block != -1) program->statistics[statistic_branches]++; if (instr->opcode == aco_opcode::p_constaddr) diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 2c53554..3b21741 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -148,10 +148,10 @@ bool validate_ir(Program* program) check(program->chip_class >= GFX8, "SDWA is GFX8+ only", instr.get()); - SDWA_instruction *sdwa = instr->sdwa(); - check(sdwa->omod == 0 || program->chip_class >= GFX9, "SDWA omod only supported on GFX9+", instr.get()); + SDWA_instruction& sdwa = instr->sdwa(); + check(sdwa.omod == 0 || program->chip_class >= GFX9, "SDWA omod only supported on GFX9+", instr.get()); if (base_format == Format::VOPC) { - check(sdwa->clamp == false || program->chip_class == GFX8, "SDWA VOPC clamp only supported on GFX8", instr.get()); + check(sdwa.clamp == false || program->chip_class == GFX8, "SDWA VOPC clamp only supported on GFX8", instr.get()); check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) || program->chip_class >= GFX9, "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get()); @@ -183,21 +183,21 @@ bool validate_ir(Program* program) } if (instr->definitions[0].regClass().is_subdword()) - check((sdwa->dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()), "Unexpected SDWA sel for sub-dword definition", instr.get()); + check((sdwa.dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()), "Unexpected SDWA sel for sub-dword definition", instr.get()); } /* check opsel */ if (instr->isVOP3()) { - VOP3_instruction *vop3 = instr->vop3(); - check(vop3->opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get()); + VOP3_instruction& vop3 = instr->vop3(); + check(vop3.opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get()); for (unsigned i = 0; i < 3; i++) { if (i >= instr->operands.size() || (instr->operands[i].hasRegClass() && instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())) - check((vop3->opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get()); + check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get()); } if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed()) - check((vop3->opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr.get()); + check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr.get()); } /* check for undefs */ @@ -377,7 +377,7 @@ bool validate_ir(Program* program) for (const Operand &op : instr->operands) check(op.regClass().type() == RegType::vgpr, "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.", instr.get()); - if (instr->opcode == aco_opcode::p_reduce && instr->reduction()->cluster_size == program->wave_size) + if (instr->opcode == aco_opcode::p_reduce && instr->reduction().cluster_size == program->wave_size) check(instr->definitions[0].regClass().type() == RegType::sgpr, "The result of unclustered reductions must go into an SGPR.", instr.get()); else check(instr->definitions[0].regClass().type() == RegType::vgpr, "The result of scans and clustered reductions must go into a VGPR.", instr.get()); @@ -549,7 +549,7 @@ bool validate_subdword_operand(chip_class chip, const aco_ptr& inst return byte == 0; if (instr->isPseudo() && chip >= GFX8) return true; - if (instr->isSDWA() && (instr->sdwa()->sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes())) + if (instr->isSDWA() && (instr->sdwa().sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes())) return true; if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1)) return true; @@ -599,7 +599,7 @@ bool validate_subdword_definition(chip_class chip, const aco_ptr& i if (instr->isPseudo() && chip >= GFX8) return true; - if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes())) + if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes())) return true; if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1)) return true; @@ -630,7 +630,7 @@ unsigned get_subdword_bytes_written(Program *program, const aco_ptr if (instr->isPseudo()) return chip >= GFX8 ? def.bytes() : def.size() * 4u; - if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes())) + if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes())) return def.bytes(); switch (instr->opcode) { diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp index c0c2165..94105e3 100644 --- a/src/amd/compiler/tests/test_optimizer.cpp +++ b/src/amd/compiler/tests/test_optimizer.cpp @@ -735,7 +735,7 @@ BEGIN_TEST(optimize.add3) //! v1: %res1 = v_add_u32 %a, %tmp1 //! p_unit_test 1, %res1 tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); - tmp.instr->vop3()->clamp = true; + tmp.instr->vop3().clamp = true; writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp)); //! v1: %tmp2 = v_add_u32 %b, %c @@ -743,7 +743,7 @@ BEGIN_TEST(optimize.add3) //! p_unit_test 2, %res2 tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]); tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp); - tmp.instr->vop3()->clamp = true; + tmp.instr->vop3().clamp = true; writeout(2, tmp); finish_opt_test(); diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index 2c79203..ec4a140 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -197,7 +197,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) Definition(v0_lo, v1), Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b)); - pseudo->pseudo()->scratch_sgpr = m0; + pseudo->pseudo().scratch_sgpr = m0; //~gfx[67]! p_unit_test 14 //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8] -- 2.7.4