From e115b01948f90dcb922c626438a0e83a796d5ceb Mon Sep 17 00:00:00 2001
From: Rhys Perry <pendingchaos02@gmail.com>
Date: Thu, 21 Jan 2021 16:13:34 +0000
Subject: [PATCH] aco: return references in instruction cast methods
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel SchÃ¼rmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8595>
---
 src/amd/compiler/aco_assembler.cpp             | 260 ++++++++++----------
 src/amd/compiler/aco_insert_NOPs.cpp           |  34 +--
 src/amd/compiler/aco_insert_exec_mask.cpp      |  18 +-
 src/amd/compiler/aco_insert_waitcnt.cpp        |  52 ++--
 src/amd/compiler/aco_instruction_selection.cpp |  32 +--
 src/amd/compiler/aco_ir.cpp                    |  46 ++--
 src/amd/compiler/aco_ir.h                      | 126 +++++-----
 src/amd/compiler/aco_lower_to_hw_instr.cpp     |  34 +--
 src/amd/compiler/aco_opt_value_numbering.cpp   | 212 ++++++++---------
 src/amd/compiler/aco_optimizer.cpp             | 233 +++++++++---------
 src/amd/compiler/aco_print_ir.cpp              | 318 ++++++++++++-------------
 src/amd/compiler/aco_reduce_assign.cpp         |   4 +-
 src/amd/compiler/aco_register_allocation.cpp   |  22 +-
 src/amd/compiler/aco_scheduler.cpp             |  22 +-
 src/amd/compiler/aco_spill.cpp                 |  10 +-
 src/amd/compiler/aco_ssa_elimination.cpp       |  52 ++--
 src/amd/compiler/aco_statistics.cpp            |   2 +-
 src/amd/compiler/aco_validate.cpp              |  24 +-
 src/amd/compiler/tests/test_optimizer.cpp      |   4 +-
 src/amd/compiler/tests/test_to_hw_instr.cpp    |   2 +-
 20 files changed, 753 insertions(+), 754 deletions(-)

diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index c447516..48a2b55 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -109,7 +109,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       break;
    }
    case Format::SOPK: {
-      SOPK_instruction *sopk = instr->sopk();
+      SOPK_instruction& sopk = instr->sopk();
 
       if (instr->opcode == aco_opcode::s_subvector_loop_begin) {
          assert(ctx.chip_class >= GFX10);
@@ -121,7 +121,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
          /* Adjust s_subvector_loop_begin instruction to the address after the end  */
          out[ctx.subvector_begin_pos] |= (out.size() - ctx.subvector_begin_pos);
          /* Adjust s_subvector_loop_end instruction to the address after the beginning  */
-         sopk->imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size());
+         sopk.imm = (uint16_t)(ctx.subvector_begin_pos - (int)out.size());
          ctx.subvector_begin_pos = -1;
       }
 
@@ -132,7 +132,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
          instr->definitions[0].physReg() << 16 :
          !instr->operands.empty() && instr->operands[0].physReg() <= 127 ?
          instr->operands[0].physReg() << 16 : 0;
-      encoding |= sopk->imm;
+      encoding |= sopk.imm;
       out.push_back(encoding);
       break;
    }
@@ -157,19 +157,19 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       break;
    }
    case Format::SOPP: {
-      SOPP_instruction* sopp = instr->sopp();
+      SOPP_instruction& sopp = instr->sopp();
       uint32_t encoding = (0b101111111 << 23);
       encoding |= opcode << 16;
-      encoding |= (uint16_t) sopp->imm;
-      if (sopp->block != -1) {
-         sopp->pass_flags = 0;
-         ctx.branches.emplace_back(out.size(), sopp);
+      encoding |= (uint16_t) sopp.imm;
+      if (sopp.block != -1) {
+         sopp.pass_flags = 0;
+         ctx.branches.emplace_back(out.size(), &sopp);
       }
       out.push_back(encoding);
       break;
    }
    case Format::SMEM: {
-      SMEM_instruction* smem = instr->smem();
+      SMEM_instruction& smem = instr->smem();
       bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
       bool is_load = !instr->definitions.empty();
       uint32_t encoding = 0;
@@ -196,16 +196,16 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
 
       if (ctx.chip_class <= GFX9) {
          encoding = (0b110000 << 26);
-         assert(!smem->dlc); /* Device-level coherent is not supported on GFX9 and lower */
-         encoding |= smem->nv ? 1 << 15 : 0;
+         assert(!smem.dlc); /* Device-level coherent is not supported on GFX9 and lower */
+         encoding |= smem.nv ? 1 << 15 : 0;
       } else {
          encoding = (0b111101 << 26);
-         assert(!smem->nv); /* Non-volatile is not supported on GFX10 */
-         encoding |= smem->dlc ? 1 << 14 : 0;
+         assert(!smem.nv); /* Non-volatile is not supported on GFX10 */
+         encoding |= smem.dlc ? 1 << 14 : 0;
       }
 
       encoding |= opcode << 18;
-      encoding |= smem->glc ? 1 << 16 : 0;
+      encoding |= smem.glc ? 1 << 16 : 0;
 
       if (ctx.chip_class <= GFX9) {
          if (instr->operands.size() >= 2)
@@ -284,7 +284,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       break;
    }
    case Format::VINTRP: {
-      Interp_instruction* interp = instr->vintrp();
+      Interp_instruction& interp = instr->vintrp();
       uint32_t encoding = 0;
 
       if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
@@ -304,8 +304,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
          out.push_back(encoding);
 
          encoding = 0;
-         encoding |= interp->attribute;
-         encoding |= interp->component << 6;
+         encoding |= interp.attribute;
+         encoding |= interp.component << 6;
          encoding |= instr->operands[0].physReg() << 9;
          if (instr->opcode == aco_opcode::v_interp_p2_f16 ||
              instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
@@ -323,8 +323,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
          assert(encoding);
          encoding |= (0xFF & instr->definitions[0].physReg()) << 18;
          encoding |= opcode << 16;
-         encoding |= interp->attribute << 10;
-         encoding |= interp->component << 8;
+         encoding |= interp.attribute << 10;
+         encoding |= interp.component << 8;
          if (instr->opcode == aco_opcode::v_interp_mov_f32)
             encoding |= (0x3 & instr->operands[0].constantValue());
          else
@@ -334,17 +334,17 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       break;
    }
    case Format::DS: {
-      DS_instruction* ds = instr->ds();
+      DS_instruction& ds = instr->ds();
       uint32_t encoding = (0b110110 << 26);
       if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
          encoding |= opcode << 17;
-         encoding |= (ds->gds ? 1 : 0) << 16;
+         encoding |= (ds.gds ? 1 : 0) << 16;
       } else {
          encoding |= opcode << 18;
-         encoding |= (ds->gds ? 1 : 0) << 17;
+         encoding |= (ds.gds ? 1 : 0) << 17;
       }
-      encoding |= ((0xFF & ds->offset1) << 8);
-      encoding |= (0xFFFF & ds->offset0);
+      encoding |= ((0xFF & ds.offset1) << 8);
+      encoding |= (0xFFFF & ds.offset0);
       out.push_back(encoding);
       encoding = 0;
       unsigned reg = !instr->definitions.empty() ? instr->definitions[0].physReg() : 0;
@@ -358,30 +358,30 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       break;
    }
    case Format::MUBUF: {
-      MUBUF_instruction* mubuf = instr->mubuf();
+      MUBUF_instruction& mubuf = instr->mubuf();
       uint32_t encoding = (0b111000 << 26);
       encoding |= opcode << 18;
-      encoding |= (mubuf->lds ? 1 : 0) << 16;
-      encoding |= (mubuf->glc ? 1 : 0) << 14;
-      encoding |= (mubuf->idxen ? 1 : 0) << 13;
-      assert(!mubuf->addr64 || ctx.chip_class <= GFX7);
+      encoding |= (mubuf.lds ? 1 : 0) << 16;
+      encoding |= (mubuf.glc ? 1 : 0) << 14;
+      encoding |= (mubuf.idxen ? 1 : 0) << 13;
+      assert(!mubuf.addr64 || ctx.chip_class <= GFX7);
       if (ctx.chip_class == GFX6 || ctx.chip_class == GFX7)
-         encoding |= (mubuf->addr64 ? 1 : 0) << 15;
-      encoding |= (mubuf->offen ? 1 : 0) << 12;
+         encoding |= (mubuf.addr64 ? 1 : 0) << 15;
+      encoding |= (mubuf.offen ? 1 : 0) << 12;
       if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
-         assert(!mubuf->dlc); /* Device-level coherent is not supported on GFX9 and lower */
-         encoding |= (mubuf->slc ? 1 : 0) << 17;
+         assert(!mubuf.dlc); /* Device-level coherent is not supported on GFX9 and lower */
+         encoding |= (mubuf.slc ? 1 : 0) << 17;
       } else if (ctx.chip_class >= GFX10) {
-         encoding |= (mubuf->dlc ? 1 : 0) << 15;
+         encoding |= (mubuf.dlc ? 1 : 0) << 15;
       }
-      encoding |= 0x0FFF & mubuf->offset;
+      encoding |= 0x0FFF & mubuf.offset;
       out.push_back(encoding);
       encoding = 0;
       if (ctx.chip_class <= GFX7 || ctx.chip_class >= GFX10) {
-         encoding |= (mubuf->slc ? 1 : 0) << 22;
+         encoding |= (mubuf.slc ? 1 : 0) << 22;
       }
       encoding |= instr->operands[2].physReg() << 24;
-      encoding |= (mubuf->tfe ? 1 : 0) << 23;
+      encoding |= (mubuf.tfe ? 1 : 0) << 23;
       encoding |= (instr->operands[0].physReg() >> 2) << 16;
       unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg();
       encoding |= (0xFF & reg) << 8;
@@ -390,17 +390,17 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       break;
    }
    case Format::MTBUF: {
-      MTBUF_instruction* mtbuf = instr->mtbuf();
+      MTBUF_instruction& mtbuf = instr->mtbuf();
 
-      uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt);
+      uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf.dfmt, mtbuf.nfmt);
       uint32_t encoding = (0b111010 << 26);
       assert(img_format <= 0x7F);
-      assert(!mtbuf->dlc || ctx.chip_class >= GFX10);
-      encoding |= (mtbuf->dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
-      encoding |= (mtbuf->glc ? 1 : 0) << 14;
-      encoding |= (mtbuf->idxen ? 1 : 0) << 13;
-      encoding |= (mtbuf->offen ? 1 : 0) << 12;
-      encoding |= 0x0FFF & mtbuf->offset;
+      assert(!mtbuf.dlc || ctx.chip_class >= GFX10);
+      encoding |= (mtbuf.dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
+      encoding |= (mtbuf.glc ? 1 : 0) << 14;
+      encoding |= (mtbuf.idxen ? 1 : 0) << 13;
+      encoding |= (mtbuf.offen ? 1 : 0) << 12;
+      encoding |= 0x0FFF & mtbuf.offset;
       encoding |= (img_format << 19); /* Handles both the GFX10 FORMAT and the old NFMT+DFMT */
 
       if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
@@ -413,8 +413,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       encoding = 0;
 
       encoding |= instr->operands[2].physReg() << 24;
-      encoding |= (mtbuf->tfe ? 1 : 0) << 23;
-      encoding |= (mtbuf->slc ? 1 : 0) << 22;
+      encoding |= (mtbuf.tfe ? 1 : 0) << 23;
+      encoding |= (mtbuf.slc ? 1 : 0) << 22;
       encoding |= (instr->operands[0].physReg() >> 2) << 16;
       unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg();
       encoding |= (0xFF & reg) << 8;
@@ -437,26 +437,26 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       assert(!use_nsa || ctx.chip_class >= GFX10);
       unsigned nsa_dwords = use_nsa ? DIV_ROUND_UP(addr_dwords - 1, 4) : 0;
 
-      MIMG_instruction* mimg = instr->mimg();
+      MIMG_instruction& mimg = instr->mimg();
       uint32_t encoding = (0b111100 << 26);
-      encoding |= mimg->slc ? 1 << 25 : 0;
+      encoding |= mimg.slc ? 1 << 25 : 0;
       encoding |= opcode << 18;
-      encoding |= mimg->lwe ? 1 << 17 : 0;
-      encoding |= mimg->tfe ? 1 << 16 : 0;
-      encoding |= mimg->glc ? 1 << 13 : 0;
-      encoding |= mimg->unrm ? 1 << 12 : 0;
+      encoding |= mimg.lwe ? 1 << 17 : 0;
+      encoding |= mimg.tfe ? 1 << 16 : 0;
+      encoding |= mimg.glc ? 1 << 13 : 0;
+      encoding |= mimg.unrm ? 1 << 12 : 0;
       if (ctx.chip_class <= GFX9) {
-         assert(!mimg->dlc); /* Device-level coherent is not supported on GFX9 and lower */
-         assert(!mimg->r128);
-         encoding |= mimg->a16 ? 1 << 15 : 0;
-         encoding |= mimg->da ? 1 << 14 : 0;
+         assert(!mimg.dlc); /* Device-level coherent is not supported on GFX9 and lower */
+         assert(!mimg.r128);
+         encoding |= mimg.a16 ? 1 << 15 : 0;
+         encoding |= mimg.da ? 1 << 14 : 0;
       } else {
-         encoding |= mimg->r128 ? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
+         encoding |= mimg.r128 ? 1 << 15 : 0; /* GFX10: A16 moved to 2nd word, R128 replaces it in 1st word */
          encoding |= nsa_dwords << 1;
-         encoding |= mimg->dim << 3; /* GFX10: dimensionality instead of declare array */
-         encoding |= mimg->dlc ? 1 << 7 : 0;
+         encoding |= mimg.dim << 3; /* GFX10: dimensionality instead of declare array */
+         encoding |= mimg.dlc ? 1 << 7 : 0;
       }
-      encoding |= (0xF & mimg->dmask) << 8;
+      encoding |= (0xF & mimg.dmask) << 8;
       out.push_back(encoding);
       encoding = (0xFF & instr->operands[3].physReg()); /* VADDR */
       if (!instr->definitions.empty()) {
@@ -468,10 +468,10 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       if (!instr->operands[1].isUndefined())
          encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 21; /* sampler */
 
-      assert(!mimg->d16 || ctx.chip_class >= GFX9);
-      encoding |= mimg->d16 ? 1 << 15 : 0;
+      assert(!mimg.d16 || ctx.chip_class >= GFX9);
+      encoding |= mimg.d16 ? 1 << 15 : 0;
       if (ctx.chip_class >= GFX10) {
-         encoding |= mimg->a16 ? 1 << 14 : 0; /* GFX10: A16 still exists, but is in a different place */
+         encoding |= mimg.a16 ? 1 << 14 : 0; /* GFX10: A16 still exists, but is in a different place */
       }
 
       out.push_back(encoding);
@@ -487,33 +487,33 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
    case Format::FLAT:
    case Format::SCRATCH:
    case Format::GLOBAL: {
-      FLAT_instruction *flat = instr->flatlike();
+      FLAT_instruction& flat = instr->flatlike();
       uint32_t encoding = (0b110111 << 26);
       encoding |= opcode << 18;
       if (ctx.chip_class <= GFX9) {
-         assert(flat->offset <= 0x1fff);
-         encoding |= flat->offset & 0x1fff;
+         assert(flat.offset <= 0x1fff);
+         encoding |= flat.offset & 0x1fff;
       } else if (instr->isFlat()) {
          /* GFX10 has a 12-bit immediate OFFSET field,
           * but it has a hw bug: it ignores the offset, called FlatSegmentOffsetBug
           */
-         assert(flat->offset == 0);
+         assert(flat.offset == 0);
       } else {
-         assert(flat->offset <= 0xfff);
-         encoding |= flat->offset & 0xfff;
+         assert(flat.offset <= 0xfff);
+         encoding |= flat.offset & 0xfff;
       }
       if (instr->isScratch())
          encoding |= 1 << 14;
       else if (instr->isGlobal())
          encoding |= 2 << 14;
-      encoding |= flat->lds ? 1 << 13 : 0;
-      encoding |= flat->glc ? 1 << 16 : 0;
-      encoding |= flat->slc ? 1 << 17 : 0;
+      encoding |= flat.lds ? 1 << 13 : 0;
+      encoding |= flat.glc ? 1 << 16 : 0;
+      encoding |= flat.slc ? 1 << 17 : 0;
       if (ctx.chip_class >= GFX10) {
-         assert(!flat->nv);
-         encoding |= flat->dlc ? 1 << 12 : 0;
+         assert(!flat.nv);
+         encoding |= flat.dlc ? 1 << 12 : 0;
       } else {
-         assert(!flat->dlc);
+         assert(!flat.dlc);
       }
       out.push_back(encoding);
       encoding = (0xFF & instr->operands[0].physReg());
@@ -531,12 +531,12 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
          else
             encoding |= sgpr_null << 16;
       }
-      encoding |= flat->nv ? 1 << 23 : 0;
+      encoding |= flat.nv ? 1 << 23 : 0;
       out.push_back(encoding);
       break;
    }
    case Format::EXP: {
-      Export_instruction* exp = instr->exp();
+      Export_instruction& exp = instr->exp();
       uint32_t encoding;
       if (ctx.chip_class == GFX8 || ctx.chip_class == GFX9) {
          encoding = (0b110001 << 26);
@@ -544,16 +544,16 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
          encoding = (0b111110 << 26);
       }
 
-      encoding |= exp->valid_mask ? 0b1 << 12 : 0;
-      encoding |= exp->done ? 0b1 << 11 : 0;
-      encoding |= exp->compressed ? 0b1 << 10 : 0;
-      encoding |= exp->dest << 4;
-      encoding |= exp->enabled_mask;
+      encoding |= exp.valid_mask ? 0b1 << 12 : 0;
+      encoding |= exp.done ? 0b1 << 11 : 0;
+      encoding |= exp.compressed ? 0b1 << 10 : 0;
+      encoding |= exp.dest << 4;
+      encoding |= exp.enabled_mask;
       out.push_back(encoding);
-      encoding = 0xFF & exp->operands[0].physReg();
-      encoding |= (0xFF & exp->operands[1].physReg()) << 8;
-      encoding |= (0xFF & exp->operands[2].physReg()) << 16;
-      encoding |= (0xFF & exp->operands[3].physReg()) << 24;
+      encoding = 0xFF & exp.operands[0].physReg();
+      encoding |= (0xFF & exp.operands[1].physReg()) << 8;
+      encoding |= (0xFF & exp.operands[2].physReg()) << 16;
+      encoding |= (0xFF & exp.operands[3].physReg()) << 24;
       out.push_back(encoding);
       break;
    }
@@ -564,7 +564,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
       break;
    default:
       if (instr->isVOP3()) {
-         VOP3_instruction* vop3 = instr->vop3();
+         VOP3_instruction& vop3 = instr->vop3();
 
          if (instr->isVOP2()) {
             opcode = opcode + 0x100;
@@ -590,14 +590,14 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
 
          if (ctx.chip_class <= GFX7) {
             encoding |= opcode << 17;
-            encoding |= (vop3->clamp ? 1 : 0) << 11;
+            encoding |= (vop3.clamp ? 1 : 0) << 11;
          } else {
             encoding |= opcode << 16;
-            encoding |= (vop3->clamp ? 1 : 0) << 15;
+            encoding |= (vop3.clamp ? 1 : 0) << 15;
          }
-         encoding |= vop3->opsel << 11;
+         encoding |= vop3.opsel << 11;
          for (unsigned i = 0; i < 3; i++)
-            encoding |= vop3->abs[i] << (8+i);
+            encoding |= vop3.abs[i] << (8+i);
          if (instr->definitions.size() == 2)
             encoding |= instr->definitions[1].physReg() << 8;
          encoding |= (0xFF & instr->definitions[0].physReg());
@@ -609,13 +609,13 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
             for (unsigned i = 0; i < instr->operands.size(); i++)
                encoding |= instr->operands[i].physReg() << (i * 9);
          }
-         encoding |= vop3->omod << 27;
+         encoding |= vop3.omod << 27;
          for (unsigned i = 0; i < 3; i++)
-            encoding |= vop3->neg[i] << (29+i);
+            encoding |= vop3.neg[i] << (29+i);
          out.push_back(encoding);
 
       } else if (instr->isVOP3P()) {
-         VOP3P_instruction* vop3 = instr->vop3p();
+         VOP3P_instruction& vop3 = instr->vop3p();
 
          uint32_t encoding;
          if (ctx.chip_class == GFX9) {
@@ -627,45 +627,45 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
          }
 
          encoding |= opcode << 16;
-         encoding |= (vop3->clamp ? 1 : 0) << 15;
-         encoding |= vop3->opsel_lo << 11;
-         encoding |= ((vop3->opsel_hi & 0x4) ? 1 : 0) << 14;
+         encoding |= (vop3.clamp ? 1 : 0) << 15;
+         encoding |= vop3.opsel_lo << 11;
+         encoding |= ((vop3.opsel_hi & 0x4) ? 1 : 0) << 14;
          for (unsigned i = 0; i < 3; i++)
-            encoding |= vop3->neg_hi[i] << (8+i);
+            encoding |= vop3.neg_hi[i] << (8+i);
          encoding |= (0xFF & instr->definitions[0].physReg());
          out.push_back(encoding);
          encoding = 0;
          for (unsigned i = 0; i < instr->operands.size(); i++)
             encoding |= instr->operands[i].physReg() << (i * 9);
-         encoding |= (vop3->opsel_hi & 0x3) << 27;
+         encoding |= (vop3.opsel_hi & 0x3) << 27;
          for (unsigned i = 0; i < 3; i++)
-            encoding |= vop3->neg_lo[i] << (29+i);
+            encoding |= vop3.neg_lo[i] << (29+i);
          out.push_back(encoding);
 
       } else if (instr->isDPP()){
          assert(ctx.chip_class >= GFX8);
-         DPP_instruction* dpp = instr->dpp();
+         DPP_instruction& dpp = instr->dpp();
 
          /* first emit the instruction without the DPP operand */
          Operand dpp_op = instr->operands[0];
          instr->operands[0] = Operand(PhysReg{250}, v1);
          instr->format = (Format) ((uint16_t) instr->format & ~(uint16_t)Format::DPP);
          emit_instruction(ctx, out, instr);
-         uint32_t encoding = (0xF & dpp->row_mask) << 28;
-         encoding |= (0xF & dpp->bank_mask) << 24;
-         encoding |= dpp->abs[1] << 23;
-         encoding |= dpp->neg[1] << 22;
-         encoding |= dpp->abs[0] << 21;
-         encoding |= dpp->neg[0] << 20;
+         uint32_t encoding = (0xF & dpp.row_mask) << 28;
+         encoding |= (0xF & dpp.bank_mask) << 24;
+         encoding |= dpp.abs[1] << 23;
+         encoding |= dpp.neg[1] << 22;
+         encoding |= dpp.abs[0] << 21;
+         encoding |= dpp.neg[0] << 20;
          if (ctx.chip_class >= GFX10)
             encoding |= 1 << 18; /* set Fetch Inactive to match GFX9 behaviour */
-         encoding |= dpp->bound_ctrl << 19;
-         encoding |= dpp->dpp_ctrl << 8;
+         encoding |= dpp.bound_ctrl << 19;
+         encoding |= dpp.dpp_ctrl << 8;
          encoding |= (0xFF) & dpp_op.physReg();
          out.push_back(encoding);
          return;
       } else if (instr->isSDWA()) {
-         SDWA_instruction* sdwa = instr->sdwa();
+         SDWA_instruction& sdwa = instr->sdwa();
 
          /* first emit the instruction without the SDWA operand */
          Operand sdwa_op = instr->operands[0];
@@ -680,27 +680,27 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
                encoding |= instr->definitions[0].physReg() << 8;
                encoding |= 1 << 15;
             }
-            encoding |= (sdwa->clamp ? 1 : 0) << 13;
+            encoding |= (sdwa.clamp ? 1 : 0) << 13;
          } else {
-            encoding |= get_sdwa_sel(sdwa->dst_sel, instr->definitions[0].physReg()) << 8;
-            uint32_t dst_u = sdwa->dst_sel & sdwa_sext ? 1 : 0;
-            if (sdwa->dst_preserve || (sdwa->dst_sel & sdwa_isra))
+            encoding |= get_sdwa_sel(sdwa.dst_sel, instr->definitions[0].physReg()) << 8;
+            uint32_t dst_u = sdwa.dst_sel & sdwa_sext ? 1 : 0;
+            if (sdwa.dst_preserve || (sdwa.dst_sel & sdwa_isra))
                dst_u = 2;
             encoding |= dst_u << 11;
-            encoding |= (sdwa->clamp ? 1 : 0) << 13;
-            encoding |= sdwa->omod << 14;
+            encoding |= (sdwa.clamp ? 1 : 0) << 13;
+            encoding |= sdwa.omod << 14;
          }
 
-         encoding |= get_sdwa_sel(sdwa->sel[0], sdwa_op.physReg()) << 16;
-         encoding |= sdwa->sel[0] & sdwa_sext ? 1 << 19 : 0;
-         encoding |= sdwa->abs[0] << 21;
-         encoding |= sdwa->neg[0] << 20;
+         encoding |= get_sdwa_sel(sdwa.sel[0], sdwa_op.physReg()) << 16;
+         encoding |= sdwa.sel[0] & sdwa_sext ? 1 << 19 : 0;
+         encoding |= sdwa.abs[0] << 21;
+         encoding |= sdwa.neg[0] << 20;
 
          if (instr->operands.size() >= 2) {
-            encoding |= get_sdwa_sel(sdwa->sel[1], instr->operands[1].physReg()) << 24;
-            encoding |= sdwa->sel[1] & sdwa_sext ? 1 << 27 : 0;
-            encoding |= sdwa->abs[1] << 29;
-            encoding |= sdwa->neg[1] << 28;
+            encoding |= get_sdwa_sel(sdwa.sel[1], instr->operands[1].physReg()) << 24;
+            encoding |= sdwa.sel[1] & sdwa_sext ? 1 << 27 : 0;
+            encoding |= sdwa.abs[1] << 29;
+            encoding |= sdwa.neg[1] << 28;
          }
 
          encoding |= 0xFF & sdwa_op.physReg();
@@ -750,16 +750,16 @@ void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
       while ( it != block.instructions.rend())
       {
          if ((*it)->isEXP()) {
-            Export_instruction* exp = (*it)->exp();
+            Export_instruction& exp = (*it)->exp();
             if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) {
-               if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= (V_008DFC_SQ_EXP_POS + 3)) {
-                  exp->done = true;
+               if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= (V_008DFC_SQ_EXP_POS + 3)) {
+                  exp.done = true;
                   exported = true;
                   break;
                }
             } else {
-               exp->done = true;
-               exp->valid_mask = true;
+               exp.done = true;
+               exp.valid_mask = true;
                exported = true;
                break;
             }
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 5c59e6a..57b6f1b 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -180,7 +180,7 @@ struct NOP_ctx_gfx10 {
 int get_wait_states(aco_ptr<Instruction>& instr)
 {
    if (instr->opcode == aco_opcode::s_nop)
-      return instr->sopp()->imm + 1;
+      return instr->sopp().imm + 1;
    else if (instr->opcode == aco_opcode::p_constaddr)
       return 3; /* lowered to 3 instructions in the assembler */
    else
@@ -351,7 +351,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
 
       if (instr->opcode == aco_opcode::s_sendmsg || instr->opcode == aco_opcode::s_ttracedata)
          NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace);
-   } else if (instr->isDS() && instr->ds()->gds) {
+   } else if (instr->isDS() && instr->ds().gds) {
       NOPs = MAX2(NOPs, ctx.salu_wr_m0_then_gds_msg_ttrace);
    } else if (instr->isVALU() || instr->isVINTRP()) {
       for (Operand op : instr->operands) {
@@ -407,7 +407,7 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
 
    if (program->chip_class == GFX9) {
       bool lds_scratch_global = (instr->isScratch() || instr->isGlobal()) &&
-                                instr->flatlike()->lds;
+                                instr->flatlike().lds;
       if (instr->isVINTRP() ||
           instr->opcode == aco_opcode::ds_read_addtid_b32 ||
           instr->opcode == aco_opcode::ds_write_addtid_b32 ||
@@ -480,10 +480,10 @@ void handle_instruction_gfx6(Program *program, Block *cur_block, NOP_ctx_gfx6 &c
             ctx.salu_wr_m0_then_moverel = 1;
          }
       } else if (instr->opcode == aco_opcode::s_setreg_b32 || instr->opcode == aco_opcode::s_setreg_imm32_b32) {
-         SOPK_instruction *sopk = instr->sopk();
-         unsigned offset = (sopk->imm >> 6) & 0x1f;
-         unsigned size = ((sopk->imm >> 11) & 0x1f) + 1;
-         unsigned reg = sopk->imm & 0x3f;
+         SOPK_instruction& sopk = instr->sopk();
+         unsigned offset = (sopk.imm >> 6) & 0x1f;
+         unsigned size = ((sopk.imm >> 11) & 0x1f) + 1;
+         unsigned reg = sopk.imm & 0x3f;
          ctx.setreg_then_getsetreg = 2;
 
          if (reg == 1 && offset >= 28 && size > (28 - offset))
@@ -603,13 +603,13 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
    } else if (instr->isSALU() || instr->isSMEM()) {
       if (instr->opcode == aco_opcode::s_waitcnt) {
          /* Hazard is mitigated by "s_waitcnt vmcnt(0)" */
-         uint16_t imm = instr->sopp()->imm;
+         uint16_t imm = instr->sopp().imm;
          unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);
          if (vmcnt == 0)
             ctx.sgprs_read_by_VMEM.reset();
       } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
          /* Hazard is mitigated by a s_waitcnt_depctr with a magic imm */
-         if (instr->sopp()->imm == 0xffe3)
+         if (instr->sopp().imm == 0xffe3)
             ctx.sgprs_read_by_VMEM.reset();
       }
 
@@ -667,7 +667,7 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
       }
    } else if (instr->opcode == aco_opcode::s_waitcnt_depctr) {
       /* s_waitcnt_depctr can mitigate the problem if it has a magic imm */
-      if ((instr->sopp()->imm & 0xfffe) == 0xfffe)
+      if ((instr->sopp().imm & 0xfffe) == 0xfffe)
          ctx.has_nonVALU_exec_read = false;
    }
 
@@ -694,12 +694,12 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
          ctx.sgprs_read_by_SMEM.reset();
       } else {
          /* Reducing lgkmcnt count to 0 always mitigates the hazard. */
-         const SOPP_instruction *sopp = instr->sopp();
-         if (sopp->opcode == aco_opcode::s_waitcnt_lgkmcnt) {
-            if (sopp->imm == 0 && sopp->definitions[0].physReg() == sgpr_null)
+         const SOPP_instruction& sopp = instr->sopp();
+         if (sopp.opcode == aco_opcode::s_waitcnt_lgkmcnt) {
+            if (sopp.imm == 0 && sopp.definitions[0].physReg() == sgpr_null)
                ctx.sgprs_read_by_SMEM.reset();
-         } else if (sopp->opcode == aco_opcode::s_waitcnt) {
-            unsigned lgkm = (sopp->imm >> 8) & 0x3f;
+         } else if (sopp.opcode == aco_opcode::s_waitcnt) {
+            unsigned lgkm = (sopp.imm >> 8) & 0x3f;
             if (lgkm == 0)
                ctx.sgprs_read_by_SMEM.reset();
          }
@@ -724,8 +724,8 @@ void handle_instruction_gfx10(Program *program, Block *cur_block, NOP_ctx_gfx10
       ctx.has_branch_after_DS = ctx.has_DS;
    } else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
       /* Only s_waitcnt_vscnt can mitigate the hazard */
-      const SOPK_instruction *sopk = instr->sopk();
-      if (sopk->definitions[0].physReg() == sgpr_null && sopk->imm == 0)
+      const SOPK_instruction& sopk = instr->sopk();
+      if (sopk.definitions[0].physReg() == sgpr_null && sopk.imm == 0)
          ctx.has_VMEM = ctx.has_branch_after_VMEM = ctx.has_DS = ctx.has_branch_after_DS = false;
    }
    if ((ctx.has_VMEM && ctx.has_branch_after_DS) || (ctx.has_DS && ctx.has_branch_after_VMEM)) {
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index d5153a1..cdc7a70 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -98,13 +98,13 @@ struct exec_ctx {
 
 bool needs_exact(aco_ptr<Instruction>& instr) {
    if (instr->isMUBUF()) {
-      return instr->mubuf()->disable_wqm;
+      return instr->mubuf().disable_wqm;
    } else if (instr->isMTBUF()) {
-      return instr->mtbuf()->disable_wqm;
+      return instr->mtbuf().disable_wqm;
    } else if (instr->isMIMG()) {
-      return instr->mimg()->disable_wqm;
+      return instr->mimg().disable_wqm;
    } else if (instr->isFlatLike()) {
-      return instr->flatlike()->disable_wqm;
+      return instr->flatlike().disable_wqm;
    } else {
       return instr->isEXP();
    }
@@ -908,12 +908,12 @@ void add_branch_code(exec_ctx& ctx, Block* block)
    }
 
    if (block->kind & block_kind_uniform) {
-      Pseudo_branch_instruction* branch = block->instructions.back()->branch();
-      if (branch->opcode == aco_opcode::p_branch) {
-         branch->target[0] = block->linear_succs[0];
+      Pseudo_branch_instruction& branch = block->instructions.back()->branch();
+      if (branch.opcode == aco_opcode::p_branch) {
+         branch.target[0] = block->linear_succs[0];
       } else {
-         branch->target[0] = block->linear_succs[1];
-         branch->target[1] = block->linear_succs[0];
+         branch.target[0] = block->linear_succs[1];
+         branch.target[1] = block->linear_succs[0];
       }
       return;
    }
diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp
index 2c5c115..30155e3 100644
--- a/src/amd/compiler/aco_insert_waitcnt.cpp
+++ b/src/amd/compiler/aco_insert_waitcnt.cpp
@@ -422,7 +422,7 @@ wait_imm check_instr(Instruction* instr, wait_ctx& ctx)
             continue;
 
          /* LDS reads and writes return in the order they were issued. same for GDS */
-         if (instr->isDS() && (it->second.events & lgkm_events) == (instr->ds()->gds ? event_gds : event_lds))
+         if (instr->isDS() && (it->second.events & lgkm_events) == (instr->ds().gds ? event_gds : event_lds))
             continue;
 
          wait.combine(it->second.imm);
@@ -437,10 +437,10 @@ wait_imm parse_wait_instr(wait_ctx& ctx, Instruction *instr)
    if (instr->opcode == aco_opcode::s_waitcnt_vscnt &&
        instr->definitions[0].physReg() == sgpr_null) {
       wait_imm imm;
-      imm.vs = std::min<uint8_t>(imm.vs, instr->sopk()->imm);
+      imm.vs = std::min<uint8_t>(imm.vs, instr->sopk().imm);
       return imm;
    } else if (instr->opcode == aco_opcode::s_waitcnt) {
-      return wait_imm(ctx.chip_class, instr->sopp()->imm);
+      return wait_imm(ctx.chip_class, instr->sopp().imm);
    }
    return wait_imm();
 }
@@ -521,15 +521,15 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
        * TODO: Refine this when we have proper alias analysis.
        */
       if (ctx.pending_s_buffer_store &&
-          !instr->smem()->definitions.empty() &&
-          !instr->smem()->sync.can_reorder()) {
+          !instr->smem().definitions.empty() &&
+          !instr->smem().sync.can_reorder()) {
          imm.lgkm = 0;
       }
    }
 
    if (ctx.program->early_rast && instr->opcode == aco_opcode::exp) {
-      if (instr->exp()->dest >= V_008DFC_SQ_EXP_POS &&
-          instr->exp()->dest < V_008DFC_SQ_EXP_PRIM) {
+      if (instr->exp().dest >= V_008DFC_SQ_EXP_POS &&
+          instr->exp().dest < V_008DFC_SQ_EXP_PRIM) {
 
          /* With early_rast, the HW will start clipping and rasterization after the 1st DONE pos export.
           * Wait for all stores (and atomics) to complete, so PS can read them.
@@ -543,7 +543,7 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
    }
 
    if (instr->opcode == aco_opcode::p_barrier)
-      imm.combine(perform_barrier(ctx, instr->barrier()->sync, semantic_acqrel));
+      imm.combine(perform_barrier(ctx, instr->barrier().sync, semantic_acqrel));
    else
       imm.combine(perform_barrier(ctx, sync_info, semantic_release));
 
@@ -760,12 +760,12 @@ void gen(Instruction* instr, wait_ctx& ctx)
 {
    switch (instr->format) {
    case Format::EXP: {
-      Export_instruction* exp_instr = instr->exp();
+      Export_instruction& exp_instr = instr->exp();
 
       wait_event ev;
-      if (exp_instr->dest <= 9)
+      if (exp_instr.dest <= 9)
          ev = event_exp_mrt_null;
-      else if (exp_instr->dest <= 15)
+      else if (exp_instr.dest <= 15)
          ev = event_exp_pos;
       else
          ev = event_exp_param;
@@ -774,10 +774,10 @@ void gen(Instruction* instr, wait_ctx& ctx)
       /* insert new entries for exported vgprs */
       for (unsigned i = 0; i < 4; i++)
       {
-         if (exp_instr->enabled_mask & (1 << i)) {
-            unsigned idx = exp_instr->compressed ? i >> 1 : i;
-            assert(idx < exp_instr->operands.size());
-            insert_wait_entry(ctx, exp_instr->operands[idx], ev);
+         if (exp_instr.enabled_mask & (1 << i)) {
+            unsigned idx = exp_instr.compressed ? i >> 1 : i;
+            assert(idx < exp_instr.operands.size());
+            insert_wait_entry(ctx, exp_instr.operands[idx], ev);
 
          }
       }
@@ -785,38 +785,38 @@ void gen(Instruction* instr, wait_ctx& ctx)
       break;
    }
    case Format::FLAT: {
-      FLAT_instruction *flat = instr->flat();
+      FLAT_instruction& flat = instr->flat();
       if (ctx.chip_class < GFX10 && !instr->definitions.empty())
-         update_counters_for_flat_load(ctx, flat->sync);
+         update_counters_for_flat_load(ctx, flat.sync);
       else
-         update_counters(ctx, event_flat, flat->sync);
+         update_counters(ctx, event_flat, flat.sync);
 
       if (!instr->definitions.empty())
          insert_wait_entry(ctx, instr->definitions[0], event_flat);
       break;
    }
    case Format::SMEM: {
-      SMEM_instruction *smem = instr->smem();
-      update_counters(ctx, event_smem, smem->sync);
+      SMEM_instruction& smem = instr->smem();
+      update_counters(ctx, event_smem, smem.sync);
 
       if (!instr->definitions.empty())
          insert_wait_entry(ctx, instr->definitions[0], event_smem);
       else if (ctx.chip_class >= GFX10 &&
-               !smem->sync.can_reorder())
+               !smem.sync.can_reorder())
          ctx.pending_s_buffer_store = true;
 
       break;
    }
    case Format::DS: {
-      DS_instruction *ds = instr->ds();
-      update_counters(ctx, ds->gds ? event_gds : event_lds, ds->sync);
-      if (ds->gds)
+      DS_instruction& ds = instr->ds();
+      update_counters(ctx, ds.gds ? event_gds : event_lds, ds.sync);
+      if (ds.gds)
          update_counters(ctx, event_gds_gpr_lock);
 
       if (!instr->definitions.empty())
-         insert_wait_entry(ctx, instr->definitions[0], ds->gds ? event_gds : event_lds);
+         insert_wait_entry(ctx, instr->definitions[0], ds.gds ? event_gds : event_lds);
 
-      if (ds->gds) {
+      if (ds.gds) {
          for (const Operand& op : instr->operands)
             insert_wait_entry(ctx, op, event_gds_gpr_lock);
          insert_wait_entry(ctx, exec, s2, event_gds_gpr_lock, false);
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 4c27e58..0d3e50f 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1224,7 +1224,7 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
    Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
 
    Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v);
-   add->vop3()->neg[1] = true;
+   add->vop3().neg[1] = true;
 
    return add->definitions[0].getTemp();
 }
@@ -1692,7 +1692,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
                std::swap(src0, src1);
             add_instr = bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
          }
-         add_instr->vop3()->clamp = 1;
+         add_instr->vop3().clamp = 1;
       } else if (dst.regClass() == v1) {
          if (ctx->options->chip_class >= GFX9) {
             aco_ptr<VOP3_instruction> add{create_instruction<VOP3_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)};
@@ -1944,9 +1944,9 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
    case nir_op_fsub: {
       if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
          Instruction* add = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst);
-         VOP3P_instruction* sub = add->vop3p();
-         sub->neg_lo[1] = true;
-         sub->neg_hi[1] = true;
+         VOP3P_instruction& sub = add->vop3p();
+         sub.neg_lo[1] = true;
+         sub.neg_hi[1] = true;
          break;
       }
 
@@ -1965,7 +1965,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       } else if (dst.regClass() == v2) {
          Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst),
                                      as_vgpr(ctx, src0), as_vgpr(ctx, src1));
-         add->vop3()->neg[1] = true;
+         add->vop3().neg[1] = true;
       } else {
          isel_err(&instr->instr, "Unimplemented NIR instr bit size");
       }
@@ -2101,7 +2101,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          Temp src = get_alu_src_vop3p(ctx, instr->src[0]);
          Instruction* vop3p = bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand(uint16_t(0x3C00)),
                                         instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
-         vop3p->vop3p()->clamp = true;
+         vop3p->vop3p().clamp = true;
          emit_split_vector(ctx, dst, 2);
          break;
       }
@@ -2114,7 +2114,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          // TODO: confirm that this holds under any circumstances
       } else if (dst.regClass() == v2) {
          Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u));
-         add->vop3()->clamp = true;
+         add->vop3().clamp = true;
       } else {
          isel_err(&instr->instr, "Unimplemented NIR instr bit size");
       }
@@ -2253,12 +2253,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
             Temp bfi = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, bld.copy(bld.def(v1), Operand(0x43300000u)), as_vgpr(ctx, src0_hi));
             Temp tmp = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
             Instruction *sub = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
-            sub->vop3()->neg[1] = true;
+            sub->vop3().neg[1] = true;
             tmp = sub->definitions[0].getTemp();
 
             Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), Operand(0x432fffffu));
             Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.hint_vcc(bld.def(bld.lm)), src0, v);
-            vop3->vop3()->abs[0] = true;
+            vop3->vop3().abs[0] = true;
             Temp cond = vop3->definitions[0].getTemp();
 
             Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
@@ -2924,7 +2924,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
          Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u));
          Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), f32, smallest);
-         vop3->vop3()->abs[0] = true;
+         vop3->vop3().abs[0] = true;
          cmp_res = vop3->definitions[0].getTemp();
       }
 
@@ -3515,7 +3515,7 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info,
       instr = bld.ds(op, Definition(val), offset, m, const_offset, const_offset + 1);
    else
       instr = bld.ds(op, Definition(val), offset, m, const_offset);
-   instr->ds()->sync = info.sync;
+   instr->ds().sync = info.sync;
 
    if (size < 4)
       val = bld.pseudo(aco_opcode::p_extract_vector, bld.def(RegClass::get(RegType::vgpr, size)), val, Operand(0u));
@@ -3931,7 +3931,7 @@ void store_lds(isel_context *ctx, unsigned elem_size_bytes, Temp data, uint32_t
       } else {
          instr = bld.ds(op, address_offset, split_data, m, inline_offset);
       }
-      instr->ds()->sync = memory_sync_info(storage_shared);
+      instr->ds().sync = memory_sync_info(storage_shared);
    }
 }
 
@@ -4094,7 +4094,7 @@ void emit_single_mubuf_store(isel_context *ctx, Temp descriptor, Temp voffset, T
                                  /* idxen*/ false, /* addr64 */ false, /* disable_wqm */ false, /* glc */ true,
                                  /* dlc*/ false, /* slc */ slc);
 
-   r.instr->mubuf()->sync = sync;
+   r.instr->mubuf().sync = sync;
 }
 
 void store_vmem_mubuf(isel_context *ctx, Temp src, Temp descriptor, Temp voffset, Temp soffset,
@@ -5503,7 +5503,7 @@ void visit_load_push_constant(isel_context *ctx, nir_intrinsic_instr *instr)
       unreachable("unimplemented or forbidden load_push_constant.");
    }
 
-   bld.smem(op, Definition(vec), ptr, index).instr->smem()->prevent_overflow = true;
+   bld.smem(op, Definition(vec), ptr, index).instr->smem().prevent_overflow = true;
 
    if (!aligned) {
       Operand byte_offset = index_cv ? Operand((offset + index_cv->u32) % 4) : Operand(index);
@@ -7147,7 +7147,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
    for (unsigned i = 0; i < write_count; i++) {
       aco_opcode op = get_buffer_store_op(write_datas[i].bytes());
       Instruction *mubuf = bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_datas[i], offsets[i], true, true);
-      mubuf->mubuf()->sync = memory_sync_info(storage_scratch, semantic_private);
+      mubuf->mubuf().sync = memory_sync_info(storage_scratch, semantic_private);
    }
 }
 
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index ed3a43a..90d0f5e 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -140,19 +140,19 @@ memory_sync_info get_sync_info(const Instruction* instr)
 {
    switch (instr->format) {
    case Format::SMEM:
-      return instr->smem()->sync;
+      return instr->smem().sync;
    case Format::MUBUF:
-      return instr->mubuf()->sync;
+      return instr->mubuf().sync;
    case Format::MIMG:
-      return instr->mimg()->sync;
+      return instr->mimg().sync;
    case Format::MTBUF:
-      return instr->mtbuf()->sync;
+      return instr->mtbuf().sync;
    case Format::FLAT:
    case Format::GLOBAL:
    case Format::SCRATCH:
-      return instr->flatlike()->sync;
+      return instr->flatlike().sync;
    case Format::DS:
-      return instr->ds()->sync;
+      return instr->ds().sync;
    default:
       return memory_sync_info();
    }
@@ -170,12 +170,12 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
       return true;
 
    if (instr->isVOP3()) {
-      VOP3_instruction *vop3 = instr->vop3();
+      VOP3_instruction& vop3 = instr->vop3();
       if (instr->format == Format::VOP3)
          return false;
-      if (vop3->clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8)
+      if (vop3.clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8)
          return false;
-      if (vop3->omod && chip < GFX9)
+      if (vop3.omod && chip < GFX9)
          return false;
 
       //TODO: return true if we know we will use vcc
@@ -232,14 +232,14 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst
    std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
    std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(), instr->definitions.begin());
 
-   SDWA_instruction *sdwa = instr->sdwa();
+   SDWA_instruction& sdwa = instr->sdwa();
 
    if (tmp->isVOP3()) {
-      VOP3_instruction *vop3 = tmp->vop3();
-      memcpy(sdwa->neg, vop3->neg, sizeof(sdwa->neg));
-      memcpy(sdwa->abs, vop3->abs, sizeof(sdwa->abs));
-      sdwa->omod = vop3->omod;
-      sdwa->clamp = vop3->clamp;
+      VOP3_instruction& vop3 = tmp->vop3();
+      memcpy(sdwa.neg, vop3.neg, sizeof(sdwa.neg));
+      memcpy(sdwa.abs, vop3.abs, sizeof(sdwa.abs));
+      sdwa.omod = vop3.omod;
+      sdwa.clamp = vop3.clamp;
    }
 
    for (unsigned i = 0; i < instr->operands.size(); i++) {
@@ -249,27 +249,27 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst
 
       switch (instr->operands[i].bytes()) {
       case 1:
-         sdwa->sel[i] = sdwa_ubyte;
+         sdwa.sel[i] = sdwa_ubyte;
          break;
       case 2:
-         sdwa->sel[i] = sdwa_uword;
+         sdwa.sel[i] = sdwa_uword;
          break;
       case 4:
-         sdwa->sel[i] = sdwa_udword;
+         sdwa.sel[i] = sdwa_udword;
          break;
       }
    }
    switch (instr->definitions[0].bytes()) {
    case 1:
-      sdwa->dst_sel = sdwa_ubyte;
-      sdwa->dst_preserve = true;
+      sdwa.dst_sel = sdwa_ubyte;
+      sdwa.dst_preserve = true;
       break;
    case 2:
-      sdwa->dst_sel = sdwa_uword;
-      sdwa->dst_preserve = true;
+      sdwa.dst_sel = sdwa_uword;
+      sdwa.dst_preserve = true;
       break;
    case 4:
-      sdwa->dst_sel = sdwa_udword;
+      sdwa.dst_sel = sdwa_udword;
       break;
    }
 
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 62e4f0d..94564ff 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1030,93 +1030,93 @@ struct Instruction {
       return false;
    }
 
-   Pseudo_instruction *pseudo() noexcept {assert(isPseudo()); return (Pseudo_instruction *)this;}
-   const Pseudo_instruction *pseudo() const noexcept {assert(isPseudo()); return (Pseudo_instruction *)this;}
+   Pseudo_instruction& pseudo() noexcept {assert(isPseudo()); return *(Pseudo_instruction *)this;}
+   const Pseudo_instruction& pseudo() const noexcept {assert(isPseudo()); return *(Pseudo_instruction *)this;}
    constexpr bool isPseudo() const noexcept {return format == Format::PSEUDO;}
-   SOP1_instruction *sop1() noexcept {assert(isSOP1()); return (SOP1_instruction *)this;}
-   const SOP1_instruction *sop1() const noexcept {assert(isSOP1()); return (SOP1_instruction *)this;}
+   SOP1_instruction& sop1() noexcept {assert(isSOP1()); return *(SOP1_instruction *)this;}
+   const SOP1_instruction& sop1() const noexcept {assert(isSOP1()); return *(SOP1_instruction *)this;}
    constexpr bool isSOP1() const noexcept {return format == Format::SOP1;}
-   SOP2_instruction *sop2() noexcept {assert(isSOP2()); return (SOP2_instruction *)this;}
-   const SOP2_instruction *sop2() const noexcept {assert(isSOP2()); return (SOP2_instruction *)this;}
+   SOP2_instruction& sop2() noexcept {assert(isSOP2()); return *(SOP2_instruction *)this;}
+   const SOP2_instruction& sop2() const noexcept {assert(isSOP2()); return *(SOP2_instruction *)this;}
    constexpr bool isSOP2() const noexcept {return format == Format::SOP2;}
-   SOPK_instruction *sopk() noexcept {assert(isSOPK()); return (SOPK_instruction *)this;}
-   const SOPK_instruction *sopk() const noexcept {assert(isSOPK()); return (SOPK_instruction *)this;}
+   SOPK_instruction& sopk() noexcept {assert(isSOPK()); return *(SOPK_instruction *)this;}
+   const SOPK_instruction& sopk() const noexcept {assert(isSOPK()); return *(SOPK_instruction *)this;}
    constexpr bool isSOPK() const noexcept {return format == Format::SOPK;}
-   SOPP_instruction *sopp() noexcept {assert(isSOPP()); return (SOPP_instruction *)this;}
-   const SOPP_instruction *sopp() const noexcept {assert(isSOPP()); return (SOPP_instruction *)this;}
+   SOPP_instruction& sopp() noexcept {assert(isSOPP()); return *(SOPP_instruction *)this;}
+   const SOPP_instruction& sopp() const noexcept {assert(isSOPP()); return *(SOPP_instruction *)this;}
    constexpr bool isSOPP() const noexcept {return format == Format::SOPP;}
-   SOPC_instruction *sopc() noexcept {assert(isSOPC()); return (SOPC_instruction *)this;}
-   const SOPC_instruction *sopc() const noexcept {assert(isSOPC()); return (SOPC_instruction *)this;}
+   SOPC_instruction& sopc() noexcept {assert(isSOPC()); return *(SOPC_instruction *)this;}
+   const SOPC_instruction& sopc() const noexcept {assert(isSOPC()); return *(SOPC_instruction *)this;}
    constexpr bool isSOPC() const noexcept {return format == Format::SOPC;}
-   SMEM_instruction *smem() noexcept {assert(isSMEM()); return (SMEM_instruction *)this;}
-   const SMEM_instruction *smem() const noexcept {assert(isSMEM()); return (SMEM_instruction *)this;}
+   SMEM_instruction& smem() noexcept {assert(isSMEM()); return *(SMEM_instruction *)this;}
+   const SMEM_instruction& smem() const noexcept {assert(isSMEM()); return *(SMEM_instruction *)this;}
    constexpr bool isSMEM() const noexcept {return format == Format::SMEM;}
-   DS_instruction *ds() noexcept {assert(isDS()); return (DS_instruction *)this;}
-   const DS_instruction *ds() const noexcept {assert(isDS()); return (DS_instruction *)this;}
+   DS_instruction& ds() noexcept {assert(isDS()); return *(DS_instruction *)this;}
+   const DS_instruction& ds() const noexcept {assert(isDS()); return *(DS_instruction *)this;}
    constexpr bool isDS() const noexcept {return format == Format::DS;}
-   MTBUF_instruction *mtbuf() noexcept {assert(isMTBUF()); return (MTBUF_instruction *)this;}
-   const MTBUF_instruction *mtbuf() const noexcept {assert(isMTBUF()); return (MTBUF_instruction *)this;}
+   MTBUF_instruction& mtbuf() noexcept {assert(isMTBUF()); return *(MTBUF_instruction *)this;}
+   const MTBUF_instruction& mtbuf() const noexcept {assert(isMTBUF()); return *(MTBUF_instruction *)this;}
    constexpr bool isMTBUF() const noexcept {return format == Format::MTBUF;}
-   MUBUF_instruction *mubuf() noexcept {assert(isMUBUF()); return (MUBUF_instruction *)this;}
-   const MUBUF_instruction *mubuf() const noexcept {assert(isMUBUF()); return (MUBUF_instruction *)this;}
+   MUBUF_instruction& mubuf() noexcept {assert(isMUBUF()); return *(MUBUF_instruction *)this;}
+   const MUBUF_instruction& mubuf() const noexcept {assert(isMUBUF()); return *(MUBUF_instruction *)this;}
    constexpr bool isMUBUF() const noexcept {return format == Format::MUBUF;}
-   MIMG_instruction *mimg() noexcept {assert(isMIMG()); return (MIMG_instruction *)this;}
-   const MIMG_instruction *mimg() const noexcept {assert(isMIMG()); return (MIMG_instruction *)this;}
+   MIMG_instruction& mimg() noexcept {assert(isMIMG()); return *(MIMG_instruction *)this;}
+   const MIMG_instruction& mimg() const noexcept {assert(isMIMG()); return *(MIMG_instruction *)this;}
    constexpr bool isMIMG() const noexcept {return format == Format::MIMG;}
-   Export_instruction *exp() noexcept {assert(isEXP()); return (Export_instruction *)this;}
-   const Export_instruction *exp() const noexcept {assert(isEXP()); return (Export_instruction *)this;}
+   Export_instruction& exp() noexcept {assert(isEXP()); return *(Export_instruction *)this;}
+   const Export_instruction& exp() const noexcept {assert(isEXP()); return *(Export_instruction *)this;}
    constexpr bool isEXP() const noexcept {return format == Format::EXP;}
-   FLAT_instruction *flat() noexcept {assert(isFlat()); return (FLAT_instruction *)this;}
-   const FLAT_instruction *flat() const noexcept {assert(isFlat()); return (FLAT_instruction *)this;}
+   FLAT_instruction& flat() noexcept {assert(isFlat()); return *(FLAT_instruction *)this;}
+   const FLAT_instruction& flat() const noexcept {assert(isFlat()); return *(FLAT_instruction *)this;}
    constexpr bool isFlat() const noexcept {return format == Format::FLAT;}
-   FLAT_instruction *global() noexcept {assert(isGlobal()); return (FLAT_instruction *)this;}
-   const FLAT_instruction *global() const noexcept {assert(isGlobal()); return (FLAT_instruction *)this;}
+   FLAT_instruction& global() noexcept {assert(isGlobal()); return *(FLAT_instruction *)this;}
+   const FLAT_instruction& global() const noexcept {assert(isGlobal()); return *(FLAT_instruction *)this;}
    constexpr bool isGlobal() const noexcept {return format == Format::GLOBAL;}
-   FLAT_instruction *scratch() noexcept {assert(isScratch()); return (FLAT_instruction *)this;}
-   const FLAT_instruction *scratch() const noexcept {assert(isScratch()); return (FLAT_instruction *)this;}
+   FLAT_instruction& scratch() noexcept {assert(isScratch()); return *(FLAT_instruction *)this;}
+   const FLAT_instruction& scratch() const noexcept {assert(isScratch()); return *(FLAT_instruction *)this;}
    constexpr bool isScratch() const noexcept {return format == Format::SCRATCH;}
-   Pseudo_branch_instruction *branch() noexcept {assert(isBranch()); return (Pseudo_branch_instruction *)this;}
-   const Pseudo_branch_instruction *branch() const noexcept {assert(isBranch()); return (Pseudo_branch_instruction *)this;}
+   Pseudo_branch_instruction& branch() noexcept {assert(isBranch()); return *(Pseudo_branch_instruction *)this;}
+   const Pseudo_branch_instruction& branch() const noexcept {assert(isBranch()); return *(Pseudo_branch_instruction *)this;}
    constexpr bool isBranch() const noexcept {return format == Format::PSEUDO_BRANCH;}
-   Pseudo_barrier_instruction *barrier() noexcept {assert(isBarrier()); return (Pseudo_barrier_instruction *)this;}
-   const Pseudo_barrier_instruction *barrier() const noexcept {assert(isBarrier()); return (Pseudo_barrier_instruction *)this;}
+   Pseudo_barrier_instruction& barrier() noexcept {assert(isBarrier()); return *(Pseudo_barrier_instruction *)this;}
+   const Pseudo_barrier_instruction& barrier() const noexcept {assert(isBarrier()); return *(Pseudo_barrier_instruction *)this;}
    constexpr bool isBarrier() const noexcept {return format == Format::PSEUDO_BARRIER;}
-   Pseudo_reduction_instruction *reduction() noexcept {assert(isReduction()); return (Pseudo_reduction_instruction *)this;}
-   const Pseudo_reduction_instruction *reduction() const noexcept {assert(isReduction()); return (Pseudo_reduction_instruction *)this;}
+   Pseudo_reduction_instruction& reduction() noexcept {assert(isReduction()); return *(Pseudo_reduction_instruction *)this;}
+   const Pseudo_reduction_instruction& reduction() const noexcept {assert(isReduction()); return *(Pseudo_reduction_instruction *)this;}
    constexpr bool isReduction() const noexcept {return format == Format::PSEUDO_REDUCTION;}
-   VOP3P_instruction *vop3p() noexcept {assert(isVOP3P()); return (VOP3P_instruction *)this;}
-   const VOP3P_instruction *vop3p() const noexcept {assert(isVOP3P()); return (VOP3P_instruction *)this;}
+   VOP3P_instruction& vop3p() noexcept {assert(isVOP3P()); return *(VOP3P_instruction *)this;}
+   const VOP3P_instruction& vop3p() const noexcept {assert(isVOP3P()); return *(VOP3P_instruction *)this;}
    constexpr bool isVOP3P() const noexcept {return format == Format::VOP3P;}
-   VOP1_instruction *vop1() noexcept {assert(isVOP1()); return (VOP1_instruction *)this;}
-   const VOP1_instruction *vop1() const noexcept {assert(isVOP1()); return (VOP1_instruction *)this;}
+   VOP1_instruction& vop1() noexcept {assert(isVOP1()); return *(VOP1_instruction *)this;}
+   const VOP1_instruction& vop1() const noexcept {assert(isVOP1()); return *(VOP1_instruction *)this;}
    constexpr bool isVOP1() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP1;}
-   VOP2_instruction *vop2() noexcept {assert(isVOP2()); return (VOP2_instruction *)this;}
-   const VOP2_instruction *vop2() const noexcept {assert(isVOP2()); return (VOP2_instruction *)this;}
+   VOP2_instruction& vop2() noexcept {assert(isVOP2()); return *(VOP2_instruction *)this;}
+   const VOP2_instruction& vop2() const noexcept {assert(isVOP2()); return *(VOP2_instruction *)this;}
    constexpr bool isVOP2() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP2;}
-   VOPC_instruction *vopc() noexcept {assert(isVOPC()); return (VOPC_instruction *)this;}
-   const VOPC_instruction *vopc() const noexcept {assert(isVOPC()); return (VOPC_instruction *)this;}
+   VOPC_instruction& vopc() noexcept {assert(isVOPC()); return *(VOPC_instruction *)this;}
+   const VOPC_instruction& vopc() const noexcept {assert(isVOPC()); return *(VOPC_instruction *)this;}
    constexpr bool isVOPC() const noexcept {return (uint16_t)format & (uint16_t)Format::VOPC;}
-   VOP3_instruction *vop3() noexcept {assert(isVOP3()); return (VOP3_instruction *)this;}
-   const VOP3_instruction *vop3() const noexcept {assert(isVOP3()); return (VOP3_instruction *)this;}
+   VOP3_instruction& vop3() noexcept {assert(isVOP3()); return *(VOP3_instruction *)this;}
+   const VOP3_instruction& vop3() const noexcept {assert(isVOP3()); return *(VOP3_instruction *)this;}
    constexpr bool isVOP3() const noexcept {return (uint16_t)format & (uint16_t)Format::VOP3;}
-   Interp_instruction *vintrp() noexcept {assert(isVINTRP()); return (Interp_instruction *)this;}
-   const Interp_instruction *vintrp() const noexcept {assert(isVINTRP()); return (Interp_instruction *)this;}
+   Interp_instruction& vintrp() noexcept {assert(isVINTRP()); return *(Interp_instruction *)this;}
+   const Interp_instruction& vintrp() const noexcept {assert(isVINTRP()); return *(Interp_instruction *)this;}
    constexpr bool isVINTRP() const noexcept {return (uint16_t)format & (uint16_t)Format::VINTRP;}
-   DPP_instruction *dpp() noexcept {assert(isDPP()); return (DPP_instruction *)this;}
-   const DPP_instruction *dpp() const noexcept {assert(isDPP()); return (DPP_instruction *)this;}
+   DPP_instruction& dpp() noexcept {assert(isDPP()); return *(DPP_instruction *)this;}
+   const DPP_instruction& dpp() const noexcept {assert(isDPP()); return *(DPP_instruction *)this;}
    constexpr bool isDPP() const noexcept {return (uint16_t)format & (uint16_t)Format::DPP;}
-   SDWA_instruction *sdwa() noexcept {assert(isSDWA()); return (SDWA_instruction *)this;}
-   const SDWA_instruction *sdwa() const noexcept {assert(isSDWA()); return (SDWA_instruction *)this;}
+   SDWA_instruction& sdwa() noexcept {assert(isSDWA()); return *(SDWA_instruction *)this;}
+   const SDWA_instruction& sdwa() const noexcept {assert(isSDWA()); return *(SDWA_instruction *)this;}
    constexpr bool isSDWA() const noexcept {return (uint16_t)format & (uint16_t)Format::SDWA;}
 
-   FLAT_instruction *flatlike()
+   FLAT_instruction& flatlike()
    {
-      return (FLAT_instruction *)this;
+      return *(FLAT_instruction *)this;
    }
 
-   const FLAT_instruction *flatlike() const
+   const FLAT_instruction& flatlike() const
    {
-      return (FLAT_instruction *)this;
+      return *(FLAT_instruction *)this;
    }
 
    constexpr bool isFlatLike() const noexcept
@@ -1524,23 +1524,23 @@ constexpr bool Instruction::usesModifiers() const noexcept
       return true;
 
    if (isVOP3P()) {
-      const VOP3P_instruction *vop3p = this->vop3p();
+      const VOP3P_instruction& vop3p = this->vop3p();
       for (unsigned i = 0; i < operands.size(); i++) {
-         if (vop3p->neg_lo[i] || vop3p->neg_hi[i])
+         if (vop3p.neg_lo[i] || vop3p.neg_hi[i])
             return true;
 
          /* opsel_hi must be 1 to not be considered a modifier - even for constants */
-         if (!(vop3p->opsel_hi & (1 << i)))
+         if (!(vop3p.opsel_hi & (1 << i)))
             return true;
       }
-      return vop3p->opsel_lo || vop3p->clamp;
+      return vop3p.opsel_lo || vop3p.clamp;
    } else if (isVOP3()) {
-      const VOP3_instruction *vop3 = this->vop3();
+      const VOP3_instruction& vop3 = this->vop3();
       for (unsigned i = 0; i < operands.size(); i++) {
-         if (vop3->abs[i] || vop3->neg[i])
+         if (vop3.abs[i] || vop3.neg[i])
             return true;
       }
-      return vop3->opsel || vop3->clamp || vop3->omod;
+      return vop3.opsel || vop3.clamp || vop3.omod;
    }
    return false;
 }
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 180b38e..a212972 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -646,7 +646,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
                                          Definition(PhysReg{vtmp+i}, v1),
                                          Operand(PhysReg{tmp+i}, v1),
                                          Operand(0xffffffffu), Operand(0xffffffffu)).instr;
-            perm->vop3()->opsel = 1; /* FI (Fetch Inactive) */
+            perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
          }
          bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX));
 
@@ -757,7 +757,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
                                          Definition(PhysReg{vtmp+i}, v1),
                                          Operand(PhysReg{tmp+i}, v1),
                                          Operand(0xffffffffu), Operand(0xffffffffu)).instr;
-            perm->vop3()->opsel = 1; /* FI (Fetch Inactive) */
+            perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
          }
          emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
 
@@ -1052,12 +1052,12 @@ void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op)
          if (dst.physReg().byte() == 2) {
             Operand def_lo(dst.physReg().advance(-2), v2b);
             Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op);
-            instr->vop3()->opsel = 0;
+            instr->vop3().opsel = 0;
          } else {
             assert(dst.physReg().byte() == 0);
             Operand def_hi(dst.physReg().advance(2), v2b);
             Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
-            instr->vop3()->opsel = 2;
+            instr->vop3().opsel = 2;
          }
       } else {
          uint32_t offset = dst.physReg().byte() * 8u;
@@ -1251,7 +1251,7 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo,
    if (can_use_pack) {
       Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi);
       /* opsel: 0 = select low half, 1 = select high half. [0] = src0, [1] = src1 */
-      instr->vop3()->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
+      instr->vop3().opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
       return;
    }
 
@@ -1810,7 +1810,7 @@ void lower_to_hw_instr(Program* program)
          aco_ptr<Instruction>& instr = block->instructions[instr_idx];
          aco_ptr<Instruction> mov;
          if (instr->isPseudo() && instr->opcode != aco_opcode::p_unit_test) {
-            Pseudo_instruction *pi = instr->pseudo();
+            Pseudo_instruction *pi = &instr->pseudo();
 
             switch (instr->opcode)
             {
@@ -1897,7 +1897,7 @@ void lower_to_hw_instr(Program* program)
                          instr2->opcode == aco_opcode::p_logical_end)
                         continue;
                      else if (instr2->opcode == aco_opcode::exp &&
-                              instr2->exp()->dest == null_exp_dest)
+                              instr2->exp().dest == null_exp_dest)
                         continue;
                      else if (instr2->opcode == aco_opcode::p_parallelcopy &&
                          instr2->definitions[0].isFixed() &&
@@ -1983,7 +1983,7 @@ void lower_to_hw_instr(Program* program)
                break;
             }
          } else if (instr->isBranch()) {
-            Pseudo_branch_instruction* branch = instr->branch();
+            Pseudo_branch_instruction* branch = &instr->branch();
             uint32_t target = branch->target[0];
 
             /* check if all blocks from current to target are empty */
@@ -2055,20 +2055,20 @@ void lower_to_hw_instr(Program* program)
             }
 
          } else if (instr->isReduction()) {
-            Pseudo_reduction_instruction* reduce = instr->reduction();
-            emit_reduction(&ctx, reduce->opcode, reduce->reduce_op, reduce->cluster_size,
-                           reduce->operands[1].physReg(), // tmp
-                           reduce->definitions[1].physReg(), // stmp
-                           reduce->operands[2].physReg(), // vtmp
-                           reduce->definitions[2].physReg(), // sitmp
-                           reduce->operands[0], reduce->definitions[0]);
+            Pseudo_reduction_instruction& reduce = instr->reduction();
+            emit_reduction(&ctx, reduce.opcode, reduce.reduce_op, reduce.cluster_size,
+                           reduce.operands[1].physReg(), // tmp
+                           reduce.definitions[1].physReg(), // stmp
+                           reduce.operands[2].physReg(), // vtmp
+                           reduce.definitions[2].physReg(), // sitmp
+                           reduce.operands[0], reduce.definitions[0]);
          } else if (instr->isBarrier()) {
-            Pseudo_barrier_instruction* barrier = instr->barrier();
+            Pseudo_barrier_instruction& barrier = instr->barrier();
 
             /* Anything larger than a workgroup isn't possible. Anything
              * smaller requires no instructions and this pseudo instruction
              * would only be included to control optimizations. */
-            bool emit_s_barrier = barrier->exec_scope == scope_workgroup &&
+            bool emit_s_barrier = barrier.exec_scope == scope_workgroup &&
                                   program->workgroup_size > program->wave_size;
 
             bld.insert(std::move(instr));
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index de0eb94..42aa701 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -178,121 +178,121 @@ struct InstrPred {
          return false;
 
       if (a->isVOP3()) {
-         VOP3_instruction* a3 = a->vop3();
-         VOP3_instruction* b3 = b->vop3();
+         VOP3_instruction& a3 = a->vop3();
+         VOP3_instruction& b3 = b->vop3();
          for (unsigned i = 0; i < 3; i++) {
-            if (a3->abs[i] != b3->abs[i] ||
-                a3->neg[i] != b3->neg[i])
+            if (a3.abs[i] != b3.abs[i] ||
+                a3.neg[i] != b3.neg[i])
                return false;
          }
-         return a3->clamp == b3->clamp &&
-                a3->omod == b3->omod &&
-                a3->opsel == b3->opsel;
+         return a3.clamp == b3.clamp &&
+                a3.omod == b3.omod &&
+                a3.opsel == b3.opsel;
       }
       if (a->isDPP()) {
-         DPP_instruction* aDPP = a->dpp();
-         DPP_instruction* bDPP = b->dpp();
-         return aDPP->pass_flags == bDPP->pass_flags &&
-                aDPP->dpp_ctrl == bDPP->dpp_ctrl &&
-                aDPP->bank_mask == bDPP->bank_mask &&
-                aDPP->row_mask == bDPP->row_mask &&
-                aDPP->bound_ctrl == bDPP->bound_ctrl &&
-                aDPP->abs[0] == bDPP->abs[0] &&
-                aDPP->abs[1] == bDPP->abs[1] &&
-                aDPP->neg[0] == bDPP->neg[0] &&
-                aDPP->neg[1] == bDPP->neg[1];
+         DPP_instruction& aDPP = a->dpp();
+         DPP_instruction& bDPP = b->dpp();
+         return aDPP.pass_flags == bDPP.pass_flags &&
+                aDPP.dpp_ctrl == bDPP.dpp_ctrl &&
+                aDPP.bank_mask == bDPP.bank_mask &&
+                aDPP.row_mask == bDPP.row_mask &&
+                aDPP.bound_ctrl == bDPP.bound_ctrl &&
+                aDPP.abs[0] == bDPP.abs[0] &&
+                aDPP.abs[1] == bDPP.abs[1] &&
+                aDPP.neg[0] == bDPP.neg[0] &&
+                aDPP.neg[1] == bDPP.neg[1];
       }
       if (a->isSDWA()) {
-         SDWA_instruction* aSDWA = a->sdwa();
-         SDWA_instruction* bSDWA = b->sdwa();
-         return aSDWA->sel[0] == bSDWA->sel[0] &&
-                aSDWA->sel[1] == bSDWA->sel[1] &&
-                aSDWA->dst_sel == bSDWA->dst_sel &&
-                aSDWA->abs[0] == bSDWA->abs[0] &&
-                aSDWA->abs[1] == bSDWA->abs[1] &&
-                aSDWA->neg[0] == bSDWA->neg[0] &&
-                aSDWA->neg[1] == bSDWA->neg[1] &&
-                aSDWA->dst_preserve == bSDWA->dst_preserve &&
-                aSDWA->clamp == bSDWA->clamp &&
-                aSDWA->omod == bSDWA->omod;
+         SDWA_instruction& aSDWA = a->sdwa();
+         SDWA_instruction& bSDWA = b->sdwa();
+         return aSDWA.sel[0] == bSDWA.sel[0] &&
+                aSDWA.sel[1] == bSDWA.sel[1] &&
+                aSDWA.dst_sel == bSDWA.dst_sel &&
+                aSDWA.abs[0] == bSDWA.abs[0] &&
+                aSDWA.abs[1] == bSDWA.abs[1] &&
+                aSDWA.neg[0] == bSDWA.neg[0] &&
+                aSDWA.neg[1] == bSDWA.neg[1] &&
+                aSDWA.dst_preserve == bSDWA.dst_preserve &&
+                aSDWA.clamp == bSDWA.clamp &&
+                aSDWA.omod == bSDWA.omod;
       }
 
       switch (a->format) {
          case Format::SOPK: {
             if (a->opcode == aco_opcode::s_getreg_b32)
                return false;
-            SOPK_instruction* aK = a->sopk();
-            SOPK_instruction* bK = b->sopk();
-            return aK->imm == bK->imm;
+            SOPK_instruction& aK = a->sopk();
+            SOPK_instruction& bK = b->sopk();
+            return aK.imm == bK.imm;
          }
          case Format::SMEM: {
-            SMEM_instruction* aS = a->smem();
-            SMEM_instruction* bS = b->smem();
+            SMEM_instruction& aS = a->smem();
+            SMEM_instruction& bS = b->smem();
             /* isel shouldn't be creating situations where this assertion fails */
-            assert(aS->prevent_overflow == bS->prevent_overflow);
-            return aS->sync.can_reorder() && bS->sync.can_reorder() &&
-                   aS->sync == bS->sync && aS->glc == bS->glc && aS->dlc == bS->dlc &&
-                   aS->nv == bS->nv && aS->disable_wqm == bS->disable_wqm &&
-                   aS->prevent_overflow == bS->prevent_overflow;
+            assert(aS.prevent_overflow == bS.prevent_overflow);
+            return aS.sync.can_reorder() && bS.sync.can_reorder() &&
+                   aS.sync == bS.sync && aS.glc == bS.glc && aS.dlc == bS.dlc &&
+                   aS.nv == bS.nv && aS.disable_wqm == bS.disable_wqm &&
+                   aS.prevent_overflow == bS.prevent_overflow;
          }
          case Format::VINTRP: {
-            Interp_instruction* aI = a->vintrp();
-            Interp_instruction* bI = b->vintrp();
-            if (aI->attribute != bI->attribute)
+            Interp_instruction& aI = a->vintrp();
+            Interp_instruction& bI = b->vintrp();
+            if (aI.attribute != bI.attribute)
                return false;
-            if (aI->component != bI->component)
+            if (aI.component != bI.component)
                return false;
             return true;
          }
          case Format::VOP3P: {
-            VOP3P_instruction* a3P = a->vop3p();
-            VOP3P_instruction* b3P = b->vop3p();
+            VOP3P_instruction& a3P = a->vop3p();
+            VOP3P_instruction& b3P = b->vop3p();
             for (unsigned i = 0; i < 3; i++) {
-               if (a3P->neg_lo[i] != b3P->neg_lo[i] ||
-                   a3P->neg_hi[i] != b3P->neg_hi[i])
+               if (a3P.neg_lo[i] != b3P.neg_lo[i] ||
+                   a3P.neg_hi[i] != b3P.neg_hi[i])
                   return false;
             }
-            return a3P->opsel_lo == b3P->opsel_lo &&
-                   a3P->opsel_hi == b3P->opsel_hi &&
-                   a3P->clamp == b3P->clamp;
+            return a3P.opsel_lo == b3P.opsel_lo &&
+                   a3P.opsel_hi == b3P.opsel_hi &&
+                   a3P.clamp == b3P.clamp;
          }
          case Format::PSEUDO_REDUCTION: {
-            Pseudo_reduction_instruction *aR = a->reduction();
-            Pseudo_reduction_instruction *bR = b->reduction();
-            return aR->pass_flags == bR->pass_flags &&
-                   aR->reduce_op == bR->reduce_op &&
-                   aR->cluster_size == bR->cluster_size;
+            Pseudo_reduction_instruction& aR = a->reduction();
+            Pseudo_reduction_instruction& bR = b->reduction();
+            return aR.pass_flags == bR.pass_flags &&
+                   aR.reduce_op == bR.reduce_op &&
+                   aR.cluster_size == bR.cluster_size;
          }
          case Format::MTBUF: {
-            MTBUF_instruction* aM = a->mtbuf();
-            MTBUF_instruction* bM = b->mtbuf();
-            return aM->sync.can_reorder() && bM->sync.can_reorder() &&
-                   aM->sync == bM->sync &&
-                   aM->dfmt == bM->dfmt &&
-                   aM->nfmt == bM->nfmt &&
-                   aM->offset == bM->offset &&
-                   aM->offen == bM->offen &&
-                   aM->idxen == bM->idxen &&
-                   aM->glc == bM->glc &&
-                   aM->dlc == bM->dlc &&
-                   aM->slc == bM->slc &&
-                   aM->tfe == bM->tfe &&
-                   aM->disable_wqm == bM->disable_wqm;
+            MTBUF_instruction& aM = a->mtbuf();
+            MTBUF_instruction& bM = b->mtbuf();
+            return aM.sync.can_reorder() && bM.sync.can_reorder() &&
+                   aM.sync == bM.sync &&
+                   aM.dfmt == bM.dfmt &&
+                   aM.nfmt == bM.nfmt &&
+                   aM.offset == bM.offset &&
+                   aM.offen == bM.offen &&
+                   aM.idxen == bM.idxen &&
+                   aM.glc == bM.glc &&
+                   aM.dlc == bM.dlc &&
+                   aM.slc == bM.slc &&
+                   aM.tfe == bM.tfe &&
+                   aM.disable_wqm == bM.disable_wqm;
          }
          case Format::MUBUF: {
-            MUBUF_instruction* aM = a->mubuf();
-            MUBUF_instruction* bM = b->mubuf();
-            return aM->sync.can_reorder() && bM->sync.can_reorder() &&
-                   aM->sync == bM->sync &&
-                   aM->offset == bM->offset &&
-                   aM->offen == bM->offen &&
-                   aM->idxen == bM->idxen &&
-                   aM->glc == bM->glc &&
-                   aM->dlc == bM->dlc &&
-                   aM->slc == bM->slc &&
-                   aM->tfe == bM->tfe &&
-                   aM->lds == bM->lds &&
-                   aM->disable_wqm == bM->disable_wqm;
+            MUBUF_instruction& aM = a->mubuf();
+            MUBUF_instruction& bM = b->mubuf();
+            return aM.sync.can_reorder() && bM.sync.can_reorder() &&
+                   aM.sync == bM.sync &&
+                   aM.offset == bM.offset &&
+                   aM.offen == bM.offen &&
+                   aM.idxen == bM.idxen &&
+                   aM.glc == bM.glc &&
+                   aM.dlc == bM.dlc &&
+                   aM.slc == bM.slc &&
+                   aM.tfe == bM.tfe &&
+                   aM.lds == bM.lds &&
+                   aM.disable_wqm == bM.disable_wqm;
          }
          /* we want to optimize these in NIR and don't hassle with load-store dependencies */
          case Format::FLAT:
@@ -308,31 +308,31 @@ struct InstrPred {
                 a->opcode != aco_opcode::ds_permute_b32 &&
                 a->opcode != aco_opcode::ds_swizzle_b32)
                return false;
-            DS_instruction* aD = a->ds();
-            DS_instruction* bD = b->ds();
-            return aD->sync.can_reorder() && bD->sync.can_reorder() &&
-                   aD->sync == bD->sync &&
-                   aD->pass_flags == bD->pass_flags &&
-                   aD->gds == bD->gds &&
-                   aD->offset0 == bD->offset0 &&
-                   aD->offset1 == bD->offset1;
+            DS_instruction& aD = a->ds();
+            DS_instruction& bD = b->ds();
+            return aD.sync.can_reorder() && bD.sync.can_reorder() &&
+                   aD.sync == bD.sync &&
+                   aD.pass_flags == bD.pass_flags &&
+                   aD.gds == bD.gds &&
+                   aD.offset0 == bD.offset0 &&
+                   aD.offset1 == bD.offset1;
          }
          case Format::MIMG: {
-            MIMG_instruction* aM = a->mimg();
-            MIMG_instruction* bM = b->mimg();
-            return aM->sync.can_reorder() && bM->sync.can_reorder() &&
-                   aM->sync == bM->sync &&
-                   aM->dmask == bM->dmask &&
-                   aM->unrm == bM->unrm &&
-                   aM->glc == bM->glc &&
-                   aM->slc == bM->slc &&
-                   aM->tfe == bM->tfe &&
-                   aM->da == bM->da &&
-                   aM->lwe == bM->lwe &&
-                   aM->r128 == bM->r128 &&
-                   aM->a16 == bM->a16 &&
-                   aM->d16 == bM->d16 &&
-                   aM->disable_wqm == bM->disable_wqm;
+            MIMG_instruction& aM = a->mimg();
+            MIMG_instruction& bM = b->mimg();
+            return aM.sync.can_reorder() && bM.sync.can_reorder() &&
+                   aM.sync == bM.sync &&
+                   aM.dmask == bM.dmask &&
+                   aM.unrm == bM.unrm &&
+                   aM.glc == bM.glc &&
+                   aM.slc == bM.slc &&
+                   aM.tfe == bM.tfe &&
+                   aM.da == bM.da &&
+                   aM.lwe == bM.lwe &&
+                   aM.r128 == bM.r128 &&
+                   aM.a16 == bM.a16 &&
+                   aM.d16 == bM.d16 &&
+                   aM.disable_wqm == bM.disable_wqm;
          }
          default:
             return true;
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 63bb9b1..3cffcb1 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -940,7 +940,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
          can_use_mod = can_use_mod && instr_info.can_use_input_modifiers[(int)instr->opcode];
 
          if (instr->isSDWA())
-            can_use_mod = can_use_mod && (instr->sdwa()->sel[i] & sdwa_asuint) == sdwa_udword;
+            can_use_mod = can_use_mod && (instr->sdwa().sel[i] & sdwa_asuint) == sdwa_udword;
          else
             can_use_mod = can_use_mod && (instr->isDPP() || can_use_VOP3(ctx, instr));
 
@@ -949,11 +949,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
                to_VOP3(ctx, instr);
             instr->operands[i] = Operand(info.temp);
             if (instr->isDPP())
-               instr->dpp()->abs[i] = true;
+               instr->dpp().abs[i] = true;
             else if (instr->isSDWA())
-               instr->sdwa()->abs[i] = true;
+               instr->sdwa().abs[i] = true;
             else
-               instr->vop3()->abs[i] = true;
+               instr->vop3().abs[i] = true;
          }
          if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) {
             instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32;
@@ -968,11 +968,11 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
                to_VOP3(ctx, instr);
             instr->operands[i].setTemp(info.temp);
             if (instr->isDPP())
-               instr->dpp()->neg[i] = true;
+               instr->dpp().neg[i] = true;
             else if (instr->isSDWA())
-               instr->sdwa()->neg[i] = true;
+               instr->sdwa().neg[i] = true;
             else
-               instr->vop3()->neg[i] = true;
+               instr->vop3().neg[i] = true;
             continue;
          }
          unsigned bits = get_operand_size(instr, i);
@@ -999,7 +999,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
 
       /* MUBUF: propagate constants and combine additions */
       else if (instr->isMUBUF()) {
-         MUBUF_instruction *mubuf = instr->mubuf();
+         MUBUF_instruction& mubuf = instr->mubuf();
          Temp base;
          uint32_t offset;
          while (info.is_temp())
@@ -1011,29 +1011,29 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
           * scratch accesses and other accesses and swizzling changing how
           * addressing works significantly, this probably applies to swizzled
           * MUBUF accesses. */
-         bool vaddr_prevent_overflow = mubuf->swizzled && ctx.program->chip_class < GFX9;
-         bool saddr_prevent_overflow = mubuf->swizzled;
+         bool vaddr_prevent_overflow = mubuf.swizzled && ctx.program->chip_class < GFX9;
+         bool saddr_prevent_overflow = mubuf.swizzled;
 
-         if (mubuf->offen && i == 1 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) {
-            assert(!mubuf->idxen);
+         if (mubuf.offen && i == 1 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) {
+            assert(!mubuf.idxen);
             instr->operands[1] = Operand(v1);
-            mubuf->offset += info.val;
-            mubuf->offen = false;
+            mubuf.offset += info.val;
+            mubuf.offen = false;
             continue;
-         } else if (i == 2 && info.is_constant_or_literal(32) && mubuf->offset + info.val < 4096) {
+         } else if (i == 2 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) {
             instr->operands[2] = Operand((uint32_t) 0);
-            mubuf->offset += info.val;
+            mubuf.offset += info.val;
             continue;
-         } else if (mubuf->offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, vaddr_prevent_overflow) &&
-                    base.regClass() == v1 && mubuf->offset + offset < 4096) {
-            assert(!mubuf->idxen);
+         } else if (mubuf.offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, vaddr_prevent_overflow) &&
+                    base.regClass() == v1 && mubuf.offset + offset < 4096) {
+            assert(!mubuf.idxen);
             instr->operands[1].setTemp(base);
-            mubuf->offset += offset;
+            mubuf.offset += offset;
             continue;
          } else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset, saddr_prevent_overflow) &&
-                    base.regClass() == s1 && mubuf->offset + offset < 4096) {
+                    base.regClass() == s1 && mubuf.offset + offset < 4096) {
             instr->operands[i].setTemp(base);
-            mubuf->offset += offset;
+            mubuf.offset += offset;
             continue;
          }
       }
@@ -1041,7 +1041,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
       /* DS: combine additions */
       else if (instr->isDS()) {
 
-         DS_instruction *ds = instr->ds();
+         DS_instruction& ds = instr->ds();
          Temp base;
          uint32_t offset;
          bool has_usable_ds_offset = ctx.program->chip_class >= GFX7;
@@ -1055,16 +1055,16 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
                unsigned shifts = (instr->opcode == aco_opcode::ds_write2_b64 || instr->opcode == aco_opcode::ds_read2_b64) ? 3 : 2;
 
                if ((offset & mask) == 0 &&
-                   ds->offset0 + (offset >> shifts) <= 255 &&
-                   ds->offset1 + (offset >> shifts) <= 255) {
+                   ds.offset0 + (offset >> shifts) <= 255 &&
+                   ds.offset1 + (offset >> shifts) <= 255) {
                   instr->operands[i].setTemp(base);
-                  ds->offset0 += offset >> shifts;
-                  ds->offset1 += offset >> shifts;
+                  ds.offset0 += offset >> shifts;
+                  ds.offset1 += offset >> shifts;
                }
             } else {
-               if (ds->offset0 + offset <= 65535) {
+               if (ds.offset0 + offset <= 65535) {
                   instr->operands[i].setTemp(base);
-                  ds->offset0 += offset;
+                  ds.offset0 += offset;
                }
             }
          }
@@ -1073,10 +1073,10 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
       /* SMEM: propagate constants and combine additions */
       else if (instr->isSMEM()) {
 
-         SMEM_instruction *smem = instr->smem();
+         SMEM_instruction& smem = instr->smem();
          Temp base;
          uint32_t offset;
-         bool prevent_overflow = smem->operands[0].size() > 2 || smem->prevent_overflow;
+         bool prevent_overflow = smem.operands[0].size() > 2 || smem.prevent_overflow;
          if (i == 1 && info.is_constant_or_literal(32) &&
              ((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) ||
               (ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) ||
@@ -1084,31 +1084,30 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
             instr->operands[i] = Operand(info.val);
             continue;
          } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) {
-            bool soe = smem->operands.size() >= (!smem->definitions.empty() ? 3 : 4);
+            bool soe = smem.operands.size() >= (!smem.definitions.empty() ? 3 : 4);
             if (soe &&
-                (!ctx.info[smem->operands.back().tempId()].is_constant_or_literal(32) ||
-                 ctx.info[smem->operands.back().tempId()].val != 0)) {
+                (!ctx.info[smem.operands.back().tempId()].is_constant_or_literal(32) ||
+                 ctx.info[smem.operands.back().tempId()].val != 0)) {
                continue;
             }
             if (soe) {
-               smem->operands[1] = Operand(offset);
-               smem->operands.back() = Operand(base);
+               smem.operands[1] = Operand(offset);
+               smem.operands.back() = Operand(base);
             } else {
-               SMEM_instruction *new_instr = create_instruction<SMEM_instruction>(smem->opcode, Format::SMEM, smem->operands.size() + 1, smem->definitions.size());
-               new_instr->operands[0] = smem->operands[0];
+               SMEM_instruction *new_instr = create_instruction<SMEM_instruction>(smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
+               new_instr->operands[0] = smem.operands[0];
                new_instr->operands[1] = Operand(offset);
-               if (smem->definitions.empty())
-                  new_instr->operands[2] = smem->operands[2];
+               if (smem.definitions.empty())
+                  new_instr->operands[2] = smem.operands[2];
                new_instr->operands.back() = Operand(base);
-               if (!smem->definitions.empty())
-                  new_instr->definitions[0] = smem->definitions[0];
-               new_instr->sync = smem->sync;
-               new_instr->glc = smem->glc;
-               new_instr->dlc = smem->dlc;
-               new_instr->nv = smem->nv;
-               new_instr->disable_wqm = smem->disable_wqm;
+               if (!smem.definitions.empty())
+                  new_instr->definitions[0] = smem.definitions[0];
+               new_instr->sync = smem.sync;
+               new_instr->glc = smem.glc;
+               new_instr->dlc = smem.dlc;
+               new_instr->nv = smem.nv;
+               new_instr->disable_wqm = smem.disable_wqm;
                instr.reset(new_instr);
-               smem = instr->smem();
             }
             continue;
          }
@@ -1365,10 +1364,10 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
    }
    case aco_opcode::v_med3_f16:
    case aco_opcode::v_med3_f32: { /* clamp */
-      VOP3_instruction* vop3 = instr->vop3();
-      if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
-          vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
-          vop3->omod != 0 || vop3->opsel != 0)
+      VOP3_instruction& vop3 = instr->vop3();
+      if (vop3.abs[0] || vop3.abs[1] || vop3.abs[2] ||
+          vop3.neg[0] || vop3.neg[1] || vop3.neg[2] ||
+          vop3.omod != 0 || vop3.opsel != 0)
          break;
 
       unsigned idx = 0;
@@ -1682,12 +1681,12 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
          return false;
 
       if (op_instr[i]->isVOP3()) {
-         VOP3_instruction *vop3 = op_instr[i]->vop3();
-         if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
+         VOP3_instruction& vop3 = op_instr[i]->vop3();
+         if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 || vop3.opsel == 2)
             return false;
-         neg[i] = vop3->neg[0];
-         abs[i] = vop3->abs[0];
-         opsel |= (vop3->opsel & 1) << i;
+         neg[i] = vop3.neg[0];
+         abs[i] = vop3.abs[0];
+         opsel |= (vop3.opsel & 1) << i;
       } else if (op_instr[i]->isSDWA()) {
          return false;
       }
@@ -1798,12 +1797,12 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
    Instruction *new_instr;
    if (cmp->isVOP3()) {
       VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
-      VOP3_instruction *cmp_vop3 = cmp->vop3();
-      memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
-      memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
-      new_vop3->clamp = cmp_vop3->clamp;
-      new_vop3->omod = cmp_vop3->omod;
-      new_vop3->opsel = cmp_vop3->opsel;
+      VOP3_instruction& cmp_vop3 = cmp->vop3();
+      memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
+      memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
+      new_vop3->clamp = cmp_vop3.clamp;
+      new_vop3->omod = cmp_vop3.omod;
+      new_vop3->opsel = cmp_vop3.opsel;
       new_instr = new_vop3;
    } else {
       new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
@@ -1885,8 +1884,8 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
       return false;
 
    if (nan_test->isVOP3()) {
-      VOP3_instruction *vop3 = nan_test->vop3();
-      if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
+      VOP3_instruction& vop3 = nan_test->vop3();
+      if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 || vop3.opsel == 2)
          return false;
    }
 
@@ -1917,12 +1916,12 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
    Instruction *new_instr;
    if (cmp->isVOP3()) {
       VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
-      VOP3_instruction *cmp_vop3 = cmp->vop3();
-      memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
-      memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
-      new_vop3->clamp = cmp_vop3->clamp;
-      new_vop3->omod = cmp_vop3->omod;
-      new_vop3->opsel = cmp_vop3->opsel;
+      VOP3_instruction& cmp_vop3 = cmp->vop3();
+      memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
+      memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
+      new_vop3->clamp = cmp_vop3.clamp;
+      new_vop3->omod = cmp_vop3.omod;
+      new_vop3->opsel = cmp_vop3.opsel;
       new_instr = new_vop3;
    } else {
       new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
@@ -1966,24 +1965,24 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr<Instruction>& instr)
    Instruction *new_instr;
    if (cmp->isVOP3()) {
       VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
-      VOP3_instruction *cmp_vop3 = cmp->vop3();
-      memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
-      memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
-      new_vop3->clamp = cmp_vop3->clamp;
-      new_vop3->omod = cmp_vop3->omod;
-      new_vop3->opsel = cmp_vop3->opsel;
+      VOP3_instruction& cmp_vop3 = cmp->vop3();
+      memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
+      memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
+      new_vop3->clamp = cmp_vop3.clamp;
+      new_vop3->omod = cmp_vop3.omod;
+      new_vop3->opsel = cmp_vop3.opsel;
       new_instr = new_vop3;
    } else if (cmp->isSDWA()) {
       SDWA_instruction *new_sdwa = create_instruction<SDWA_instruction>(
          new_opcode, (Format)((uint16_t)Format::SDWA | (uint16_t)Format::VOPC), 2, 1);
-      SDWA_instruction *cmp_sdwa = cmp->sdwa();
-      memcpy(new_sdwa->abs, cmp_sdwa->abs, sizeof(new_sdwa->abs));
-      memcpy(new_sdwa->sel, cmp_sdwa->sel, sizeof(new_sdwa->sel));
-      memcpy(new_sdwa->neg, cmp_sdwa->neg, sizeof(new_sdwa->neg));
-      new_sdwa->dst_sel = cmp_sdwa->dst_sel;
-      new_sdwa->dst_preserve = cmp_sdwa->dst_preserve;
-      new_sdwa->clamp = cmp_sdwa->clamp;
-      new_sdwa->omod = cmp_sdwa->omod;
+      SDWA_instruction& cmp_sdwa = cmp->sdwa();
+      memcpy(new_sdwa->abs, cmp_sdwa.abs, sizeof(new_sdwa->abs));
+      memcpy(new_sdwa->sel, cmp_sdwa.sel, sizeof(new_sdwa->sel));
+      memcpy(new_sdwa->neg, cmp_sdwa.neg, sizeof(new_sdwa->neg));
+      new_sdwa->dst_sel = cmp_sdwa.dst_sel;
+      new_sdwa->dst_preserve = cmp_sdwa.dst_preserve;
+      new_sdwa->clamp = cmp_sdwa.clamp;
+      new_sdwa->omod = cmp_sdwa.omod;
       new_instr = new_sdwa;
    } else {
       new_instr = create_instruction<VOPC_instruction>(new_opcode, Format::VOPC, 2, 1);
@@ -2019,8 +2018,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
    if (fixed_to_exec(op2_instr->operands[0]) || fixed_to_exec(op2_instr->operands[1]))
       return false;
 
-   VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? op1_instr->vop3() : NULL;
-   VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? op2_instr->vop3() : NULL;
+   VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? &op1_instr->vop3() : NULL;
+   VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? &op2_instr->vop3() : NULL;
 
    if (op1_instr->isSDWA() || op2_instr->isSDWA())
       return false;
@@ -2641,11 +2640,11 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
    assert(!ctx.info[instr->definitions[0].tempId()].is_mad());
 
    if (instr->isSDWA()) {
-      if (!apply_omod_clamp_helper(ctx, instr->sdwa(), def_info))
+      if (!apply_omod_clamp_helper(ctx, &instr->sdwa(), def_info))
          return false;
    } else {
       to_VOP3(ctx, instr);
-      if (!apply_omod_clamp_helper(ctx, instr->vop3(), def_info))
+      if (!apply_omod_clamp_helper(ctx, &instr->vop3(), def_info))
          return false;
    }
 
@@ -2767,7 +2766,7 @@ void propagate_swizzles(VOP3P_instruction* instr, uint8_t opsel_lo, uint8_t opse
 
 void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
 {
-   VOP3P_instruction* vop3p = instr->vop3p();
+   VOP3P_instruction* vop3p = &instr->vop3p();
 
    /* apply clamp */
    if (instr->opcode == aco_opcode::v_pk_mul_f16 &&
@@ -2778,7 +2777,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
 
       ssa_info& info = ctx.info[instr->operands[0].tempId()];
       if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) {
-         VOP3P_instruction* candidate = ctx.info[instr->operands[0].tempId()].instr->vop3p();
+         VOP3P_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->vop3p();
          candidate->clamp = true;
          propagate_swizzles(candidate, vop3p->opsel_lo, vop3p->opsel_hi);
          std::swap(instr->definitions[0], candidate->definitions[0]);
@@ -2804,7 +2803,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
             if (!check_vop3_operands(ctx, 2, ops))
                continue;
 
-            VOP3P_instruction* fneg = info.instr->vop3p();
+            VOP3P_instruction* fneg = &info.instr->vop3p();
             if (fneg->clamp)
                continue;
             instr->operands[i] = fneg->operands[0];
@@ -2849,7 +2848,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
             continue;
 
          /* no clamp allowed between mul and add */
-         if (info.instr->vop3p()->clamp)
+         if (info.instr->vop3p().clamp)
             continue;
 
          mul_instr = info.instr;
@@ -2875,7 +2874,7 @@ void combine_vop3p(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
       /* turn packed mul+add into v_pk_fma_f16 */
       assert(mul_instr->isVOP3P());
       aco_ptr<VOP3P_instruction> fma{create_instruction<VOP3P_instruction>(aco_opcode::v_pk_fma_f16, Format::VOP3P, 3, 1)};
-      VOP3P_instruction* mul = mul_instr->vop3p();
+      VOP3P_instruction* mul = &mul_instr->vop3p();
       for (unsigned i = 0; i < 2; i++) {
          fma->operands[i] = op[i];
          fma->neg_lo[i] = mul->neg_lo[i];
@@ -2944,7 +2943,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
 
       if (mul_instr->operands[0].isLiteral())
          return;
-      if (mul_instr->isVOP3() && mul_instr->vop3()->clamp)
+      if (mul_instr->isVOP3() && mul_instr->vop3().clamp)
          return;
       if (mul_instr->isSDWA())
          return;
@@ -2958,17 +2957,17 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
       instr->operands[0] = mul_instr->operands[0];
       instr->operands[1] = mul_instr->operands[1];
       instr->definitions[0] = def;
-      VOP3_instruction* new_mul = instr->vop3();
+      VOP3_instruction& new_mul = instr->vop3();
       if (mul_instr->isVOP3()) {
-         VOP3_instruction* mul = mul_instr->vop3();
-         new_mul->neg[0] = mul->neg[0] && !is_abs;
-         new_mul->neg[1] = mul->neg[1] && !is_abs;
-         new_mul->abs[0] = mul->abs[0] || is_abs;
-         new_mul->abs[1] = mul->abs[1] || is_abs;
-         new_mul->omod = mul->omod;
+         VOP3_instruction& mul = mul_instr->vop3();
+         new_mul.neg[0] = mul.neg[0] && !is_abs;
+         new_mul.neg[1] = mul.neg[1] && !is_abs;
+         new_mul.abs[0] = mul.abs[0] || is_abs;
+         new_mul.abs[1] = mul.abs[1] || is_abs;
+         new_mul.omod = mul.omod;
       }
-      new_mul->neg[0] ^= true;
-      new_mul->clamp = false;
+      new_mul.neg[0] ^= true;
+      new_mul.clamp = false;
 
       ctx.info[instr->definitions[0].tempId()].set_mul(instr.get());
       return;
@@ -3002,7 +3001,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
             continue;
 
          /* no clamp/omod allowed between mul and add */
-         if (info.instr->isVOP3() && (info.instr->vop3()->clamp || info.instr->vop3()->omod))
+         if (info.instr->isVOP3() && (info.instr->vop3().clamp || info.instr->vop3().omod))
             continue;
 
          Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]};
@@ -3033,28 +3032,28 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
          bool clamp = false;
 
          if (mul_instr->isVOP3()) {
-            VOP3_instruction* vop3 = mul_instr->vop3();
-            neg[0] = vop3->neg[0];
-            neg[1] = vop3->neg[1];
-            abs[0] = vop3->abs[0];
-            abs[1] = vop3->abs[1];
+            VOP3_instruction& vop3 = mul_instr->vop3();
+            neg[0] = vop3.neg[0];
+            neg[1] = vop3.neg[1];
+            abs[0] = vop3.abs[0];
+            abs[1] = vop3.abs[1];
          }
 
          if (instr->isVOP3()) {
-            VOP3_instruction* vop3 = instr->vop3();
-            neg[2] = vop3->neg[add_op_idx];
-            abs[2] = vop3->abs[add_op_idx];
-            omod = vop3->omod;
-            clamp = vop3->clamp;
+            VOP3_instruction& vop3 = instr->vop3();
+            neg[2] = vop3.neg[add_op_idx];
+            abs[2] = vop3.abs[add_op_idx];
+            omod = vop3.omod;
+            clamp = vop3.clamp;
             /* abs of the multiplication result */
-            if (vop3->abs[1 - add_op_idx]) {
+            if (vop3.abs[1 - add_op_idx]) {
                neg[0] = false;
                neg[1] = false;
                abs[0] = true;
                abs[1] = true;
             }
             /* neg of the multiplication result */
-            neg[1] = neg[1] ^ vop3->neg[1 - add_op_idx];
+            neg[1] = neg[1] ^ vop3.neg[1 - add_op_idx];
          }
          if (instr->opcode == aco_opcode::v_sub_f32 || instr->opcode == aco_opcode::v_sub_f16)
             neg[1 + add_op_idx] = neg[1 + add_op_idx] ^ true;
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index 16d198f..86abb47 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -273,12 +273,12 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
 {
    switch (instr->format) {
    case Format::SOPK: {
-      const SOPK_instruction* sopk = instr->sopk();
-      fprintf(output, " imm:%d", sopk->imm & 0x8000 ? (sopk->imm - 65536) : sopk->imm);
+      const SOPK_instruction& sopk = instr->sopk();
+      fprintf(output, " imm:%d", sopk.imm & 0x8000 ? (sopk.imm - 65536) : sopk.imm);
       break;
    }
    case Format::SOPP: {
-      uint16_t imm = instr->sopp()->imm;
+      uint16_t imm = instr->sopp().imm;
       switch (instr->opcode) {
       case aco_opcode::s_waitcnt: {
          /* we usually should check the chip class for vmcnt/lgkm, but
@@ -340,74 +340,74 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
          break;
       }
       }
-      if (instr->sopp()->block != -1)
-         fprintf(output, " block:BB%d", instr->sopp()->block);
+      if (instr->sopp().block != -1)
+         fprintf(output, " block:BB%d", instr->sopp().block);
       break;
    }
    case Format::SMEM: {
-      const SMEM_instruction* smem = instr->smem();
-      if (smem->glc)
+      const SMEM_instruction& smem = instr->smem();
+      if (smem.glc)
          fprintf(output, " glc");
-      if (smem->dlc)
+      if (smem.dlc)
          fprintf(output, " dlc");
-      if (smem->nv)
+      if (smem.nv)
          fprintf(output, " nv");
-      print_sync(smem->sync, output);
+      print_sync(smem.sync, output);
       break;
    }
    case Format::VINTRP: {
-      const Interp_instruction* vintrp = instr->vintrp();
-      fprintf(output, " attr%d.%c", vintrp->attribute, "xyzw"[vintrp->component]);
+      const Interp_instruction& vintrp = instr->vintrp();
+      fprintf(output, " attr%d.%c", vintrp.attribute, "xyzw"[vintrp.component]);
       break;
    }
    case Format::DS: {
-      const DS_instruction* ds = instr->ds();
-      if (ds->offset0)
-         fprintf(output, " offset0:%u", ds->offset0);
-      if (ds->offset1)
-         fprintf(output, " offset1:%u", ds->offset1);
-      if (ds->gds)
+      const DS_instruction& ds = instr->ds();
+      if (ds.offset0)
+         fprintf(output, " offset0:%u", ds.offset0);
+      if (ds.offset1)
+         fprintf(output, " offset1:%u", ds.offset1);
+      if (ds.gds)
          fprintf(output, " gds");
-      print_sync(ds->sync, output);
+      print_sync(ds.sync, output);
       break;
    }
    case Format::MUBUF: {
-      const MUBUF_instruction* mubuf = instr->mubuf();
-      if (mubuf->offset)
-         fprintf(output, " offset:%u", mubuf->offset);
-      if (mubuf->offen)
+      const MUBUF_instruction& mubuf = instr->mubuf();
+      if (mubuf.offset)
+         fprintf(output, " offset:%u", mubuf.offset);
+      if (mubuf.offen)
          fprintf(output, " offen");
-      if (mubuf->idxen)
+      if (mubuf.idxen)
          fprintf(output, " idxen");
-      if (mubuf->addr64)
+      if (mubuf.addr64)
          fprintf(output, " addr64");
-      if (mubuf->glc)
+      if (mubuf.glc)
          fprintf(output, " glc");
-      if (mubuf->dlc)
+      if (mubuf.dlc)
          fprintf(output, " dlc");
-      if (mubuf->slc)
+      if (mubuf.slc)
          fprintf(output, " slc");
-      if (mubuf->tfe)
+      if (mubuf.tfe)
          fprintf(output, " tfe");
-      if (mubuf->lds)
+      if (mubuf.lds)
          fprintf(output, " lds");
-      if (mubuf->disable_wqm)
+      if (mubuf.disable_wqm)
          fprintf(output, " disable_wqm");
-      print_sync(mubuf->sync, output);
+      print_sync(mubuf.sync, output);
       break;
    }
    case Format::MIMG: {
-      const MIMG_instruction* mimg = instr->mimg();
+      const MIMG_instruction& mimg = instr->mimg();
       unsigned identity_dmask = !instr->definitions.empty() ?
                                 (1 << instr->definitions[0].size()) - 1 :
                                 0xf;
-      if ((mimg->dmask & identity_dmask) != identity_dmask)
+      if ((mimg.dmask & identity_dmask) != identity_dmask)
          fprintf(output, " dmask:%s%s%s%s",
-                 mimg->dmask & 0x1 ? "x" : "",
-                 mimg->dmask & 0x2 ? "y" : "",
-                 mimg->dmask & 0x4 ? "z" : "",
-                 mimg->dmask & 0x8 ? "w" : "");
-      switch (mimg->dim) {
+                 mimg.dmask & 0x1 ? "x" : "",
+                 mimg.dmask & 0x2 ? "y" : "",
+                 mimg.dmask & 0x4 ? "z" : "",
+                 mimg.dmask & 0x8 ? "w" : "");
+      switch (mimg.dim) {
       case ac_image_1d:
          fprintf(output, " 1d");
          break;
@@ -433,104 +433,104 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
          fprintf(output, " 2darraymsaa");
          break;
       }
-      if (mimg->unrm)
+      if (mimg.unrm)
          fprintf(output, " unrm");
-      if (mimg->glc)
+      if (mimg.glc)
          fprintf(output, " glc");
-      if (mimg->dlc)
+      if (mimg.dlc)
          fprintf(output, " dlc");
-      if (mimg->slc)
+      if (mimg.slc)
          fprintf(output, " slc");
-      if (mimg->tfe)
+      if (mimg.tfe)
          fprintf(output, " tfe");
-      if (mimg->da)
+      if (mimg.da)
          fprintf(output, " da");
-      if (mimg->lwe)
+      if (mimg.lwe)
          fprintf(output, " lwe");
-      if (mimg->r128 || mimg->a16)
+      if (mimg.r128 || mimg.a16)
          fprintf(output, " r128/a16");
-      if (mimg->d16)
+      if (mimg.d16)
          fprintf(output, " d16");
-      if (mimg->disable_wqm)
+      if (mimg.disable_wqm)
          fprintf(output, " disable_wqm");
-      print_sync(mimg->sync, output);
+      print_sync(mimg.sync, output);
       break;
    }
    case Format::EXP: {
-      const Export_instruction* exp = instr->exp();
-      unsigned identity_mask = exp->compressed ? 0x5 : 0xf;
-      if ((exp->enabled_mask & identity_mask) != identity_mask)
+      const Export_instruction& exp = instr->exp();
+      unsigned identity_mask = exp.compressed ? 0x5 : 0xf;
+      if ((exp.enabled_mask & identity_mask) != identity_mask)
          fprintf(output, " en:%c%c%c%c",
-                 exp->enabled_mask & 0x1 ? 'r' : '*',
-                 exp->enabled_mask & 0x2 ? 'g' : '*',
-                 exp->enabled_mask & 0x4 ? 'b' : '*',
-                 exp->enabled_mask & 0x8 ? 'a' : '*');
-      if (exp->compressed)
+                 exp.enabled_mask & 0x1 ? 'r' : '*',
+                 exp.enabled_mask & 0x2 ? 'g' : '*',
+                 exp.enabled_mask & 0x4 ? 'b' : '*',
+                 exp.enabled_mask & 0x8 ? 'a' : '*');
+      if (exp.compressed)
          fprintf(output, " compr");
-      if (exp->done)
+      if (exp.done)
          fprintf(output, " done");
-      if (exp->valid_mask)
+      if (exp.valid_mask)
          fprintf(output, " vm");
 
-      if (exp->dest <= V_008DFC_SQ_EXP_MRT + 7)
-         fprintf(output, " mrt%d", exp->dest - V_008DFC_SQ_EXP_MRT);
-      else if (exp->dest == V_008DFC_SQ_EXP_MRTZ)
+      if (exp.dest <= V_008DFC_SQ_EXP_MRT + 7)
+         fprintf(output, " mrt%d", exp.dest - V_008DFC_SQ_EXP_MRT);
+      else if (exp.dest == V_008DFC_SQ_EXP_MRTZ)
          fprintf(output, " mrtz");
-      else if (exp->dest == V_008DFC_SQ_EXP_NULL)
+      else if (exp.dest == V_008DFC_SQ_EXP_NULL)
          fprintf(output, " null");
-      else if (exp->dest >= V_008DFC_SQ_EXP_POS && exp->dest <= V_008DFC_SQ_EXP_POS + 3)
-         fprintf(output, " pos%d", exp->dest - V_008DFC_SQ_EXP_POS);
-      else if (exp->dest >= V_008DFC_SQ_EXP_PARAM && exp->dest <= V_008DFC_SQ_EXP_PARAM + 31)
-         fprintf(output, " param%d", exp->dest - V_008DFC_SQ_EXP_PARAM);
+      else if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= V_008DFC_SQ_EXP_POS + 3)
+         fprintf(output, " pos%d", exp.dest - V_008DFC_SQ_EXP_POS);
+      else if (exp.dest >= V_008DFC_SQ_EXP_PARAM && exp.dest <= V_008DFC_SQ_EXP_PARAM + 31)
+         fprintf(output, " param%d", exp.dest - V_008DFC_SQ_EXP_PARAM);
       break;
    }
    case Format::PSEUDO_BRANCH: {
-      const Pseudo_branch_instruction* branch = instr->branch();
+      const Pseudo_branch_instruction& branch = instr->branch();
       /* Note: BB0 cannot be a branch target */
-      if (branch->target[0] != 0)
-         fprintf(output, " BB%d", branch->target[0]);
-      if (branch->target[1] != 0)
-         fprintf(output, ", BB%d", branch->target[1]);
+      if (branch.target[0] != 0)
+         fprintf(output, " BB%d", branch.target[0]);
+      if (branch.target[1] != 0)
+         fprintf(output, ", BB%d", branch.target[1]);
       break;
    }
    case Format::PSEUDO_REDUCTION: {
-      const Pseudo_reduction_instruction* reduce = instr->reduction();
-      fprintf(output, " op:%s", reduce_ops[reduce->reduce_op]);
-      if (reduce->cluster_size)
-         fprintf(output, " cluster_size:%u", reduce->cluster_size);
+      const Pseudo_reduction_instruction& reduce = instr->reduction();
+      fprintf(output, " op:%s", reduce_ops[reduce.reduce_op]);
+      if (reduce.cluster_size)
+         fprintf(output, " cluster_size:%u", reduce.cluster_size);
       break;
    }
    case Format::PSEUDO_BARRIER: {
-      const Pseudo_barrier_instruction* barrier = instr->barrier();
-      print_sync(barrier->sync, output);
-      print_scope(barrier->exec_scope, output, "exec_scope");
+      const Pseudo_barrier_instruction& barrier = instr->barrier();
+      print_sync(barrier.sync, output);
+      print_scope(barrier.exec_scope, output, "exec_scope");
       break;
    }
    case Format::FLAT:
    case Format::GLOBAL:
    case Format::SCRATCH: {
-      const FLAT_instruction* flat = instr->flatlike();
-      if (flat->offset)
-         fprintf(output, " offset:%u", flat->offset);
-      if (flat->glc)
+      const FLAT_instruction& flat = instr->flatlike();
+      if (flat.offset)
+         fprintf(output, " offset:%u", flat.offset);
+      if (flat.glc)
          fprintf(output, " glc");
-      if (flat->dlc)
+      if (flat.dlc)
          fprintf(output, " dlc");
-      if (flat->slc)
+      if (flat.slc)
          fprintf(output, " slc");
-      if (flat->lds)
+      if (flat.lds)
          fprintf(output, " lds");
-      if (flat->nv)
+      if (flat.nv)
          fprintf(output, " nv");
-      if (flat->disable_wqm)
+      if (flat.disable_wqm)
          fprintf(output, " disable_wqm");
-      print_sync(flat->sync, output);
+      print_sync(flat.sync, output);
       break;
    }
    case Format::MTBUF: {
-      const MTBUF_instruction* mtbuf = instr->mtbuf();
+      const MTBUF_instruction& mtbuf = instr->mtbuf();
       fprintf(output, " dfmt:");
-      switch (mtbuf->dfmt) {
+      switch (mtbuf.dfmt) {
       case V_008F0C_BUF_DATA_FORMAT_8: fprintf(output, "8"); break;
       case V_008F0C_BUF_DATA_FORMAT_16: fprintf(output, "16"); break;
       case V_008F0C_BUF_DATA_FORMAT_8_8: fprintf(output, "8_8"); break;
@@ -548,7 +548,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
       case V_008F0C_BUF_DATA_FORMAT_RESERVED_15: fprintf(output, "reserved15"); break;
       }
       fprintf(output, " nfmt:");
-      switch (mtbuf->nfmt) {
+      switch (mtbuf.nfmt) {
       case V_008F0C_BUF_NUM_FORMAT_UNORM: fprintf(output, "unorm"); break;
       case V_008F0C_BUF_NUM_FORMAT_SNORM: fprintf(output, "snorm"); break;
       case V_008F0C_BUF_NUM_FORMAT_USCALED: fprintf(output, "uscaled"); break;
@@ -558,27 +558,27 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
       case V_008F0C_BUF_NUM_FORMAT_SNORM_OGL: fprintf(output, "snorm"); break;
       case V_008F0C_BUF_NUM_FORMAT_FLOAT: fprintf(output, "float"); break;
       }
-      if (mtbuf->offset)
-         fprintf(output, " offset:%u", mtbuf->offset);
-      if (mtbuf->offen)
+      if (mtbuf.offset)
+         fprintf(output, " offset:%u", mtbuf.offset);
+      if (mtbuf.offen)
          fprintf(output, " offen");
-      if (mtbuf->idxen)
+      if (mtbuf.idxen)
          fprintf(output, " idxen");
-      if (mtbuf->glc)
+      if (mtbuf.glc)
          fprintf(output, " glc");
-      if (mtbuf->dlc)
+      if (mtbuf.dlc)
          fprintf(output, " dlc");
-      if (mtbuf->slc)
+      if (mtbuf.slc)
          fprintf(output, " slc");
-      if (mtbuf->tfe)
+      if (mtbuf.tfe)
          fprintf(output, " tfe");
-      if (mtbuf->disable_wqm)
+      if (mtbuf.disable_wqm)
          fprintf(output, " disable_wqm");
-      print_sync(mtbuf->sync, output);
+      print_sync(mtbuf.sync, output);
       break;
    }
    case Format::VOP3P: {
-      if (instr->vop3p()->clamp)
+      if (instr->vop3p().clamp)
          fprintf(output, " clamp");
       break;
    }
@@ -587,8 +587,8 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
    }
    }
    if (instr->isVOP3()) {
-      const VOP3_instruction* vop3 = instr->vop3();
-      switch (vop3->omod) {
+      const VOP3_instruction& vop3 = instr->vop3();
+      switch (vop3.omod) {
       case 1:
          fprintf(output, " *2");
          break;
@@ -599,50 +599,50 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
          fprintf(output, " *0.5");
          break;
       }
-      if (vop3->clamp)
+      if (vop3.clamp)
          fprintf(output, " clamp");
-      if (vop3->opsel & (1 << 3))
+      if (vop3.opsel & (1 << 3))
          fprintf(output, " opsel_hi");
    } else if (instr->isDPP()) {
-      const DPP_instruction* dpp = instr->dpp();
-      if (dpp->dpp_ctrl <= 0xff) {
+      const DPP_instruction& dpp = instr->dpp();
+      if (dpp.dpp_ctrl <= 0xff) {
          fprintf(output, " quad_perm:[%d,%d,%d,%d]",
-                 dpp->dpp_ctrl & 0x3, (dpp->dpp_ctrl >> 2) & 0x3,
-                 (dpp->dpp_ctrl >> 4) & 0x3, (dpp->dpp_ctrl >> 6) & 0x3);
-      } else if (dpp->dpp_ctrl >= 0x101 && dpp->dpp_ctrl <= 0x10f) {
-         fprintf(output, " row_shl:%d", dpp->dpp_ctrl & 0xf);
-      } else if (dpp->dpp_ctrl >= 0x111 && dpp->dpp_ctrl <= 0x11f) {
-         fprintf(output, " row_shr:%d", dpp->dpp_ctrl & 0xf);
-      } else if (dpp->dpp_ctrl >= 0x121 && dpp->dpp_ctrl <= 0x12f) {
-         fprintf(output, " row_ror:%d", dpp->dpp_ctrl & 0xf);
-      } else if (dpp->dpp_ctrl == dpp_wf_sl1) {
+                 dpp.dpp_ctrl & 0x3, (dpp.dpp_ctrl >> 2) & 0x3,
+                 (dpp.dpp_ctrl >> 4) & 0x3, (dpp.dpp_ctrl >> 6) & 0x3);
+      } else if (dpp.dpp_ctrl >= 0x101 && dpp.dpp_ctrl <= 0x10f) {
+         fprintf(output, " row_shl:%d", dpp.dpp_ctrl & 0xf);
+      } else if (dpp.dpp_ctrl >= 0x111 && dpp.dpp_ctrl <= 0x11f) {
+         fprintf(output, " row_shr:%d", dpp.dpp_ctrl & 0xf);
+      } else if (dpp.dpp_ctrl >= 0x121 && dpp.dpp_ctrl <= 0x12f) {
+         fprintf(output, " row_ror:%d", dpp.dpp_ctrl & 0xf);
+      } else if (dpp.dpp_ctrl == dpp_wf_sl1) {
          fprintf(output, " wave_shl:1");
-      } else if (dpp->dpp_ctrl == dpp_wf_rl1) {
+      } else if (dpp.dpp_ctrl == dpp_wf_rl1) {
          fprintf(output, " wave_rol:1");
-      } else if (dpp->dpp_ctrl == dpp_wf_sr1) {
+      } else if (dpp.dpp_ctrl == dpp_wf_sr1) {
          fprintf(output, " wave_shr:1");
-      } else if (dpp->dpp_ctrl == dpp_wf_rr1) {
+      } else if (dpp.dpp_ctrl == dpp_wf_rr1) {
          fprintf(output, " wave_ror:1");
-      } else if (dpp->dpp_ctrl == dpp_row_mirror) {
+      } else if (dpp.dpp_ctrl == dpp_row_mirror) {
          fprintf(output, " row_mirror");
-      } else if (dpp->dpp_ctrl == dpp_row_half_mirror) {
+      } else if (dpp.dpp_ctrl == dpp_row_half_mirror) {
          fprintf(output, " row_half_mirror");
-      } else if (dpp->dpp_ctrl == dpp_row_bcast15) {
+      } else if (dpp.dpp_ctrl == dpp_row_bcast15) {
          fprintf(output, " row_bcast:15");
-      } else if (dpp->dpp_ctrl == dpp_row_bcast31) {
+      } else if (dpp.dpp_ctrl == dpp_row_bcast31) {
          fprintf(output, " row_bcast:31");
       } else {
-         fprintf(output, " dpp_ctrl:0x%.3x", dpp->dpp_ctrl);
+         fprintf(output, " dpp_ctrl:0x%.3x", dpp.dpp_ctrl);
       }
-      if (dpp->row_mask != 0xf)
-         fprintf(output, " row_mask:0x%.1x", dpp->row_mask);
-      if (dpp->bank_mask != 0xf)
-         fprintf(output, " bank_mask:0x%.1x", dpp->bank_mask);
-      if (dpp->bound_ctrl)
+      if (dpp.row_mask != 0xf)
+         fprintf(output, " row_mask:0x%.1x", dpp.row_mask);
+      if (dpp.bank_mask != 0xf)
+         fprintf(output, " bank_mask:0x%.1x", dpp.bank_mask);
+      if (dpp.bound_ctrl)
          fprintf(output, " bound_ctrl:1");
    } else if (instr->isSDWA()) {
-      const SDWA_instruction* sdwa = instr->sdwa();
-      switch (sdwa->omod) {
+      const SDWA_instruction& sdwa = instr->sdwa();
+      switch (sdwa.omod) {
       case 1:
          fprintf(output, " *2");
          break;
@@ -653,25 +653,25 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
          fprintf(output, " *0.5");
          break;
       }
-      if (sdwa->clamp)
+      if (sdwa.clamp)
          fprintf(output, " clamp");
-      switch (sdwa->dst_sel & sdwa_asuint) {
+      switch (sdwa.dst_sel & sdwa_asuint) {
       case sdwa_udword:
          break;
       case sdwa_ubyte0:
       case sdwa_ubyte1:
       case sdwa_ubyte2:
       case sdwa_ubyte3:
-         fprintf(output, " dst_sel:%sbyte%u", sdwa->dst_sel & sdwa_sext ? "s" : "u",
-                 sdwa->dst_sel & sdwa_bytenum);
+         fprintf(output, " dst_sel:%sbyte%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
+                 sdwa.dst_sel & sdwa_bytenum);
          break;
       case sdwa_uword0:
       case sdwa_uword1:
-         fprintf(output, " dst_sel:%sword%u", sdwa->dst_sel & sdwa_sext ? "s" : "u",
-                 sdwa->dst_sel & sdwa_wordnum);
+         fprintf(output, " dst_sel:%sword%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",
+                 sdwa.dst_sel & sdwa_wordnum);
          break;
       }
-      if (sdwa->dst_preserve)
+      if (sdwa.dst_preserve)
          fprintf(output, " dst_preserve");
    }
 }
@@ -693,28 +693,28 @@ void aco_print_instr(const Instruction *instr, FILE *output)
       bool *const opsel = (bool *)alloca(instr->operands.size() * sizeof(bool));
       uint8_t *const sel = (uint8_t *)alloca(instr->operands.size() * sizeof(uint8_t));
       if (instr->isVOP3()) {
-         const VOP3_instruction* vop3 = instr->vop3();
+         const VOP3_instruction& vop3 = instr->vop3();
          for (unsigned i = 0; i < instr->operands.size(); ++i) {
-            abs[i] = vop3->abs[i];
-            neg[i] = vop3->neg[i];
-            opsel[i] = vop3->opsel & (1 << i);
+            abs[i] = vop3.abs[i];
+            neg[i] = vop3.neg[i];
+            opsel[i] = vop3.opsel & (1 << i);
             sel[i] = sdwa_udword;
          }
       } else if (instr->isDPP()) {
-         const DPP_instruction* dpp = instr->dpp();
+         const DPP_instruction& dpp = instr->dpp();
          for (unsigned i = 0; i < instr->operands.size(); ++i) {
-            abs[i] = i < 2 ? dpp->abs[i] : false;
-            neg[i] = i < 2 ? dpp->neg[i] : false;
+            abs[i] = i < 2 ? dpp.abs[i] : false;
+            neg[i] = i < 2 ? dpp.neg[i] : false;
             opsel[i] = false;
             sel[i] = sdwa_udword;
          }
       } else if (instr->isSDWA()) {
-         const SDWA_instruction* sdwa = instr->sdwa();
+         const SDWA_instruction& sdwa = instr->sdwa();
          for (unsigned i = 0; i < instr->operands.size(); ++i) {
-            abs[i] = i < 2 ? sdwa->abs[i] : false;
-            neg[i] = i < 2 ? sdwa->neg[i] : false;
+            abs[i] = i < 2 ? sdwa.abs[i] : false;
+            neg[i] = i < 2 ? sdwa.neg[i] : false;
             opsel[i] = false;
-            sel[i] = i < 2 ? sdwa->sel[i] : sdwa_udword;
+            sel[i] = i < 2 ? sdwa.sel[i] : sdwa_udword;
          }
       } else {
          for (unsigned i = 0; i < instr->operands.size(); ++i) {
@@ -756,17 +756,17 @@ void aco_print_instr(const Instruction *instr, FILE *output)
             fprintf(output, "|");
 
          if (instr->isVOP3P()) {
-            const VOP3P_instruction* vop3 = instr->vop3p();
-            if ((vop3->opsel_lo & (1 << i)) || !(vop3->opsel_hi & (1 << i))) {
+            const VOP3P_instruction& vop3 = instr->vop3p();
+            if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) {
                fprintf(output, ".%c%c",
-                       vop3->opsel_lo & (1 << i) ? 'y' : 'x',
-                       vop3->opsel_hi & (1 << i) ? 'y' : 'x');
+                       vop3.opsel_lo & (1 << i) ? 'y' : 'x',
+                       vop3.opsel_hi & (1 << i) ? 'y' : 'x');
             }
-            if (vop3->neg_lo[i] && vop3->neg_hi[i])
+            if (vop3.neg_lo[i] && vop3.neg_hi[i])
                fprintf(output, "*[-1,-1]");
-            else if (vop3->neg_lo[i])
+            else if (vop3.neg_lo[i])
                fprintf(output, "*[-1,1]");
-            else if (vop3->neg_hi[i])
+            else if (vop3.neg_hi[i])
                fprintf(output, "*[1,-1]");
          }
       }
diff --git a/src/amd/compiler/aco_reduce_assign.cpp b/src/amd/compiler/aco_reduce_assign.cpp
index 1c915f2..1d1a025 100644
--- a/src/amd/compiler/aco_reduce_assign.cpp
+++ b/src/amd/compiler/aco_reduce_assign.cpp
@@ -91,7 +91,7 @@ void setup_reduce_temp(Program* program)
          if (instr->format != Format::PSEUDO_REDUCTION)
             continue;
 
-         ReduceOp op = instr->reduction()->reduce_op;
+         ReduceOp op = instr->reduction().reduce_op;
          reduceTmp_in_loop |= block.loop_nest_depth > 0;
 
          if ((int)last_top_level_block_idx != inserted_at) {
@@ -115,7 +115,7 @@ void setup_reduce_temp(Program* program)
          }
 
          /* same as before, except for the vector temporary instead of the reduce temporary */
-         unsigned cluster_size = instr->reduction()->cluster_size;
+         unsigned cluster_size = instr->reduction().cluster_size;
          bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 ||
                           op == fmin64 || op == fmax64 || op == umin64 ||
                           op == umax64 || op == imin64 || op == imax64 ||
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 7808935..84e172f 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -503,13 +503,13 @@ void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx
          update_phi_map(ctx, tmp.get(), instr.get());
       return;
    } else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) {
-      instr->vop3()->opsel |= (byte / 2) << idx;
+      instr->vop3().opsel |= (byte / 2) << idx;
       return;
    } else if (instr->isVOP3P() && byte == 2) {
-      VOP3P_instruction* vop3p = instr->vop3p();
-      assert(!(vop3p->opsel_lo & (1 << idx)));
-      vop3p->opsel_lo |= 1 << idx;
-      vop3p->opsel_hi |= 1 << idx;
+      VOP3P_instruction& vop3p = instr->vop3p();
+      assert(!(vop3p.opsel_lo & (1 << idx)));
+      vop3p.opsel_lo |= 1 << idx;
+      vop3p.opsel_hi |= 1 << idx;
       return;
    }
 
@@ -613,9 +613,9 @@ void add_subdword_definition(Program *program, aco_ptr<Instruction>& instr, unsi
          convert_to_SDWA(chip, instr);
       return;
    } else if (reg.byte() && rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, -1, reg.byte() / 2)) {
-      VOP3_instruction *vop3 = instr->vop3();
+      VOP3_instruction& vop3 = instr->vop3();
       if (reg.byte() == 2)
-         vop3->opsel |= (1 << 3); /* dst in high half */
+         vop3.opsel |= (1 << 3); /* dst in high half */
       return;
    }
 
@@ -1569,7 +1569,7 @@ void handle_pseudo(ra_ctx& ctx,
       return;
 
    if (reg_file[scc]) {
-      instr->pseudo()->tmp_in_scc = true;
+      instr->pseudo().tmp_in_scc = true;
 
       int reg = ctx.max_used_sgpr;
       for (; reg >= 0 && reg_file[PhysReg{(unsigned)reg}]; reg--)
@@ -1585,9 +1585,9 @@ void handle_pseudo(ra_ctx& ctx,
       }
 
       adjust_max_used_regs(ctx, s1, reg);
-      instr->pseudo()->scratch_sgpr = PhysReg{(unsigned)reg};
+      instr->pseudo().scratch_sgpr = PhysReg{(unsigned)reg};
    } else {
-      instr->pseudo()->tmp_in_scc = false;
+      instr->pseudo().tmp_in_scc = false;
    }
 }
 
@@ -2156,7 +2156,7 @@ void register_allocation(Program *program, std::vector<IDSet>& live_out_per_bloc
 
             if (instr->isEXP() ||
                 (instr->isVMEM() && i == 3 && ctx.program->chip_class == GFX6) ||
-                (instr->isDS() && instr->ds()->gds)) {
+                (instr->isDS() && instr->ds().gds)) {
                for (unsigned j = 0; j < operand.size(); j++)
                   ctx.war_hint.set(operand.physReg().reg() + j);
             }
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index 25aee52..ad85259 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -320,7 +320,7 @@ void MoveState::upwards_skip()
 bool is_gs_or_done_sendmsg(const Instruction *instr)
 {
    if (instr->opcode == aco_opcode::s_sendmsg) {
-      uint16_t imm = instr->sopp()->imm;
+      uint16_t imm = instr->sopp().imm;
       return (imm & sendmsg_id_mask) == _sendmsg_gs ||
              (imm & sendmsg_id_mask) == _sendmsg_gs_done;
    }
@@ -330,7 +330,7 @@ bool is_gs_or_done_sendmsg(const Instruction *instr)
 bool is_done_sendmsg(const Instruction *instr)
 {
    if (instr->opcode == aco_opcode::s_sendmsg)
-      return (instr->sopp()->imm & sendmsg_id_mask) == _sendmsg_gs_done;
+      return (instr->sopp().imm & sendmsg_id_mask) == _sendmsg_gs_done;
    return false;
 }
 
@@ -380,14 +380,14 @@ void add_memory_event(memory_event_set *set, Instruction *instr, memory_sync_inf
 {
    set->has_control_barrier |= is_done_sendmsg(instr);
    if (instr->opcode == aco_opcode::p_barrier) {
-      Pseudo_barrier_instruction *bar = instr->barrier();
-      if (bar->sync.semantics & semantic_acquire)
-         set->bar_acquire |= bar->sync.storage;
-      if (bar->sync.semantics & semantic_release)
-         set->bar_release |= bar->sync.storage;
-      set->bar_classes |= bar->sync.storage;
-
-      set->has_control_barrier |= bar->exec_scope > scope_invocation;
+      Pseudo_barrier_instruction& bar = instr->barrier();
+      if (bar.sync.semantics & semantic_acquire)
+         set->bar_acquire |= bar.sync.storage;
+      if (bar.sync.semantics & semantic_release)
+         set->bar_release |= bar.sync.storage;
+      set->bar_classes |= bar.sync.storage;
+
+      set->has_control_barrier |= bar.exec_scope > scope_invocation;
    }
 
    if (!sync->storage)
@@ -857,7 +857,7 @@ void schedule_block(sched_ctx& ctx, Program *program, Block* block, live& live_v
       Instruction* current = block->instructions[idx].get();
 
       if (block->kind & block_kind_export_end && current->isEXP()) {
-         unsigned target = current->exp()->dest;
+         unsigned target = current->exp().dest;
          if (target >= V_008DFC_SQ_EXP_POS && target < V_008DFC_SQ_EXP_PRIM) {
             ctx.mv.current = current;
             schedule_position_export(ctx, block, live_vars.register_demand[block->index], current, idx);
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index 5b6339e..02e16c0 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -283,7 +283,7 @@ aco_ptr<Instruction> do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t
          res.reset(create_instruction<Pseudo_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
       } else if (instr->isSOPK()) {
          res.reset(create_instruction<SOPK_instruction>(instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
-         res->sopk()->imm = instr->sopk()->imm;
+         res->sopk().imm = instr->sopk().imm;
       }
       for (unsigned i = 0; i < instr->operands.size(); i++) {
          res->operands[i] = instr->operands[i];
@@ -1589,11 +1589,11 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
                   bld.insert(split);
                   for (unsigned i = 0; i < temp.size(); i++) {
                      Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false, true);
-                     instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
+                     instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
                   }
                } else {
                   Instruction *instr = bld.mubuf(opcode, scratch_rsrc, Operand(v1), scratch_offset, temp, offset, false, true);
-                  instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
+                  instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
                }
             } else {
                ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
@@ -1658,12 +1658,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
                      Temp tmp = bld.tmp(v1);
                      vec->operands[i] = Operand(tmp);
                      Instruction *instr = bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(v1), scratch_offset, offset + i * 4, false, true);
-                     instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
+                     instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
                   }
                   bld.insert(vec);
                } else {
                   Instruction *instr = bld.mubuf(opcode, def, scratch_rsrc, Operand(v1), scratch_offset, offset, false, true);
-                  instr->mubuf()->sync = memory_sync_info(storage_vgpr_spill, semantic_private);
+                  instr->mubuf().sync = memory_sync_info(storage_vgpr_spill, semantic_private);
                }
             } else {
                uint32_t spill_slot = slots[spill_id];
diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp
index b3d6db7..54f7405 100644
--- a/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -178,10 +178,10 @@ void try_remove_invert_block(ssa_elimination_ctx& ctx, Block* block)
       pred->linear_succs[0] = succ_idx;
       ctx.program->blocks[succ_idx].linear_preds[i] = pred->index;
 
-      Pseudo_branch_instruction *branch = pred->instructions.back()->branch();
-      assert(branch->isBranch());
-      branch->target[0] = succ_idx;
-      branch->target[1] = succ_idx;
+      Pseudo_branch_instruction& branch = pred->instructions.back()->branch();
+      assert(branch.isBranch());
+      branch.target[0] = succ_idx;
+      branch.target[1] = succ_idx;
    }
 
    block->instructions.clear();
@@ -196,17 +196,17 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
 
    Block& pred = ctx.program->blocks[block->linear_preds[0]];
    Block& succ = ctx.program->blocks[block->linear_succs[0]];
-   Pseudo_branch_instruction* branch = pred.instructions.back()->branch();
-   if (branch->opcode == aco_opcode::p_branch) {
-      branch->target[0] = succ.index;
-      branch->target[1] = succ.index;
-   } else if (branch->target[0] == block->index) {
-      branch->target[0] = succ.index;
-   } else if (branch->target[0] == succ.index) {
-      assert(branch->target[1] == block->index);
-      branch->target[1] = succ.index;
-      branch->opcode = aco_opcode::p_branch;
-   } else if (branch->target[1] == block->index) {
+   Pseudo_branch_instruction& branch = pred.instructions.back()->branch();
+   if (branch.opcode == aco_opcode::p_branch) {
+      branch.target[0] = succ.index;
+      branch.target[1] = succ.index;
+   } else if (branch.target[0] == block->index) {
+      branch.target[0] = succ.index;
+   } else if (branch.target[0] == succ.index) {
+      assert(branch.target[1] == block->index);
+      branch.target[1] = succ.index;
+      branch.opcode = aco_opcode::p_branch;
+   } else if (branch.target[1] == block->index) {
       /* check if there is a fall-through path from block to succ */
       bool falls_through = block->index < succ.index;
       for (unsigned j = block->index + 1; falls_through && j < succ.index; j++) {
@@ -215,35 +215,35 @@ void try_remove_simple_block(ssa_elimination_ctx& ctx, Block* block)
             falls_through = false;
       }
       if (falls_through) {
-         branch->target[1] = succ.index;
+         branch.target[1] = succ.index;
       } else {
          /* check if there is a fall-through path for the alternative target */
-         if (block->index >= branch->target[0])
+         if (block->index >= branch.target[0])
             return;
-         for (unsigned j = block->index + 1; j < branch->target[0]; j++) {
+         for (unsigned j = block->index + 1; j < branch.target[0]; j++) {
             if (!ctx.program->blocks[j].instructions.empty())
                return;
          }
 
          /* This is a (uniform) break or continue block. The branch condition has to be inverted. */
-         if (branch->opcode == aco_opcode::p_cbranch_z)
-            branch->opcode = aco_opcode::p_cbranch_nz;
-         else if (branch->opcode == aco_opcode::p_cbranch_nz)
-            branch->opcode = aco_opcode::p_cbranch_z;
+         if (branch.opcode == aco_opcode::p_cbranch_z)
+            branch.opcode = aco_opcode::p_cbranch_nz;
+         else if (branch.opcode == aco_opcode::p_cbranch_nz)
+            branch.opcode = aco_opcode::p_cbranch_z;
          else
             assert(false);
          /* also invert the linear successors */
          pred.linear_succs[0] = pred.linear_succs[1];
          pred.linear_succs[1] = succ.index;
-         branch->target[1] = branch->target[0];
-         branch->target[0] = succ.index;
+         branch.target[1] = branch.target[0];
+         branch.target[0] = succ.index;
       }
    } else {
       assert(false);
    }
 
-   if (branch->target[0] == branch->target[1])
-      branch->opcode = aco_opcode::p_branch;
+   if (branch.target[0] == branch.target[1])
+      branch.opcode = aco_opcode::p_branch;
 
    for (unsigned i = 0; i < pred.linear_succs.size(); i++)
       if (pred.linear_succs[i] == block->index)
diff --git a/src/amd/compiler/aco_statistics.cpp b/src/amd/compiler/aco_statistics.cpp
index 7e9c825..15baa26 100644
--- a/src/amd/compiler/aco_statistics.cpp
+++ b/src/amd/compiler/aco_statistics.cpp
@@ -46,7 +46,7 @@ void collect_preasm_stats(Program *program)
       program->statistics[statistic_instructions] += block.instructions.size();
 
       for (aco_ptr<Instruction>& instr : block.instructions) {
-         if (instr->isSOPP() && instr->sopp()->block != -1)
+         if (instr->isSOPP() && instr->sopp().block != -1)
             program->statistics[statistic_branches]++;
 
          if (instr->opcode == aco_opcode::p_constaddr)
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index 2c53554..3b21741 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -148,10 +148,10 @@ bool validate_ir(Program* program)
 
             check(program->chip_class >= GFX8, "SDWA is GFX8+ only", instr.get());
 
-            SDWA_instruction *sdwa = instr->sdwa();
-            check(sdwa->omod == 0 || program->chip_class >= GFX9, "SDWA omod only supported on GFX9+", instr.get());
+            SDWA_instruction& sdwa = instr->sdwa();
+            check(sdwa.omod == 0 || program->chip_class >= GFX9, "SDWA omod only supported on GFX9+", instr.get());
             if (base_format == Format::VOPC) {
-               check(sdwa->clamp == false || program->chip_class == GFX8, "SDWA VOPC clamp only supported on GFX8", instr.get());
+               check(sdwa.clamp == false || program->chip_class == GFX8, "SDWA VOPC clamp only supported on GFX8", instr.get());
                check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
                      program->chip_class >= GFX9,
                      "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
@@ -183,21 +183,21 @@ bool validate_ir(Program* program)
             }
 
             if (instr->definitions[0].regClass().is_subdword())
-               check((sdwa->dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()), "Unexpected SDWA sel for sub-dword definition", instr.get());
+               check((sdwa.dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()), "Unexpected SDWA sel for sub-dword definition", instr.get());
          }
 
          /* check opsel */
          if (instr->isVOP3()) {
-            VOP3_instruction *vop3 = instr->vop3();
-            check(vop3->opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get());
+            VOP3_instruction& vop3 = instr->vop3();
+            check(vop3.opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get());
 
             for (unsigned i = 0; i < 3; i++) {
                if (i >= instr->operands.size() ||
                    (instr->operands[i].hasRegClass() && instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
-                  check((vop3->opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
+                  check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
             }
             if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
-               check((vop3->opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr.get());
+               check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition", instr.get());
          }
 
          /* check for undefs */
@@ -377,7 +377,7 @@ bool validate_ir(Program* program)
             for (const Operand &op : instr->operands)
                check(op.regClass().type() == RegType::vgpr, "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.", instr.get());
 
-            if (instr->opcode == aco_opcode::p_reduce && instr->reduction()->cluster_size == program->wave_size)
+            if (instr->opcode == aco_opcode::p_reduce && instr->reduction().cluster_size == program->wave_size)
                check(instr->definitions[0].regClass().type() == RegType::sgpr, "The result of unclustered reductions must go into an SGPR.", instr.get());
             else
                check(instr->definitions[0].regClass().type() == RegType::vgpr, "The result of scans and clustered reductions must go into a VGPR.", instr.get());
@@ -549,7 +549,7 @@ bool validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& inst
       return byte == 0;
    if (instr->isPseudo() && chip >= GFX8)
       return true;
-   if (instr->isSDWA() && (instr->sdwa()->sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes()))
+   if (instr->isSDWA() && (instr->sdwa().sel[index] & sdwa_asuint) == (sdwa_isra | op.bytes()))
       return true;
    if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1))
       return true;
@@ -599,7 +599,7 @@ bool validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& i
 
    if (instr->isPseudo() && chip >= GFX8)
       return true;
-   if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes()))
+   if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
       return true;
    if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1))
       return true;
@@ -630,7 +630,7 @@ unsigned get_subdword_bytes_written(Program *program, const aco_ptr<Instruction>
 
    if (instr->isPseudo())
       return chip >= GFX8 ? def.bytes() : def.size() * 4u;
-   if (instr->isSDWA() && instr->sdwa()->dst_sel == (sdwa_isra | def.bytes()))
+   if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
       return def.bytes();
 
    switch (instr->opcode) {
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index c0c2165..94105e3 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -735,7 +735,7 @@ BEGIN_TEST(optimize.add3)
    //! v1: %res1 = v_add_u32 %a, %tmp1
    //! p_unit_test 1, %res1
    tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
-   tmp.instr->vop3()->clamp = true;
+   tmp.instr->vop3().clamp = true;
    writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
 
    //! v1: %tmp2 = v_add_u32 %b, %c
@@ -743,7 +743,7 @@ BEGIN_TEST(optimize.add3)
    //! p_unit_test 2, %res2
    tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
    tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
-   tmp.instr->vop3()->clamp = true;
+   tmp.instr->vop3().clamp = true;
    writeout(2, tmp);
 
    finish_opt_test();
diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp
index 2c79203..ec4a140 100644
--- a/src/amd/compiler/tests/test_to_hw_instr.cpp
+++ b/src/amd/compiler/tests/test_to_hw_instr.cpp
@@ -197,7 +197,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
                                        Definition(v0_lo, v1),
                                        Operand(v0_lo, v1b), Operand(v0_lo, v1b),
                                        Operand(v0_lo, v1b), Operand(v0_lo, v1b));
-      pseudo->pseudo()->scratch_sgpr = m0;
+      pseudo->pseudo().scratch_sgpr = m0;
 
       //~gfx[67]! p_unit_test 14
       //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
-- 
2.7.4