aco: remove VOP[123C]P? structs

author Georg Lehmann <dadschoorse@gmail.com>

Tue, 21 Feb 2023 19:08:42 +0000 (20:08 +0100)

committer Marge Bot <emma+marge@anholt.net>

Tue, 7 Mar 2023 11:53:23 +0000 (11:53 +0000)
author Georg Lehmann <dadschoorse@gmail.com>
Tue, 21 Feb 2023 19:08:42 +0000 (20:08 +0100)
committer Marge Bot <emma+marge@anholt.net>
Tue, 7 Mar 2023 11:53:23 +0000 (11:53 +0000)
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp

index a49b466..c2b4f9c 100644 (file)
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -780,7 +780,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
         * except abs/neg is ignored). src2 cannot be literal and src0/src1 must be VGPR.
         */
        if (instr->isVOP3()) {
-         VOP3_instruction& vop3 = instr->vop3();
+         VALU_instruction& vop3 = instr->valu();
  
           if (instr->isVOP2()) {
              opcode = opcode + 0x100;
@@ -840,7 +840,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
           out.push_back(encoding);
  
        } else if (instr->isVOP3P()) {
-         VOP3P_instruction& vop3 = instr->vop3p();
+         VALU_instruction& vop3 = instr->valu();
  
           uint32_t encoding;
           if (ctx.gfx_level == GFX9) {
diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py

index 0cc869b..9ab0a2a 100644 (file)
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -517,9 +517,9 @@ public:
        int num_defs = carry_out ? 2 : 1;
        aco_ptr<Instruction> sub;
        if (vop3)
-        sub.reset(create_instruction<VOP3_instruction>(op, Format::VOP3, num_ops, num_defs));
+        sub.reset(create_instruction<VALU_instruction>(op, Format::VOP3, num_ops, num_defs));
        else
-        sub.reset(create_instruction<VOP2_instruction>(op, Format::VOP2, num_ops, num_defs));
+        sub.reset(create_instruction<VALU_instruction>(op, Format::VOP2, num_ops, num_defs));
        sub->operands[0] = a.op;
        sub->operands[1] = b.op;
        if (!borrow.op.isUndefined())
@@ -562,14 +562,14 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
             ("branch", [Format.PSEUDO_BRANCH], 'Pseudo_branch_instruction', itertools.product([1], [0, 1])),
             ("barrier", [Format.PSEUDO_BARRIER], 'Pseudo_barrier_instruction', [(0, 0)]),
             ("reduction", [Format.PSEUDO_REDUCTION], 'Pseudo_reduction_instruction', [(3, 3)]),
-           ("vop1", [Format.VOP1], 'VOP1_instruction', [(0, 0), (1, 1), (2, 2)]),
+           ("vop1", [Format.VOP1], 'VALU_instruction', [(0, 0), (1, 1), (2, 2)]),
             ("vop1_sdwa", [Format.VOP1, Format.SDWA], 'SDWA_instruction', [(1, 1)]),
-           ("vop2", [Format.VOP2], 'VOP2_instruction', itertools.product([1, 2], [2, 3])),
+           ("vop2", [Format.VOP2], 'VALU_instruction', itertools.product([1, 2], [2, 3])),
             ("vop2_sdwa", [Format.VOP2, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2, 3])),
-           ("vopc", [Format.VOPC], 'VOPC_instruction', itertools.product([1, 2], [2])),
+           ("vopc", [Format.VOPC], 'VALU_instruction', itertools.product([1, 2], [2])),
             ("vopc_sdwa", [Format.VOPC, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2])),
-           ("vop3", [Format.VOP3], 'VOP3_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
-           ("vop3p", [Format.VOP3P], 'VOP3P_instruction', [(1, 2), (1, 3)]),
+           ("vop3", [Format.VOP3], 'VALU_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
+           ("vop3p", [Format.VOP3P], 'VALU_instruction', [(1, 2), (1, 3)]),
             ("vinterp_inreg", [Format.VINTERP_INREG], 'VINTERP_inreg_instruction', [(1, 3)]),
             ("vintrp", [Format.VINTRP], 'VINTRP_instruction', [(1, 2), (1, 3)]),
             ("vop1_dpp", [Format.VOP1, Format.DPP16], 'DPP16_instruction', [(1, 1)]),
@@ -578,9 +578,9 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
             ("vop1_dpp8", [Format.VOP1, Format.DPP8], 'DPP8_instruction', [(1, 1)]),
             ("vop2_dpp8", [Format.VOP2, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2, 3])),
             ("vopc_dpp8", [Format.VOPC, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2])),
-           ("vop1_e64", [Format.VOP1, Format.VOP3], 'VOP3_instruction', itertools.product([1], [1])),
-           ("vop2_e64", [Format.VOP2, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2, 3])),
-           ("vopc_e64", [Format.VOPC, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2])),
+           ("vop1_e64", [Format.VOP1, Format.VOP3], 'VALU_instruction', itertools.product([1], [1])),
+           ("vop2_e64", [Format.VOP2, Format.VOP3], 'VALU_instruction', itertools.product([1, 2], [2, 3])),
+           ("vopc_e64", [Format.VOPC, Format.VOP3], 'VALU_instruction', itertools.product([1, 2], [2])),
             ("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]),
             ("global", [Format.GLOBAL], 'FLAT_instruction', [(0, 3), (1, 2)]),
             ("scratch", [Format.SCRATCH], 'FLAT_instruction', [(0, 3), (1, 2)])]
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp

index 7e606ca..c2f2423 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -286,7 +286,7 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
           Temp op1 = bld.copy(bld.def(s1), Operand::c32(lane_mask & 0xffffffff));
           Temp op2 = bld.copy(bld.def(s1), Operand::c32(lane_mask >> 32));
           Builder::Result ret = bld.vop3(opcode, bld.def(v1), src, op1, op2);
-         ret->vop3().opsel = 0x3; /* set BOUND_CTRL/FETCH_INACTIVE */
+         ret->valu().opsel = 0x3; /* set BOUND_CTRL/FETCH_INACTIVE */
           return ret;
        }
  
@@ -1035,8 +1035,8 @@ emit_idot_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Te
  
     Builder bld(ctx->program, ctx->block);
     bld.is_precise = instr->exact;
-   VOP3P_instruction& vop3p =
-      bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7)->vop3p();
+   VALU_instruction& vop3p =
+      bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7)->valu();
     vop3p.clamp = clamp;
     u_foreach_bit (i, neg_lo)
        vop3p.neg_lo[i] = true;
@@ -1363,7 +1363,7 @@ emit_floor_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val)
     Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
  
     Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v);
-   add->vop3().neg[1] = true;
+   add->valu().neg[1] = true;
  
     return add->definitions[0].getTemp();
  }
@@ -1383,7 +1383,7 @@ uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
     } else {
        add = bld.vop2_e64(aco_opcode::v_add_co_u32, dst, bld.def(bld.lm), src0, src1);
     }
-   add->vop3().clamp = 1;
+   add->valu().clamp = 1;
     return dst.getTemp();
  }
  
@@ -1402,7 +1402,7 @@ usub32_sat(Builder& bld, Definition dst, Temp src0, Temp src1)
     } else {
        sub = bld.vop2_e64(aco_opcode::v_sub_co_u32, dst, bld.def(bld.lm), src0, src1);
     }
-   sub->vop3().clamp = 1;
+   sub->valu().clamp = 1;
     return dst.getTemp();
  }
  
@@ -1936,7 +1936,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
        if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
           Instruction* add_instr =
              emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
-         add_instr->vop3p().clamp = 1;
+         add_instr->valu().clamp = 1;
           break;
        }
        Temp src0 = get_alu_src(ctx, instr->src[0]);
@@ -1957,7 +1957,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
              add_instr =
                 bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
           }
-         add_instr->vop3().clamp = 1;
+         add_instr->valu().clamp = 1;
           break;
        } else if (dst.regClass() == v1) {
           uadd32_sat(bld, Definition(dst), src0, src1);
@@ -1998,7 +1998,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
              carry1 = bld.tmp(bld.lm);
              bld.vop2_e64(aco_opcode::v_addc_co_u32, Definition(dst1), Definition(carry1),
                           as_vgpr(ctx, src01), as_vgpr(ctx, src11), carry0)
-               ->vop3().clamp = 1;
+               ->valu()
+               .clamp = 1;
           } else {
              Temp no_sat1 = bld.tmp(v1);
              carry1 = bld.vadd32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp();
@@ -2018,7 +2019,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
        if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
           Instruction* add_instr =
              emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
-         add_instr->vop3p().clamp = 1;
+         add_instr->valu().clamp = 1;
           break;
        }
        Temp src0 = get_alu_src(ctx, instr->src[0]);
@@ -2039,11 +2040,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
        if (dst.regClass() == v2b) {
           Instruction* add_instr =
              bld.vop3(aco_opcode::v_add_i16, Definition(dst), src0, src1).instr;
-         add_instr->vop3().clamp = 1;
+         add_instr->valu().clamp = 1;
        } else if (dst.regClass() == v1) {
           Instruction* add_instr =
              bld.vop3(aco_opcode::v_add_i32, Definition(dst), src0, src1).instr;
-         add_instr->vop3().clamp = 1;
+         add_instr->valu().clamp = 1;
        } else {
           isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
@@ -2178,7 +2179,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
     case nir_op_usub_sat: {
        if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
           Instruction* sub_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_sub_u16, dst);
-         sub_instr->vop3p().clamp = 1;
+         sub_instr->valu().clamp = 1;
           break;
        }
        Temp src0 = get_alu_src(ctx, instr->src[0]);
@@ -2200,7 +2201,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
              }
              sub_instr = bld.vop2_e64(op, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
           }
-         sub_instr->vop3().clamp = 1;
+         sub_instr->valu().clamp = 1;
           break;
        } else if (dst.regClass() == v1) {
           usub32_sat(bld, Definition(dst), src0, as_vgpr(ctx, src1));
@@ -2240,7 +2241,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
              carry1 = bld.tmp(bld.lm);
              bld.vop2_e64(aco_opcode::v_subb_co_u32, Definition(dst1), Definition(carry1),
                           as_vgpr(ctx, src01), as_vgpr(ctx, src11), carry0)
-               ->vop3().clamp = 1;
+               ->valu()
+               .clamp = 1;
           } else {
              Temp no_sat1 = bld.tmp(v1);
              carry1 = bld.vsub32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp();
@@ -2259,7 +2261,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
     case nir_op_isub_sat: {
        if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
           Instruction* sub_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_sub_i16, dst);
-         sub_instr->vop3p().clamp = 1;
+         sub_instr->valu().clamp = 1;
           break;
        }
        Temp src0 = get_alu_src(ctx, instr->src[0]);
@@ -2280,11 +2282,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
        if (dst.regClass() == v2b) {
           Instruction* sub_instr =
              bld.vop3(aco_opcode::v_sub_i16, Definition(dst), src0, src1).instr;
-         sub_instr->vop3().clamp = 1;
+         sub_instr->valu().clamp = 1;
        } else if (dst.regClass() == v1) {
           Instruction* sub_instr =
              bld.vop3(aco_opcode::v_sub_i32, Definition(dst), src0, src1).instr;
-         sub_instr->vop3().clamp = 1;
+         sub_instr->valu().clamp = 1;
        } else {
           isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
@@ -2395,7 +2397,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
     case nir_op_fsub: {
        if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
           Instruction* add = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst);
-         VOP3P_instruction& sub = add->vop3p();
+         VALU_instruction& sub = add->valu();
           sub.neg_lo[1] = true;
           sub.neg_hi[1] = true;
           break;
@@ -2416,7 +2418,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
        } else if (dst.regClass() == v2) {
           Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), as_vgpr(ctx, src0),
                                       as_vgpr(ctx, src1));
-         add->vop3().neg[1] = true;
+         add->valu().neg[1] = true;
        } else {
           isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
@@ -2585,8 +2587,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
           Instruction* vop3p =
              bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
                        instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
-         vop3p->vop3p().neg_lo[0] = true;
-         vop3p->vop3p().neg_hi[0] = true;
+         vop3p->valu().neg_lo[0] = true;
+         vop3p->valu().neg_hi[0] = true;
           break;
        }
        Temp src = get_alu_src(ctx, instr->src[0]);
@@ -2615,8 +2617,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
              bld.vop3p(aco_opcode::v_pk_max_f16, Definition(dst), src, src,
                        instr->src[0].swizzle[0] & 1 ? 3 : 0, instr->src[0].swizzle[1] & 1 ? 3 : 0)
                 .instr;
-         vop3p->vop3p().neg_lo[1] = true;
-         vop3p->vop3p().neg_hi[1] = true;
+         vop3p->valu().neg_lo[1] = true;
+         vop3p->valu().neg_hi[1] = true;
           break;
        }
        Temp src = get_alu_src(ctx, instr->src[0]);
@@ -2624,12 +2626,12 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
           Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst),
                                           Operand::c16(0x3c00), as_vgpr(ctx, src))
                                 .instr;
-         mul->vop3().abs[1] = true;
+         mul->valu().abs[1] = true;
        } else if (dst.regClass() == v1) {
           Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f32, Definition(dst),
                                           Operand::c32(0x3f800000u), as_vgpr(ctx, src))
                                 .instr;
-         mul->vop3().abs[1] = true;
+         mul->valu().abs[1] = true;
        } else if (dst.regClass() == v2) {
           if (ctx->block->fp_mode.must_flush_denorms16_64)
              src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand::c64(0x3FF0000000000000),
@@ -2649,7 +2651,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
           Instruction* vop3p =
              bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
                        instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
-         vop3p->vop3p().clamp = true;
+         vop3p->valu().clamp = true;
           break;
        }
        Temp src = get_alu_src(ctx, instr->src[0]);
@@ -2664,7 +2666,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
           // TODO: confirm that this holds under any circumstances
        } else if (dst.regClass() == v2) {
           Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand::zero());
-         add->vop3().clamp = true;
+         add->valu().clamp = true;
        } else {
           isel_err(&instr->instr, "Unimplemented NIR instr bit size");
        }
@@ -2814,13 +2816,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
              Instruction* sub =
                 bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp,
                          bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), bfi));
-            sub->vop3().neg[1] = true;
+            sub->valu().neg[1] = true;
              tmp = sub->definitions[0].getTemp();
  
              Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u),
                                  Operand::c32(0x432fffffu));
              Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.def(bld.lm), src0, v);
-            vop3->vop3().abs[0] = true;
+            vop3->valu().abs[0] = true;
              Temp cond = vop3->definitions[0].getTemp();
  
              Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
@@ -3666,7 +3668,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
           f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
           Temp smallest = bld.copy(bld.def(s1), Operand::c32(0x38800000u));
           Instruction* tmp0 = bld.vopc_e64(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), f32, smallest);
-         tmp0->vop3().abs[0] = true;
+         tmp0->valu().abs[0] = true;
           Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f32, bld.def(bld.lm), Operand::zero(), f32);
           cmp_res = bld.sop2(aco_opcode::s_nand_b64, bld.def(s2), bld.def(s1, scc),
                              tmp0->definitions[0].getTemp(), tmp1);
@@ -9243,8 +9245,8 @@ prepare_cube_coords(isel_context* ctx, std::vector<Temp>& coords, Temp* ddx, Tem
  
     ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]);
  
-   aco_ptr<VOP3_instruction> vop3a{
-      create_instruction<VOP3_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
+   aco_ptr<VALU_instruction> vop3a{
+      create_instruction<VALU_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
     vop3a->operands[0] = Operand(ma);
     vop3a->abs[0] = true;
     Temp invma = bld.tmp(v1);
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp

index c4f3367..a657f4d 100644 (file)
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -231,7 +231,7 @@ can_use_SDWA(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr, bool pr
        return true;
  
     if (instr->isVOP3()) {
-      VOP3_instruction& vop3 = instr->vop3();
+      VALU_instruction& vop3 = instr->valu();
        if (instr->format == Format::VOP3)
           return false;
        if (vop3.clamp && instr->isVOPC() && gfx_level != GFX8)
@@ -303,7 +303,7 @@ convert_to_SDWA(amd_gfx_level gfx_level, aco_ptr<Instruction>& instr)
     SDWA_instruction& sdwa = instr->sdwa();
  
     if (tmp->isVOP3()) {
-      VOP3_instruction& vop3 = tmp->vop3();
+      VALU_instruction& vop3 = tmp->valu();
        memcpy(sdwa.neg, vop3.neg, sizeof(sdwa.neg));
        memcpy(sdwa.abs, vop3.abs, sizeof(sdwa.abs));
        sdwa.omod = vop3.omod;
@@ -354,7 +354,7 @@ can_use_DPP(const aco_ptr<Instruction>& instr, bool pre_ra, bool dpp8)
        return false;
  
     if (instr->isVOP3()) {
-      const VOP3_instruction* vop3 = &instr->vop3();
+      const VALU_instruction* vop3 = &instr->valu();
        if (vop3->clamp || vop3->omod || vop3->opsel)
           return false;
        if (dpp8)
@@ -405,7 +405,7 @@ convert_to_DPP(aco_ptr<Instruction>& instr, bool dpp8)
        dpp->bank_mask = 0xf;
  
        if (tmp->isVOP3()) {
-         const VOP3_instruction* vop3 = &tmp->vop3();
+         const VALU_instruction* vop3 = &tmp->valu();
           memcpy(dpp->neg, vop3->neg, sizeof(dpp->neg));
           memcpy(dpp->abs, vop3->abs, sizeof(dpp->abs));
        }
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h

index f426f21..d4b81ce 100644 (file)
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1014,12 +1014,7 @@ struct Pseudo_branch_instruction;
  struct Pseudo_barrier_instruction;
  struct Pseudo_reduction_instruction;
  struct VALU_instruction;
-typedef VALU_instruction VOP3P_instruction;
  struct VINTERP_inreg_instruction;
-typedef VALU_instruction VOP1_instruction;
-typedef VALU_instruction VOP2_instruction;
-typedef VALU_instruction VOPC_instruction;
-typedef VALU_instruction VOP3_instruction;
  struct VINTRP_instruction;
  struct DPP16_instruction;
  struct DPP8_instruction;
@@ -1253,16 +1248,6 @@ struct Instruction {
        return *(Pseudo_reduction_instruction*)this;
     }
     constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
-   VOP3P_instruction& vop3p() noexcept
-   {
-      assert(isVOP3P());
-      return *(VOP3P_instruction*)this;
-   }
-   const VOP3P_instruction& vop3p() const noexcept
-   {
-      assert(isVOP3P());
-      return *(VOP3P_instruction*)this;
-   }
     constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; }
     VINTERP_inreg_instruction& vinterp_inreg() noexcept
     {
@@ -1275,49 +1260,9 @@ struct Instruction {
        return *(VINTERP_inreg_instruction*)this;
     }
     constexpr bool isVINTERP_INREG() const noexcept { return format == Format::VINTERP_INREG; }
-   VOP1_instruction& vop1() noexcept
-   {
-      assert(isVOP1());
-      return *(VOP1_instruction*)this;
-   }
-   const VOP1_instruction& vop1() const noexcept
-   {
-      assert(isVOP1());
-      return *(VOP1_instruction*)this;
-   }
     constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
-   VOP2_instruction& vop2() noexcept
-   {
-      assert(isVOP2());
-      return *(VOP2_instruction*)this;
-   }
-   const VOP2_instruction& vop2() const noexcept
-   {
-      assert(isVOP2());
-      return *(VOP2_instruction*)this;
-   }
     constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
-   VOPC_instruction& vopc() noexcept
-   {
-      assert(isVOPC());
-      return *(VOPC_instruction*)this;
-   }
-   const VOPC_instruction& vopc() const noexcept
-   {
-      assert(isVOPC());
-      return *(VOPC_instruction*)this;
-   }
     constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
-   VOP3_instruction& vop3() noexcept
-   {
-      assert(isVOP3());
-      return *(VOP3_instruction*)this;
-   }
-   const VOP3_instruction& vop3() const noexcept
-   {
-      assert(isVOP3());
-      return *(VOP3_instruction*)this;
-   }
     constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; }
     VINTRP_instruction& vintrp() noexcept
     {
@@ -1814,7 +1759,7 @@ Instruction::usesModifiers() const noexcept
        return true;
  
     if (isVOP3P()) {
-      const VOP3P_instruction& vop3p = this->vop3p();
+      const VALU_instruction& vop3p = this->valu();
        for (unsigned i = 0; i < operands.size(); i++) {
           if (vop3p.neg_lo[i] || vop3p.neg_hi[i])
              return true;
@@ -1825,7 +1770,7 @@ Instruction::usesModifiers() const noexcept
        }
        return vop3p.opsel_lo || vop3p.clamp;
     } else if (isVOP3()) {
-      const VOP3_instruction& vop3 = this->vop3();
+      const VALU_instruction& vop3 = this->valu();
        for (unsigned i = 0; i < operands.size(); i++) {
           if (vop3.abs[i] || vop3.neg[i])
              return true;
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp

index 3477563..af26ead 100644 (file)
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -655,7 +655,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
                          Operand(PhysReg{tmp + i}, v1), Operand::c32(0xffffffffu),
                          Operand::c32(0xffffffffu))
                    .instr;
-            perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
+            perm->valu().opsel = 1; /* FI (Fetch Inactive) */
           }
           bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand::c64(UINT64_MAX));
  
@@ -789,7 +789,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
                          Operand(PhysReg{tmp + i}, v1), Operand::c32(0xffffffffu),
                          Operand::c32(0xffffffffu))
                    .instr;
-            perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
+            perm->valu().opsel = 1; /* FI (Fetch Inactive) */
           }
           emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
  
@@ -1110,7 +1110,7 @@ emit_v_mov_b16(Builder& bld, Definition dst, Operand op)
           /* v_add_f16 is smaller because it can use 16bit fp inline constants. */
           Instruction* instr = bld.vop2_e64(aco_opcode::v_add_f16, dst, op, Operand::zero());
           if (dst.physReg().byte() == 2)
-            instr->vop3().opsel = 0x8;
+            instr->valu().opsel = 0x8;
           return;
        }
        op = Operand::c32((int32_t)(int16_t)op.constantValue());
@@ -1122,9 +1122,9 @@ emit_v_mov_b16(Builder& bld, Definition dst, Operand op)
        // TODO: this can use VOP1 for vgpr0-127 with assembler support
        Instruction* instr = bld.vop1_e64(aco_opcode::v_mov_b16, dst, op);
        if (op.physReg().byte() == 2)
-         instr->vop3().opsel |= 0x1;
+         instr->valu().opsel |= 0x1;
        if (dst.physReg().byte() == 2)
-         instr->vop3().opsel |= 0x8;
+         instr->valu().opsel |= 0x8;
     }
  }
  
@@ -1226,12 +1226,12 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
           if (dst.physReg().byte() == 2) {
              Operand def_lo(dst.physReg().advance(-2), v2b);
              Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op);
-            instr->vop3().opsel = 0;
+            instr->valu().opsel = 0;
           } else {
              assert(dst.physReg().byte() == 0);
              Operand def_hi(dst.physReg().advance(2), v2b);
              Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
-            instr->vop3().opsel = 2;
+            instr->valu().opsel = 2;
           }
        } else if (can_use_perm) {
           uint8_t swiz[] = {4, 5, 6, 7};
@@ -1307,11 +1307,11 @@ addsub_subdword_gfx11(Builder& bld, Definition dst, Operand src0, Operand src1,
     Instruction* instr =
        bld.vop3(sub ? aco_opcode::v_sub_u16_e64 : aco_opcode::v_add_u16_e64, dst, src0, src1).instr;
     if (src0.physReg().byte() == 2)
-      instr->vop3().opsel |= 0x1;
+      instr->valu().opsel |= 0x1;
     if (src1.physReg().byte() == 2)
-      instr->vop3().opsel |= 0x2;
+      instr->valu().opsel |= 0x2;
     if (dst.physReg().byte() == 2)
-      instr->vop3().opsel |= 0x8;
+      instr->valu().opsel |= 0x8;
  }
  
  bool
@@ -1555,7 +1555,7 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
     if (can_use_pack) {
        Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi);
        /* opsel: 0 = select low half, 1 = select high half. [0] = src0, [1] = src1 */
-      instr->vop3().opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
+      instr->valu().opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
        return;
     }
  
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp

index db977fe..4ce05c2 100644 (file)
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -85,7 +85,7 @@ struct InstrHash {
     std::size_t operator()(Instruction* instr) const
     {
        if (instr->isVOP3())
-         return hash_murmur_32<VOP3_instruction>(instr);
+         return hash_murmur_32<VALU_instruction>(instr);
  
        if (instr->isDPP16())
           return hash_murmur_32<DPP16_instruction>(instr);
@@ -168,8 +168,8 @@ struct InstrPred {
           return a->pass_flags == b->pass_flags;
  
        if (a->isVOP3()) {
-         VOP3_instruction& a3 = a->vop3();
-         VOP3_instruction& b3 = b->vop3();
+         VALU_instruction& a3 = a->valu();
+         VALU_instruction& b3 = b->valu();
           for (unsigned i = 0; i < 3; i++) {
              if (a3.abs[i] != b3.abs[i] || a3.neg[i] != b3.neg[i])
                 return false;
@@ -233,8 +233,8 @@ struct InstrPred {
           return true;
        }
        case Format::VOP3P: {
-         VOP3P_instruction& a3P = a->vop3p();
-         VOP3P_instruction& b3P = b->vop3p();
+         VALU_instruction& a3P = a->valu();
+         VALU_instruction& b3P = b->valu();
           for (unsigned i = 0; i < 3; i++) {
              if (a3P.neg_lo[i] != b3P.neg_lo[i] || a3P.neg_hi[i] != b3P.neg_hi[i])
                 return false;
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp

index eb976b9..bafb55c 100644 (file)
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -640,7 +640,7 @@ to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr)
  
     aco_ptr<Instruction> tmp = std::move(instr);
     Format format = asVOP3(tmp->format);
-   instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(),
+   instr.reset(create_instruction<VALU_instruction>(tmp->opcode, format, tmp->operands.size(),
                                                      tmp->definitions.size()));
     std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
     for (unsigned i = 0; i < instr->definitions.size(); i++) {
@@ -940,7 +940,7 @@ get_operand_size(aco_ptr<Instruction>& instr, unsigned index)
        return index == 2 ? 64 : 32;
     else if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
              instr->opcode == aco_opcode::v_fma_mixlo_f16)
-      return instr->vop3p().opsel_hi & (1u << index) ? 16 : 32;
+      return instr->valu().opsel_hi & (1u << index) ? 16 : 32;
     else if (instr->isVALU() || instr->isSALU())
        return instr_info.operand_size[(int)instr->opcode];
     else
@@ -978,7 +978,7 @@ propagate_constants_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr, ssa_info& i
        return;
  
     /* try to fold inline constants */
-   VOP3P_instruction* vop3p = &instr->vop3p();
+   VALU_instruction* vop3p = &instr->valu();
     bool opsel_lo = (vop3p->opsel_lo >> i) & 1;
     bool opsel_hi = (vop3p->opsel_hi >> i) & 1;
  
@@ -1128,7 +1128,7 @@ can_apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_i
        return true;
     } else if (instr->isVOP3() && sel.size() == 2 &&
                can_use_opsel(ctx.program->gfx_level, instr->opcode, idx) &&
-              !(instr->vop3().opsel & (1 << idx))) {
+              !(instr->valu().opsel & (1 << idx))) {
        return true;
     } else if (instr->opcode == aco_opcode::p_extract) {
        SubdwordSel instrSel = parse_extract(instr.get());
@@ -1182,12 +1182,12 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
                (instr->operands[!idx].is16bit() ||
                 instr->operands[!idx].constantValue() <= UINT16_MAX)) {
        Instruction* mad =
-         create_instruction<VOP3_instruction>(aco_opcode::v_mad_u32_u16, Format::VOP3, 3, 1);
+         create_instruction<VALU_instruction>(aco_opcode::v_mad_u32_u16, Format::VOP3, 3, 1);
        mad->definitions[0] = instr->definitions[0];
        mad->operands[0] = instr->operands[0];
        mad->operands[1] = instr->operands[1];
        mad->operands[2] = Operand::zero();
-      mad->vop3().opsel = (sel.offset() / 2) << idx;
+      mad->valu().opsel = (sel.offset() / 2) << idx;
        instr.reset(mad);
     } else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
                (tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
@@ -1195,7 +1195,7 @@ apply_extract(opt_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, ssa_info&
        static_cast<SDWA_instruction*>(instr.get())->sel[idx] = sel;
     } else if (instr->isVOP3()) {
        if (sel.offset())
-         instr->vop3().opsel |= 1 << idx;
+         instr->valu().opsel |= 1 << idx;
     } else if (instr->opcode == aco_opcode::p_extract) {
        SubdwordSel instrSel = parse_extract(instr.get());
  
@@ -1871,7 +1871,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
                   instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u))) { /* -1.0 */
                 bool neg1 = instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u);
  
-               VOP3_instruction* vop3 = instr->isVOP3() ? &instr->vop3() : NULL;
+               VALU_instruction* vop3 = instr->isVOP3() ? &instr->valu() : NULL;
                 if (vop3 && (vop3->abs[!i] || vop3->neg[!i] || vop3->clamp || vop3->omod))
                    continue;
  
@@ -1918,7 +1918,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        break;
     case aco_opcode::v_med3_f16:
     case aco_opcode::v_med3_f32: { /* clamp */
-      VOP3_instruction& vop3 = instr->vop3();
+      VALU_instruction& vop3 = instr->valu();
        if (vop3.abs[0] || vop3.abs[1] || vop3.abs[2] || vop3.neg[0] || vop3.neg[1] || vop3.neg[2] ||
            vop3.omod != 0 || vop3.opsel != 0)
           break;
@@ -2260,7 +2260,7 @@ combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr)
           return false;
  
        if (op_instr[i]->isVOP3()) {
-         VOP3_instruction& vop3 = op_instr[i]->vop3();
+         VALU_instruction& vop3 = op_instr[i]->valu();
           if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 ||
               vop3.opsel == 2)
              return false;
@@ -2294,8 +2294,8 @@ combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr)
     }
     Instruction* new_instr;
     if (neg[0] || neg[1] || abs[0] || abs[1] || opsel || num_sgprs > 1) {
-      VOP3_instruction* vop3 =
-         create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+      VALU_instruction* vop3 =
+         create_instruction<VALU_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
        for (unsigned i = 0; i < 2; i++) {
           vop3->neg[i] = neg[i];
           vop3->abs[i] = abs[i];
@@ -2303,7 +2303,7 @@ combine_ordering_test(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        vop3->opsel = opsel;
        new_instr = static_cast<Instruction*>(vop3);
     } else {
-      new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
+      new_instr = create_instruction<VALU_instruction>(new_op, Format::VOPC, 2, 1);
     }
     new_instr->operands[0] = copy_operand(ctx, Operand(op[0]));
     new_instr->operands[1] = copy_operand(ctx, Operand(op[1]));
@@ -2365,9 +2365,9 @@ combine_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
     aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
     Instruction* new_instr;
     if (cmp->isVOP3()) {
-      VOP3_instruction* new_vop3 =
-         create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
-      VOP3_instruction& cmp_vop3 = cmp->vop3();
+      VALU_instruction* new_vop3 =
+         create_instruction<VALU_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+      VALU_instruction& cmp_vop3 = cmp->valu();
        memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
        memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
        new_vop3->clamp = cmp_vop3.clamp;
@@ -2375,7 +2375,7 @@ combine_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        new_vop3->opsel = cmp_vop3.opsel;
        new_instr = new_vop3;
     } else {
-      new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
+      new_instr = create_instruction<VALU_instruction>(new_op, Format::VOPC, 2, 1);
     }
     new_instr->operands[0] = copy_operand(ctx, cmp->operands[0]);
     new_instr->operands[1] = copy_operand(ctx, cmp->operands[1]);
@@ -2540,7 +2540,7 @@ combine_constant_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        return false;
  
     if (nan_test->isVOP3()) {
-      VOP3_instruction& vop3 = nan_test->vop3();
+      VALU_instruction& vop3 = nan_test->valu();
        if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 ||
            vop3.opsel == 2)
           return false;
@@ -2566,9 +2566,9 @@ combine_constant_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
     aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
     Instruction* new_instr;
     if (cmp->isVOP3()) {
-      VOP3_instruction* new_vop3 =
-         create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
-      VOP3_instruction& cmp_vop3 = cmp->vop3();
+      VALU_instruction* new_vop3 =
+         create_instruction<VALU_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+      VALU_instruction& cmp_vop3 = cmp->valu();
        memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
        memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
        new_vop3->clamp = cmp_vop3.clamp;
@@ -2576,7 +2576,7 @@ combine_constant_comparison_ordering(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        new_vop3->opsel = cmp_vop3.opsel;
        new_instr = new_vop3;
     } else {
-      new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
+      new_instr = create_instruction<VALU_instruction>(new_op, Format::VOPC, 2, 1);
     }
     new_instr->operands[0] = copy_operand(ctx, cmp->operands[0]);
     new_instr->operands[1] = copy_operand(ctx, cmp->operands[1]);
@@ -2635,8 +2635,8 @@ match_op3_for_vop3(opt_ctx& ctx, aco_opcode op1, aco_opcode op2, Instruction* op
     if (!op2_instr || op2_instr->opcode != op2)
        return false;
  
-   VOP3_instruction* op1_vop3 = op1_instr->isVOP3() ? &op1_instr->vop3() : NULL;
-   VOP3_instruction* op2_vop3 = op2_instr->isVOP3() ? &op2_instr->vop3() : NULL;
+   VALU_instruction* op1_vop3 = op1_instr->isVOP3() ? &op1_instr->valu() : NULL;
+   VALU_instruction* op2_vop3 = op2_instr->isVOP3() ? &op2_instr->valu() : NULL;
  
     if (op1_instr->isSDWA() || op2_instr->isSDWA())
        return false;
@@ -2699,7 +2699,7 @@ create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>& instr
                      Operand operands[3], bool neg[3], bool abs[3], uint8_t opsel, bool clamp,
                      unsigned omod)
  {
-   VOP3_instruction* new_instr = create_instruction<VOP3_instruction>(opcode, Format::VOP3, 3, 1);
+   VALU_instruction* new_instr = create_instruction<VALU_instruction>(opcode, Format::VOP3, 3, 1);
     memcpy(new_instr->abs, abs, sizeof(bool[3]));
     memcpy(new_instr->neg, neg, sizeof(bool[3]));
     new_instr->clamp = clamp;
@@ -2795,7 +2795,7 @@ combine_add_or_then_and_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        uint8_t opsel = 0, omod = 0;
        bool clamp = false;
        if (instr->isVOP3())
-         clamp = instr->vop3().clamp;
+         clamp = instr->valu().clamp;
  
        ctx.uses[instr->operands[i].tempId()]--;
        create_vop3_for_op3(ctx, op, instr, operands, neg, abs, opsel, clamp, omod);
@@ -3164,11 +3164,11 @@ combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode new_op
           aco_ptr<Instruction> new_instr;
           if (instr->operands[!i].isTemp() &&
               instr->operands[!i].getTemp().type() == RegType::vgpr) {
-            new_instr.reset(create_instruction<VOP2_instruction>(new_op, Format::VOP2, 3, 2));
+            new_instr.reset(create_instruction<VALU_instruction>(new_op, Format::VOP2, 3, 2));
           } else if (ctx.program->gfx_level >= GFX10 ||
                      (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
              new_instr.reset(
-               create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
+               create_instruction<VALU_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
           } else {
              return false;
           }
@@ -3209,7 +3209,7 @@ combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr)
            op_instr->operands[0].getTemp().type() == RegType::vgpr &&
            op_instr->operands[1].constantEquals(0)) {
           aco_ptr<Instruction> new_instr{
-            create_instruction<VOP3_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
+            create_instruction<VALU_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
           ctx.uses[instr->operands[i].tempId()]--;
           new_instr->operands[0] = op_instr->operands[0];
           new_instr->operands[1] = instr->operands[!i];
@@ -3669,10 +3669,10 @@ combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
           if (instr->operands[!i].isTemp() &&
               instr->operands[!i].getTemp().type() == RegType::vgpr) {
              new_instr.reset(
-               create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1));
+               create_instruction<VALU_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1));
           } else if (ctx.program->gfx_level >= GFX10 ||
                      (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
-            new_instr.reset(create_instruction<VOP3_instruction>(aco_opcode::v_cndmask_b32,
+            new_instr.reset(create_instruction<VALU_instruction>(aco_opcode::v_cndmask_b32,
                                                                   asVOP3(Format::VOP2), 3, 1));
           } else {
              return false;
@@ -3744,8 +3744,8 @@ combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool is_sub)
           ctx.uses[instr->operands[i].tempId()]--;
  
           aco_opcode mad_op = is_sub ? aco_opcode::v_mad_i32_i24 : aco_opcode::v_mad_u32_u24;
-         aco_ptr<VOP3_instruction> new_instr{
-            create_instruction<VOP3_instruction>(mad_op, Format::VOP3, 3, 1)};
+         aco_ptr<VALU_instruction> new_instr{
+            create_instruction<VALU_instruction>(mad_op, Format::VOP3, 3, 1)};
           for (unsigned op_idx = 0; op_idx < 3; ++op_idx)
              new_instr->operands[op_idx] = ops[op_idx];
           new_instr->definitions[0] = instr->definitions[0];
@@ -3759,7 +3759,7 @@ combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr, bool is_sub)
  }
  
  void
-propagate_swizzles(VOP3P_instruction* instr, uint8_t opsel_lo, uint8_t opsel_hi)
+propagate_swizzles(VALU_instruction* instr, uint8_t opsel_lo, uint8_t opsel_hi)
  {
     /* propagate swizzles which apply to a result down to the instruction's operands:
      * result = a.xy + b.xx -> result.yx = a.yx + b.xx */
@@ -3784,7 +3784,7 @@ propagate_swizzles(VOP3P_instruction* instr, uint8_t opsel_lo, uint8_t opsel_hi)
  void
  combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
  {
-   VOP3P_instruction* vop3p = &instr->vop3p();
+   VALU_instruction* vop3p = &instr->valu();
  
     /* apply clamp */
     if (instr->opcode == aco_opcode::v_pk_mul_f16 && instr->operands[1].constantEquals(0x3C00) &&
@@ -3793,7 +3793,7 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
  
        ssa_info& info = ctx.info[instr->operands[0].tempId()];
        if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) {
-         VOP3P_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->vop3p();
+         VALU_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->valu();
           candidate->clamp = true;
           propagate_swizzles(candidate, vop3p->opsel_lo, vop3p->opsel_hi);
           instr->definitions[0].swapTemp(candidate->definitions[0]);
@@ -3814,7 +3814,7 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
           if (info.is_vop3p() && info.instr->opcode == aco_opcode::v_pk_mul_f16 &&
               info.instr->operands[1].constantEquals(0x3C00)) {
  
-            VOP3P_instruction* fneg = &info.instr->vop3p();
+            VALU_instruction* fneg = &info.instr->valu();
  
              if ((fneg->opsel_lo | fneg->opsel_hi) & 2)
                 continue;
@@ -3878,7 +3878,7 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
              continue;
  
           /* no clamp allowed between mul and add */
-         if (info.instr->vop3p().clamp)
+         if (info.instr->valu().clamp)
              continue;
  
           mul_instr = info.instr;
@@ -3904,9 +3904,8 @@ combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        /* turn packed mul+add into v_pk_fma_f16 */
        assert(mul_instr->isVOP3P());
        aco_opcode mad = fadd ? aco_opcode::v_pk_fma_f16 : aco_opcode::v_pk_mad_u16;
-      aco_ptr<VOP3P_instruction> fma{
-         create_instruction<VOP3P_instruction>(mad, Format::VOP3P, 3, 1)};
-      VOP3P_instruction* mul = &mul_instr->vop3p();
+      aco_ptr<VALU_instruction> fma{create_instruction<VALU_instruction>(mad, Format::VOP3P, 3, 1)};
+      VALU_instruction* mul = &mul_instr->valu();
        for (unsigned i = 0; i < 2; i++) {
           fma->operands[i] = op[i];
           fma->neg_lo[i] = mul->neg_lo[i];
@@ -3956,7 +3955,7 @@ can_use_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        return false;
  
     if (instr->isVOP3())
-      return !instr->vop3().omod && !(instr->vop3().opsel & 0x8);
+      return !instr->valu().omod && !(instr->valu().opsel & 0x8);
  
     return instr->format == Format::VOP2;
  }
@@ -3966,10 +3965,10 @@ to_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
  {
     bool is_add = instr->opcode != aco_opcode::v_mul_f32 && instr->opcode != aco_opcode::v_fma_f32;
  
-   aco_ptr<VOP3P_instruction> vop3p{
-      create_instruction<VOP3P_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1)};
+   aco_ptr<VALU_instruction> vop3p{
+      create_instruction<VALU_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1)};
  
-   vop3p->opsel_lo = instr->isVOP3() ? ((instr->vop3().opsel & 0x7) << (is_add ? 1 : 0)) : 0x0;
+   vop3p->opsel_lo = instr->isVOP3() ? ((instr->valu().opsel & 0x7) << (is_add ? 1 : 0)) : 0x0;
     vop3p->opsel_hi = 0x0;
     for (unsigned i = 0; i < instr->operands.size(); i++) {
        vop3p->operands[is_add + i] = instr->operands[i];
@@ -3990,7 +3989,7 @@ to_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
           vop3p->neg_lo[1] ^= true;
     }
     vop3p->definitions[0] = instr->definitions[0];
-   vop3p->clamp = instr->isVOP3() && instr->vop3().clamp;
+   vop3p->clamp = instr->isVOP3() && instr->valu().clamp;
     instr = std::move(vop3p);
  
     ctx.info[instr->definitions[0].tempId()].label &= label_f2f16 | label_clamp | label_mul;
@@ -4045,7 +4044,7 @@ combine_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        if (conv->isSDWA() && (conv->sdwa().dst_sel.size() != 4 || conv->sdwa().sel[0].size() != 2 ||
                               conv->sdwa().clamp || conv->sdwa().omod)) {
           continue;
-      } else if (conv->isVOP3() && (conv->vop3().clamp || conv->vop3().omod)) {
+      } else if (conv->isVOP3() && (conv->valu().clamp || conv->valu().omod)) {
           continue;
        } else if (conv->isDPP()) {
           continue;
@@ -4075,14 +4074,14 @@ combine_mad_mix(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        instr->operands[i].setTemp(conv->operands[0].getTemp());
        if (conv->definitions[0].isPrecise())
           instr->definitions[0].setPrecise(true);
-      instr->vop3p().opsel_hi ^= 1u << i;
+      instr->valu().opsel_hi ^= 1u << i;
        if (conv->isSDWA() && conv->sdwa().sel[0].offset() == 2)
-         instr->vop3p().opsel_lo |= 1u << i;
+         instr->valu().opsel_lo |= 1u << i;
        bool neg = conv->valu().neg[0];
        bool abs = conv->valu().abs[0];
-      if (!instr->vop3p().abs[i]) {
-         instr->vop3p().neg[i] ^= neg;
-         instr->vop3p().abs[i] = abs;
+      if (!instr->valu().abs[i]) {
+         instr->valu().neg[i] ^= neg;
+         instr->valu().abs[i] = abs;
        }
     }
  }
@@ -4205,7 +4204,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
  
        if (mul_instr->operands[0].isLiteral())
           return;
-      if (mul_instr->isVOP3() && mul_instr->vop3().clamp)
+      if (mul_instr->isVOP3() && mul_instr->valu().clamp)
           return;
        if (mul_instr->isSDWA() || mul_instr->isDPP() || mul_instr->isVOP3P())
           return;
@@ -4221,13 +4220,13 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
        bool is_neg = ctx.info[instr->definitions[0].tempId()].is_neg();
        bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs();
        instr.reset(
-         create_instruction<VOP3_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
+         create_instruction<VALU_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
        instr->operands[0] = mul_instr->operands[0];
        instr->operands[1] = mul_instr->operands[1];
        instr->definitions[0] = def;
-      VOP3_instruction& new_mul = instr->vop3();
+      VALU_instruction& new_mul = instr->valu();
        if (mul_instr->isVOP3()) {
-         VOP3_instruction& mul = mul_instr->vop3();
+         VALU_instruction& mul = mul_instr->valu();
           new_mul.neg[0] = mul.neg[0];
           new_mul.neg[1] = mul.neg[1];
           new_mul.abs[0] = mul.abs[0];
@@ -4249,10 +4248,10 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
     bool is_add_mix =
        (instr->opcode == aco_opcode::v_fma_mix_f32 ||
         instr->opcode == aco_opcode::v_fma_mixlo_f16) &&
-      !instr->vop3p().neg_lo[0] &&
-      ((instr->operands[0].constantEquals(0x3f800000) && (instr->vop3p().opsel_hi & 0x1) == 0) ||
-       (instr->operands[0].constantEquals(0x3C00) && (instr->vop3p().opsel_hi & 0x1) &&
-        !(instr->vop3p().opsel_lo & 0x1)));
+      !instr->valu().neg_lo[0] &&
+      ((instr->operands[0].constantEquals(0x3f800000) && (instr->valu().opsel_hi & 0x1) == 0) ||
+       (instr->operands[0].constantEquals(0x3C00) && (instr->valu().opsel_hi & 0x1) &&
+        !(instr->valu().opsel_lo & 0x1)));
     bool mad32 = instr->opcode == aco_opcode::v_add_f32 || instr->opcode == aco_opcode::v_sub_f32 ||
                  instr->opcode == aco_opcode::v_subrev_f32;
     bool mad16 = instr->opcode == aco_opcode::v_add_f16 || instr->opcode == aco_opcode::v_sub_f16 ||
@@ -4270,12 +4269,12 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
           ssa_info& info = ctx.info[instr->operands[i].tempId()];
  
           /* no clamp/omod allowed between mul and add */
-         if (info.instr->isVOP3() && (info.instr->vop3().clamp || info.instr->vop3().omod))
+         if (info.instr->isVOP3() && (info.instr->valu().clamp || info.instr->valu().omod))
              continue;
-         if (info.instr->isVOP3P() && info.instr->vop3p().clamp)
+         if (info.instr->isVOP3P() && info.instr->valu().clamp)
              continue;
           /* v_fma_mix_f32/etc can't do omod */
-         if (info.instr->isVOP3P() && instr->isVOP3() && instr->vop3().omod)
+         if (info.instr->isVOP3P() && instr->isVOP3() && instr->valu().omod)
              continue;
           /* don't promote fp16 to fp32 or remove fp32->fp16->fp32 conversions */
           if (is_add_mix && info.instr->definitions[0].bytes() == 2)
@@ -4388,7 +4387,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
  
              aco_opcode mad_op = add_instr->definitions[0].bytes() == 2 ? aco_opcode::v_fma_mixlo_f16
                                                                         : aco_opcode::v_fma_mix_f32;
-            mad.reset(create_instruction<VOP3P_instruction>(mad_op, Format::VOP3P, 3, 1));
+            mad.reset(create_instruction<VALU_instruction>(mad_op, Format::VOP3P, 3, 1));
           } else {
              assert(!opsel_lo);
              assert(!opsel_hi);
@@ -4406,7 +4405,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
                 mad_op = aco_opcode::v_fma_f64;
              }
  
-            mad.reset(create_instruction<VOP3_instruction>(mad_op, Format::VOP3, 3, 1));
+            mad.reset(create_instruction<VALU_instruction>(mad_op, Format::VOP3, 3, 1));
           }
  
           for (unsigned i = 0; i < 3; i++) {
@@ -4442,8 +4441,8 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
              ctx.uses[instr->operands[i].tempId()]--;
              ctx.uses[ctx.info[instr->operands[i].tempId()].temp.id()]++;
  
-            aco_ptr<VOP2_instruction> new_instr{
-               create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)};
+            aco_ptr<VALU_instruction> new_instr{
+               create_instruction<VALU_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)};
              new_instr->operands[0] = Operand::zero();
              new_instr->operands[1] = instr->operands[!i];
              new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
@@ -4767,7 +4766,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
  
           if ((instr->opcode == aco_opcode::v_fma_f32 ||
                (instr->opcode == aco_opcode::v_mad_f32 && !instr->definitions[0].isPrecise())) &&
-             !instr->vop3().omod && ctx.program->gfx_level >= GFX10 &&
+             !instr->valu().omod && ctx.program->gfx_level >= GFX10 &&
               util_bitcount(fp16_mask) > std::max<uint32_t>(util_bitcount(literal_mask), 1)) {
              assert(ctx.program->dev.fused_mad_mix);
              u_foreach_bit (i, fp16_mask)
@@ -5091,7 +5090,7 @@ unswizzle_vop3p_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
     if (instr->opcode != aco_opcode::v_pk_fma_f16)
        return;
  
-   VOP3P_instruction& vop3p = instr->vop3p();
+   VALU_instruction& vop3p = instr->valu();
  
     unsigned literal_swizzle = ~0u;
     for (unsigned i = 0; i < instr->operands.size(); i++) {
@@ -5135,21 +5134,21 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
  
        if (has_dead_literal && info->fp16_mask) {
           aco_ptr<Instruction> fma_mix(
-            create_instruction<VOP3P_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1));
+            create_instruction<VALU_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1));
  
-         fma_mix->vop3p().clamp = instr->vop3().clamp;
-         std::copy(std::cbegin(instr->vop3().abs), std::cend(instr->vop3().abs),
-                   std::begin(fma_mix->vop3p().neg_hi));
-         std::copy(std::cbegin(instr->vop3().neg), std::cend(instr->vop3().neg),
-                   std::begin(fma_mix->vop3p().neg_lo));
+         fma_mix->valu().clamp = instr->valu().clamp;
+         std::copy(std::cbegin(instr->valu().abs), std::cend(instr->valu().abs),
+                   std::begin(fma_mix->valu().neg_hi));
+         std::copy(std::cbegin(instr->valu().neg), std::cend(instr->valu().neg),
+                   std::begin(fma_mix->valu().neg_lo));
  
           uint32_t literal = 0;
           bool second = false;
           u_foreach_bit (i, info->fp16_mask) {
              float value = uif(ctx.info[instr->operands[i].tempId()].val);
              literal |= _mesa_float_to_half(value) << (second * 16);
-            fma_mix->vop3p().opsel_lo |= second << i;
-            fma_mix->vop3p().opsel_hi |= 1 << i;
+            fma_mix->valu().opsel_lo |= second << i;
+            fma_mix->valu().opsel_hi |= 1 << i;
              second = true;
           }
  
@@ -5178,7 +5177,7 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
              new_op = madak ? aco_opcode::v_fmaak_f16 : aco_opcode::v_fmamk_f16;
  
           uint32_t literal = ctx.info[instr->operands[ffs(info->literal_mask) - 1].tempId()].val;
-         new_mad.reset(create_instruction<VOP2_instruction>(new_op, Format::VOP2, 3, 1));
+         new_mad.reset(create_instruction<VALU_instruction>(new_op, Format::VOP2, 3, 1));
           for (unsigned i = 0; i < 3; i++) {
              if (info->literal_mask & (1 << i))
                 new_mad->operands[i] = Operand::literal32(literal);
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp

index 74fb7e5..623538b 100644 (file)
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -749,7 +749,7 @@ aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* ou
              opsel[i] = valu.opsel & (1 << i);
           }
        } else if (instr->isVOP3P() && is_mad_mix) {
-         const VOP3P_instruction& vop3p = instr->vop3p();
+         const VALU_instruction& vop3p = instr->valu();
           for (unsigned i = 0; i < MIN2(num_operands, 3); ++i) {
              abs[i] = vop3p.neg_hi[i];
              neg[i] = vop3p.neg_lo[i];
@@ -778,7 +778,7 @@ aco_print_instr(enum amd_gfx_level gfx_level, const Instruction* instr, FILE* ou
              fprintf(output, "|");
  
           if (instr->isVOP3P() && !is_mad_mix) {
-            const VOP3P_instruction& vop3 = instr->vop3p();
+            const VALU_instruction& vop3 = instr->valu();
              if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) {
                 fprintf(output, ".%c%c", vop3.opsel_lo & (1 << i) ? 'y' : 'x',
                         vop3.opsel_hi & (1 << i) ? 'y' : 'x');
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp

index 5a7adb3..af055ab 100644 (file)
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -543,7 +543,7 @@ add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, uns
        /* check if we can use opsel */
        if (instr->format == Format::VOP3) {
           assert(byte == 2);
-         instr->vop3().opsel |= 1 << idx;
+         instr->valu().opsel |= 1 << idx;
           return;
        }
        if (instr->isVINTERP_INREG()) {
@@ -552,9 +552,9 @@ add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx, uns
           return;
        }
        if (instr->isVOP3P()) {
-         assert(byte == 2 && !(instr->vop3p().opsel_lo & (1 << idx)));
-         instr->vop3p().opsel_lo |= 1 << idx;
-         instr->vop3p().opsel_hi |= 1 << idx;
+         assert(byte == 2 && !(instr->valu().opsel_lo & (1 << idx)));
+         instr->valu().opsel_lo |= 1 << idx;
+         instr->valu().opsel_hi |= 1 << idx;
           return;
        }
        if (instr->opcode == aco_opcode::v_cvt_f32_ubyte0) {
@@ -695,7 +695,7 @@ add_subdword_definition(Program* program, aco_ptr<Instruction>& instr, PhysReg r
        if (instr->format == Format::VOP3) {
           assert(reg.byte() == 2);
           assert(can_use_opsel(gfx_level, instr->opcode, -1));
-         instr->vop3().opsel |= (1 << 3); /* dst in high half */
+         instr->valu().opsel |= (1 << 3); /* dst in high half */
           return;
        } else if (instr->isVINTERP_INREG()) {
           assert(reg.byte() == 2);
@@ -2632,10 +2632,6 @@ optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_fil
           return;
     }
  
-   static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
-                 "Invalid direct instruction cast.");
-   static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
-                 "Invalid direct instruction cast.");
     instr->format = Format::VOP2;
     instr->valu().opsel_hi = 0;
     switch (instr->opcode) {
@@ -3120,7 +3116,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
                    mov.reset(create_instruction<SOP1_instruction>(aco_opcode::s_mov_b32,
                                                                   Format::SOP1, 1, 1));
                 else
-                  mov.reset(create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32,
+                  mov.reset(create_instruction<VALU_instruction>(aco_opcode::v_mov_b32,
                                                                   Format::VOP1, 1, 1));
                 mov->operands[0] = instr->operands[0];
                 mov->definitions[0] = Definition(tmp);
@@ -3136,7 +3132,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
              /* change the instruction to VOP3 to enable an arbitrary register pair as dst */
              aco_ptr<Instruction> tmp = std::move(instr);
              Format format = asVOP3(tmp->format);
-            instr.reset(create_instruction<VOP3_instruction>(
+            instr.reset(create_instruction<VALU_instruction>(
                 tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
              std::copy(tmp->operands.begin(), tmp->operands.end(), instr->operands.begin());
              std::copy(tmp->definitions.begin(), tmp->definitions.end(), instr->definitions.begin());
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp

index 52e0d36..f6fac86 100644 (file)
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -326,7 +326,7 @@ do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
  
        aco_ptr<Instruction> res;
        if (instr->isVOP1()) {
-         res.reset(create_instruction<VOP1_instruction>(
+         res.reset(create_instruction<VALU_instruction>(
              instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
        } else if (instr->isSOP1()) {
           res.reset(create_instruction<SOP1_instruction>(
diff --git a/src/amd/compiler/aco_ssa_elimination.cpp b/src/amd/compiler/aco_ssa_elimination.cpp

index 9a1a3dc..3ec17f8 100644 (file)
--- a/src/amd/compiler/aco_ssa_elimination.cpp
+++ b/src/amd/compiler/aco_ssa_elimination.cpp
@@ -461,21 +461,21 @@ try_optimize_branching_sequence(ssa_elimination_ctx& ctx, Block& block, const in
              return;
           } else if (!exec_val->isVOP3()) {
              aco_ptr<Instruction> tmp = std::move(exec_val);
-            exec_val.reset(create_instruction<VOPC_instruction>(
+            exec_val.reset(create_instruction<VALU_instruction>(
                 tmp->opcode, tmp->format, tmp->operands.size(), tmp->definitions.size() + 1));
              std::copy(tmp->operands.cbegin(), tmp->operands.cend(), exec_val->operands.begin());
              std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(),
                        exec_val->definitions.begin());
           } else {
              aco_ptr<Instruction> tmp = std::move(exec_val);
-            exec_val.reset(create_instruction<VOP3_instruction>(
+            exec_val.reset(create_instruction<VALU_instruction>(
                 tmp->opcode, tmp->format, tmp->operands.size(), tmp->definitions.size() + 1));
              std::copy(tmp->operands.cbegin(), tmp->operands.cend(), exec_val->operands.begin());
              std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(),
                        exec_val->definitions.begin());
  
-            VOP3_instruction& src = tmp->vop3();
-            VOP3_instruction& dst = exec_val->vop3();
+            VALU_instruction& src = tmp->valu();
+            VALU_instruction& dst = exec_val->valu();
              dst.opsel = src.opsel;
              dst.omod = src.omod;
              dst.clamp = src.clamp;
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp

index 1b5ed3e..996b49c 100644 (file)
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -224,7 +224,7 @@ validate_ir(Program* program)
  
           /* check opsel */
           if (instr->isVOP3()) {
-            VOP3_instruction& vop3 = instr->vop3();
+            VALU_instruction& vop3 = instr->valu();
              check(vop3.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+",
                    instr.get());
  
@@ -244,7 +244,7 @@ validate_ir(Program* program)
                       (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
                    "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
           } else if (instr->isVOP3P()) {
-            VOP3P_instruction& vop3p = instr->vop3p();
+            VALU_instruction& vop3p = instr->valu();
              for (unsigned i = 0; i < instr->operands.size(); i++) {
                 if (instr->operands[i].hasRegClass() &&
                     instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
@@ -866,8 +866,8 @@ validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& i
        bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
                       instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
                       instr->opcode == aco_opcode::v_fma_mix_f32;
-      return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
-             ((instr->vop3p().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
+      return ((instr->valu().opsel_lo >> index) & 1) == (byte >> 1) &&
+             ((instr->valu().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
     }
     if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
        return true;
diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp

index 85365a5..68158ab 100644 (file)
--- a/src/amd/compiler/tests/helpers.cpp
+++ b/src/amd/compiler/tests/helpers.cpp
@@ -302,11 +302,11 @@ Temp fabs(Temp src, Builder b)
  {
     if (src.bytes() == 2) {
        Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
-      res->vop3().abs[1] = true;
+      res->valu().abs[1] = true;
        return res;
     } else {
        Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
-      res->vop3().abs[1] = true;
+      res->valu().abs[1] = true;
        return res;
     }
  }
diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp

index 8cc06f9..82f9038 100644 (file)
--- a/src/amd/compiler/tests/test_assembler.cpp
+++ b/src/amd/compiler/tests/test_assembler.cpp
@@ -269,7 +269,8 @@ BEGIN_TEST(assembler.v_add3)
  
        //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
        //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
-      aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
+      aco_ptr<VALU_instruction> add3{
+         create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
        add3->operands[0] = Operand::zero();
        add3->operands[1] = Operand::zero();
        add3->operands[2] = Operand::zero();
@@ -287,7 +288,8 @@ BEGIN_TEST(assembler.v_add3_clamp)
  
        //~gfx9>> integer addition + clamp ; d1ff8000 02010080
        //~gfx10>> integer addition + clamp ; d76d8000 02010080
-      aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
+      aco_ptr<VALU_instruction> add3{
+         create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
        add3->operands[0] = Operand::zero();
        add3->operands[1] = Operand::zero();
        add3->operands[2] = Operand::zero();
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp

index 7c109ed..cc8a09d 100644 (file)
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -698,7 +698,7 @@ BEGIN_TEST(optimize.add3)
     //! v1: %res1 = v_add_u32 %a, %tmp1
     //! p_unit_test 1, %res1
     tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
-   tmp->vop3().clamp = true;
+   tmp->valu().clamp = true;
     writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
  
     //! v1: %tmp2 = v_add_u32 %b, %c
@@ -706,7 +706,7 @@ BEGIN_TEST(optimize.add3)
     //! p_unit_test 2, %res2
     tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
     tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
-   tmp->vop3().clamp = true;
+   tmp->valu().clamp = true;
     writeout(2, tmp);
  
     finish_opt_test();
@@ -1038,7 +1038,7 @@ BEGIN_TEST(optimizer.dpp)
     //! p_unit_test 4, %res4
     Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
     auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp4, b);
-   res4->vop3().neg[0] = true;
+   res4->valu().neg[0] = true;
     writeout(4, res4);
  
     //! v1: %tmp5 = v_mov_b32 %a row_mirror bound_ctrl:1
@@ -1046,7 +1046,7 @@ BEGIN_TEST(optimizer.dpp)
     //! p_unit_test 5, %res5
     Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
     auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp5, b);
-   res5->vop3().clamp = true;
+   res5->valu().clamp = true;
     writeout(5, res5);
  
     //! v1: %res6 = v_add_f32 |%a|, %b row_mirror bound_ctrl:1
@@ -1054,14 +1054,14 @@ BEGIN_TEST(optimizer.dpp)
     auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
     tmp6->dpp16().neg[0] = true;
     auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp6, b);
-   res6->vop3().abs[0] = true;
+   res6->valu().abs[0] = true;
     writeout(6, res6);
  
     //! v1: %res7 = v_subrev_f32 %a, |%b| row_mirror bound_ctrl:1
     //! p_unit_test 7, %res7
     Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
     auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), b, tmp7);
-   res7->vop3().abs[0] = true;
+   res7->valu().abs[0] = true;
     writeout(7, res7);
  
     //! v1: %tmp11 = v_mov_b32 -%a row_mirror bound_ctrl:1
diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp

index 066f74f..d6cc320 100644 (file)
--- a/src/amd/compiler/tests/test_optimizer_postRA.cpp
+++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp
@@ -383,7 +383,7 @@ BEGIN_TEST(optimizer_postRA.dpp)
     //! p_unit_test 4, %res4:v[2]
     Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
     auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b);
-   res4->vop3().neg[0] = true;
+   res4->valu().neg[0] = true;
     writeout(4, Operand(res4, reg_v2));
  
     //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
@@ -391,7 +391,7 @@ BEGIN_TEST(optimizer_postRA.dpp)
     //! p_unit_test 5, %res5:v[2]
     Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
     auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b);
-   res5->vop3().clamp = true;
+   res5->valu().clamp = true;
     writeout(5, Operand(res5, reg_v2));
  
     //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1
@@ -399,14 +399,14 @@ BEGIN_TEST(optimizer_postRA.dpp)
     auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
     tmp6->dpp16().neg[0] = true;
     auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b);
-   res6->vop3().abs[0] = true;
+   res6->valu().abs[0] = true;
     writeout(6, Operand(res6, reg_v2));
  
     //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1
     //! p_unit_test 7, %res7:v[2]
     Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
     auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2));
-   res7->vop3().abs[0] = true;
+   res7->valu().abs[0] = true;
     writeout(7, Operand(res7, reg_v2));
  
     //! v1: %tmp12:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1
diff --git a/src/amd/compiler/tests/test_sdwa.cpp b/src/amd/compiler/tests/test_sdwa.cpp

index 3942869..45f8c3e 100644 (file)
--- a/src/amd/compiler/tests/test_sdwa.cpp
+++ b/src/amd/compiler/tests/test_sdwa.cpp
@@ -385,7 +385,8 @@ BEGIN_TEST(optimize.sdwa.from_vop3)
        //! p_unit_test 0, %res0
        Temp byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
                                  Operand::c32(8u), Operand::zero());
-      VOP3_instruction *mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b)->vop3();
+      VALU_instruction* mul =
+         &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b)->valu();
        mul->neg[0] = true;
        mul->abs[0] = true;
        writeout(0, mul->definitions[0].getTemp());
@@ -396,7 +397,7 @@ BEGIN_TEST(optimize.sdwa.from_vop3)
        //! p_unit_test 1, %res1
        byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
                             Operand::c32(8u), Operand::zero());
-      mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b)->vop3();
+      mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b)->valu();
        mul->omod = 2;
        writeout(1, mul->definitions[0].getTemp());
author	Georg Lehmann <dadschoorse@gmail.com>
	Tue, 21 Feb 2023 19:08:42 +0000 (20:08 +0100)
committer	Marge Bot <emma+marge@anholt.net>
	Tue, 7 Mar 2023 11:53:23 +0000 (11:53 +0000)
src/amd/compiler/aco_assembler.cpp		patch \| blob \| history
src/amd/compiler/aco_builder_h.py		patch \| blob \| history
src/amd/compiler/aco_instruction_selection.cpp		patch \| blob \| history
src/amd/compiler/aco_ir.cpp		patch \| blob \| history
src/amd/compiler/aco_ir.h		patch \| blob \| history
src/amd/compiler/aco_lower_to_hw_instr.cpp		patch \| blob \| history
src/amd/compiler/aco_opt_value_numbering.cpp		patch \| blob \| history
src/amd/compiler/aco_optimizer.cpp		patch \| blob \| history
src/amd/compiler/aco_print_ir.cpp		patch \| blob \| history
src/amd/compiler/aco_register_allocation.cpp		patch \| blob \| history
src/amd/compiler/aco_spill.cpp		patch \| blob \| history
src/amd/compiler/aco_ssa_elimination.cpp		patch \| blob \| history
src/amd/compiler/aco_validate.cpp		patch \| blob \| history
src/amd/compiler/tests/helpers.cpp		patch \| blob \| history
src/amd/compiler/tests/test_assembler.cpp		patch \| blob \| history
src/amd/compiler/tests/test_optimizer.cpp		patch \| blob \| history
src/amd/compiler/tests/test_optimizer_postRA.cpp		patch \| blob \| history
src/amd/compiler/tests/test_sdwa.cpp		patch \| blob \| history