* except abs/neg is ignored). src2 cannot be literal and src0/src1 must be VGPR.
*/
if (instr->isVOP3()) {
- VOP3_instruction& vop3 = instr->vop3();
+ VALU_instruction& vop3 = instr->valu();
if (instr->isVOP2()) {
opcode = opcode + 0x100;
out.push_back(encoding);
} else if (instr->isVOP3P()) {
- VOP3P_instruction& vop3 = instr->vop3p();
+ VALU_instruction& vop3 = instr->valu();
uint32_t encoding;
if (ctx.gfx_level == GFX9) {
int num_defs = carry_out ? 2 : 1;
aco_ptr<Instruction> sub;
if (vop3)
- sub.reset(create_instruction<VOP3_instruction>(op, Format::VOP3, num_ops, num_defs));
+ sub.reset(create_instruction<VALU_instruction>(op, Format::VOP3, num_ops, num_defs));
else
- sub.reset(create_instruction<VOP2_instruction>(op, Format::VOP2, num_ops, num_defs));
+ sub.reset(create_instruction<VALU_instruction>(op, Format::VOP2, num_ops, num_defs));
sub->operands[0] = a.op;
sub->operands[1] = b.op;
if (!borrow.op.isUndefined())
("branch", [Format.PSEUDO_BRANCH], 'Pseudo_branch_instruction', itertools.product([1], [0, 1])),
("barrier", [Format.PSEUDO_BARRIER], 'Pseudo_barrier_instruction', [(0, 0)]),
("reduction", [Format.PSEUDO_REDUCTION], 'Pseudo_reduction_instruction', [(3, 3)]),
- ("vop1", [Format.VOP1], 'VOP1_instruction', [(0, 0), (1, 1), (2, 2)]),
+ ("vop1", [Format.VOP1], 'VALU_instruction', [(0, 0), (1, 1), (2, 2)]),
("vop1_sdwa", [Format.VOP1, Format.SDWA], 'SDWA_instruction', [(1, 1)]),
- ("vop2", [Format.VOP2], 'VOP2_instruction', itertools.product([1, 2], [2, 3])),
+ ("vop2", [Format.VOP2], 'VALU_instruction', itertools.product([1, 2], [2, 3])),
("vop2_sdwa", [Format.VOP2, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2, 3])),
- ("vopc", [Format.VOPC], 'VOPC_instruction', itertools.product([1, 2], [2])),
+ ("vopc", [Format.VOPC], 'VALU_instruction', itertools.product([1, 2], [2])),
("vopc_sdwa", [Format.VOPC, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2])),
- ("vop3", [Format.VOP3], 'VOP3_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
- ("vop3p", [Format.VOP3P], 'VOP3P_instruction', [(1, 2), (1, 3)]),
+ ("vop3", [Format.VOP3], 'VALU_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
+ ("vop3p", [Format.VOP3P], 'VALU_instruction', [(1, 2), (1, 3)]),
("vinterp_inreg", [Format.VINTERP_INREG], 'VINTERP_inreg_instruction', [(1, 3)]),
("vintrp", [Format.VINTRP], 'VINTRP_instruction', [(1, 2), (1, 3)]),
("vop1_dpp", [Format.VOP1, Format.DPP16], 'DPP16_instruction', [(1, 1)]),
("vop1_dpp8", [Format.VOP1, Format.DPP8], 'DPP8_instruction', [(1, 1)]),
("vop2_dpp8", [Format.VOP2, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2, 3])),
("vopc_dpp8", [Format.VOPC, Format.DPP8], 'DPP8_instruction', itertools.product([1, 2], [2])),
- ("vop1_e64", [Format.VOP1, Format.VOP3], 'VOP3_instruction', itertools.product([1], [1])),
- ("vop2_e64", [Format.VOP2, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2, 3])),
- ("vopc_e64", [Format.VOPC, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2])),
+ ("vop1_e64", [Format.VOP1, Format.VOP3], 'VALU_instruction', itertools.product([1], [1])),
+ ("vop2_e64", [Format.VOP2, Format.VOP3], 'VALU_instruction', itertools.product([1, 2], [2, 3])),
+ ("vopc_e64", [Format.VOPC, Format.VOP3], 'VALU_instruction', itertools.product([1, 2], [2])),
("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]),
("global", [Format.GLOBAL], 'FLAT_instruction', [(0, 3), (1, 2)]),
("scratch", [Format.SCRATCH], 'FLAT_instruction', [(0, 3), (1, 2)])]
Temp op1 = bld.copy(bld.def(s1), Operand::c32(lane_mask & 0xffffffff));
Temp op2 = bld.copy(bld.def(s1), Operand::c32(lane_mask >> 32));
Builder::Result ret = bld.vop3(opcode, bld.def(v1), src, op1, op2);
- ret->vop3().opsel = 0x3; /* set BOUND_CTRL/FETCH_INACTIVE */
+ ret->valu().opsel = 0x3; /* set BOUND_CTRL/FETCH_INACTIVE */
return ret;
}
Builder bld(ctx->program, ctx->block);
bld.is_precise = instr->exact;
- VOP3P_instruction& vop3p =
- bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7)->vop3p();
+ VALU_instruction& vop3p =
+ bld.vop3p(op, Definition(dst), src[0], src[1], src[2], 0x0, 0x7)->valu();
vop3p.clamp = clamp;
u_foreach_bit (i, neg_lo)
vop3p.neg_lo[i] = true;
Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v);
- add->vop3().neg[1] = true;
+ add->valu().neg[1] = true;
return add->definitions[0].getTemp();
}
} else {
add = bld.vop2_e64(aco_opcode::v_add_co_u32, dst, bld.def(bld.lm), src0, src1);
}
- add->vop3().clamp = 1;
+ add->valu().clamp = 1;
return dst.getTemp();
}
} else {
sub = bld.vop2_e64(aco_opcode::v_sub_co_u32, dst, bld.def(bld.lm), src0, src1);
}
- sub->vop3().clamp = 1;
+ sub->valu().clamp = 1;
return dst.getTemp();
}
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Instruction* add_instr =
emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
- add_instr->vop3p().clamp = 1;
+ add_instr->valu().clamp = 1;
break;
}
Temp src0 = get_alu_src(ctx, instr->src[0]);
add_instr =
bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
}
- add_instr->vop3().clamp = 1;
+ add_instr->valu().clamp = 1;
break;
} else if (dst.regClass() == v1) {
uadd32_sat(bld, Definition(dst), src0, src1);
carry1 = bld.tmp(bld.lm);
bld.vop2_e64(aco_opcode::v_addc_co_u32, Definition(dst1), Definition(carry1),
as_vgpr(ctx, src01), as_vgpr(ctx, src11), carry0)
- ->vop3().clamp = 1;
+ ->valu()
+ .clamp = 1;
} else {
Temp no_sat1 = bld.tmp(v1);
carry1 = bld.vadd32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp();
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Instruction* add_instr =
emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
- add_instr->vop3p().clamp = 1;
+ add_instr->valu().clamp = 1;
break;
}
Temp src0 = get_alu_src(ctx, instr->src[0]);
if (dst.regClass() == v2b) {
Instruction* add_instr =
bld.vop3(aco_opcode::v_add_i16, Definition(dst), src0, src1).instr;
- add_instr->vop3().clamp = 1;
+ add_instr->valu().clamp = 1;
} else if (dst.regClass() == v1) {
Instruction* add_instr =
bld.vop3(aco_opcode::v_add_i32, Definition(dst), src0, src1).instr;
- add_instr->vop3().clamp = 1;
+ add_instr->valu().clamp = 1;
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
case nir_op_usub_sat: {
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Instruction* sub_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_sub_u16, dst);
- sub_instr->vop3p().clamp = 1;
+ sub_instr->valu().clamp = 1;
break;
}
Temp src0 = get_alu_src(ctx, instr->src[0]);
}
sub_instr = bld.vop2_e64(op, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
}
- sub_instr->vop3().clamp = 1;
+ sub_instr->valu().clamp = 1;
break;
} else if (dst.regClass() == v1) {
usub32_sat(bld, Definition(dst), src0, as_vgpr(ctx, src1));
carry1 = bld.tmp(bld.lm);
bld.vop2_e64(aco_opcode::v_subb_co_u32, Definition(dst1), Definition(carry1),
as_vgpr(ctx, src01), as_vgpr(ctx, src11), carry0)
- ->vop3().clamp = 1;
+ ->valu()
+ .clamp = 1;
} else {
Temp no_sat1 = bld.tmp(v1);
carry1 = bld.vsub32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp();
case nir_op_isub_sat: {
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Instruction* sub_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_sub_i16, dst);
- sub_instr->vop3p().clamp = 1;
+ sub_instr->valu().clamp = 1;
break;
}
Temp src0 = get_alu_src(ctx, instr->src[0]);
if (dst.regClass() == v2b) {
Instruction* sub_instr =
bld.vop3(aco_opcode::v_sub_i16, Definition(dst), src0, src1).instr;
- sub_instr->vop3().clamp = 1;
+ sub_instr->valu().clamp = 1;
} else if (dst.regClass() == v1) {
Instruction* sub_instr =
bld.vop3(aco_opcode::v_sub_i32, Definition(dst), src0, src1).instr;
- sub_instr->vop3().clamp = 1;
+ sub_instr->valu().clamp = 1;
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
case nir_op_fsub: {
if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
Instruction* add = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_f16, dst);
- VOP3P_instruction& sub = add->vop3p();
+ VALU_instruction& sub = add->valu();
sub.neg_lo[1] = true;
sub.neg_hi[1] = true;
break;
} else if (dst.regClass() == v2) {
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), as_vgpr(ctx, src0),
as_vgpr(ctx, src1));
- add->vop3().neg[1] = true;
+ add->valu().neg[1] = true;
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
Instruction* vop3p =
bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
- vop3p->vop3p().neg_lo[0] = true;
- vop3p->vop3p().neg_hi[0] = true;
+ vop3p->valu().neg_lo[0] = true;
+ vop3p->valu().neg_hi[0] = true;
break;
}
Temp src = get_alu_src(ctx, instr->src[0]);
bld.vop3p(aco_opcode::v_pk_max_f16, Definition(dst), src, src,
instr->src[0].swizzle[0] & 1 ? 3 : 0, instr->src[0].swizzle[1] & 1 ? 3 : 0)
.instr;
- vop3p->vop3p().neg_lo[1] = true;
- vop3p->vop3p().neg_hi[1] = true;
+ vop3p->valu().neg_lo[1] = true;
+ vop3p->valu().neg_hi[1] = true;
break;
}
Temp src = get_alu_src(ctx, instr->src[0]);
Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst),
Operand::c16(0x3c00), as_vgpr(ctx, src))
.instr;
- mul->vop3().abs[1] = true;
+ mul->valu().abs[1] = true;
} else if (dst.regClass() == v1) {
Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f32, Definition(dst),
Operand::c32(0x3f800000u), as_vgpr(ctx, src))
.instr;
- mul->vop3().abs[1] = true;
+ mul->valu().abs[1] = true;
} else if (dst.regClass() == v2) {
if (ctx->block->fp_mode.must_flush_denorms16_64)
src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), Operand::c64(0x3FF0000000000000),
Instruction* vop3p =
bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(0x3C00),
instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1);
- vop3p->vop3p().clamp = true;
+ vop3p->valu().clamp = true;
break;
}
Temp src = get_alu_src(ctx, instr->src[0]);
// TODO: confirm that this holds under any circumstances
} else if (dst.regClass() == v2) {
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand::zero());
- add->vop3().clamp = true;
+ add->valu().clamp = true;
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
}
Instruction* sub =
bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp,
bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), bfi));
- sub->vop3().neg[1] = true;
+ sub->valu().neg[1] = true;
tmp = sub->definitions[0].getTemp();
Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u),
Operand::c32(0x432fffffu));
Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.def(bld.lm), src0, v);
- vop3->vop3().abs[0] = true;
+ vop3->valu().abs[0] = true;
Temp cond = vop3->definitions[0].getTemp();
Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
Temp smallest = bld.copy(bld.def(s1), Operand::c32(0x38800000u));
Instruction* tmp0 = bld.vopc_e64(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), f32, smallest);
- tmp0->vop3().abs[0] = true;
+ tmp0->valu().abs[0] = true;
Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f32, bld.def(bld.lm), Operand::zero(), f32);
cmp_res = bld.sop2(aco_opcode::s_nand_b64, bld.def(s2), bld.def(s1, scc),
tmp0->definitions[0].getTemp(), tmp1);
ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]);
- aco_ptr<VOP3_instruction> vop3a{
- create_instruction<VOP3_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
+ aco_ptr<VALU_instruction> vop3a{
+ create_instruction<VALU_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
vop3a->operands[0] = Operand(ma);
vop3a->abs[0] = true;
Temp invma = bld.tmp(v1);
return true;
if (instr->isVOP3()) {
- VOP3_instruction& vop3 = instr->vop3();
+ VALU_instruction& vop3 = instr->valu();
if (instr->format == Format::VOP3)
return false;
if (vop3.clamp && instr->isVOPC() && gfx_level != GFX8)
SDWA_instruction& sdwa = instr->sdwa();
if (tmp->isVOP3()) {
- VOP3_instruction& vop3 = tmp->vop3();
+ VALU_instruction& vop3 = tmp->valu();
memcpy(sdwa.neg, vop3.neg, sizeof(sdwa.neg));
memcpy(sdwa.abs, vop3.abs, sizeof(sdwa.abs));
sdwa.omod = vop3.omod;
return false;
if (instr->isVOP3()) {
- const VOP3_instruction* vop3 = &instr->vop3();
+ const VALU_instruction* vop3 = &instr->valu();
if (vop3->clamp || vop3->omod || vop3->opsel)
return false;
if (dpp8)
dpp->bank_mask = 0xf;
if (tmp->isVOP3()) {
- const VOP3_instruction* vop3 = &tmp->vop3();
+ const VALU_instruction* vop3 = &tmp->valu();
memcpy(dpp->neg, vop3->neg, sizeof(dpp->neg));
memcpy(dpp->abs, vop3->abs, sizeof(dpp->abs));
}
struct Pseudo_barrier_instruction;
struct Pseudo_reduction_instruction;
struct VALU_instruction;
-typedef VALU_instruction VOP3P_instruction;
struct VINTERP_inreg_instruction;
-typedef VALU_instruction VOP1_instruction;
-typedef VALU_instruction VOP2_instruction;
-typedef VALU_instruction VOPC_instruction;
-typedef VALU_instruction VOP3_instruction;
struct VINTRP_instruction;
struct DPP16_instruction;
struct DPP8_instruction;
return *(Pseudo_reduction_instruction*)this;
}
constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
- VOP3P_instruction& vop3p() noexcept
- {
- assert(isVOP3P());
- return *(VOP3P_instruction*)this;
- }
- const VOP3P_instruction& vop3p() const noexcept
- {
- assert(isVOP3P());
- return *(VOP3P_instruction*)this;
- }
constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; }
VINTERP_inreg_instruction& vinterp_inreg() noexcept
{
return *(VINTERP_inreg_instruction*)this;
}
constexpr bool isVINTERP_INREG() const noexcept { return format == Format::VINTERP_INREG; }
- VOP1_instruction& vop1() noexcept
- {
- assert(isVOP1());
- return *(VOP1_instruction*)this;
- }
- const VOP1_instruction& vop1() const noexcept
- {
- assert(isVOP1());
- return *(VOP1_instruction*)this;
- }
constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
- VOP2_instruction& vop2() noexcept
- {
- assert(isVOP2());
- return *(VOP2_instruction*)this;
- }
- const VOP2_instruction& vop2() const noexcept
- {
- assert(isVOP2());
- return *(VOP2_instruction*)this;
- }
constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
- VOPC_instruction& vopc() noexcept
- {
- assert(isVOPC());
- return *(VOPC_instruction*)this;
- }
- const VOPC_instruction& vopc() const noexcept
- {
- assert(isVOPC());
- return *(VOPC_instruction*)this;
- }
constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
- VOP3_instruction& vop3() noexcept
- {
- assert(isVOP3());
- return *(VOP3_instruction*)this;
- }
- const VOP3_instruction& vop3() const noexcept
- {
- assert(isVOP3());
- return *(VOP3_instruction*)this;
- }
constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; }
VINTRP_instruction& vintrp() noexcept
{
return true;
if (isVOP3P()) {
- const VOP3P_instruction& vop3p = this->vop3p();
+ const VALU_instruction& vop3p = this->valu();
for (unsigned i = 0; i < operands.size(); i++) {
if (vop3p.neg_lo[i] || vop3p.neg_hi[i])
return true;
}
return vop3p.opsel_lo || vop3p.clamp;
} else if (isVOP3()) {
- const VOP3_instruction& vop3 = this->vop3();
+ const VALU_instruction& vop3 = this->valu();
for (unsigned i = 0; i < operands.size(); i++) {
if (vop3.abs[i] || vop3.neg[i])
return true;
Operand(PhysReg{tmp + i}, v1), Operand::c32(0xffffffffu),
Operand::c32(0xffffffffu))
.instr;
- perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
+ perm->valu().opsel = 1; /* FI (Fetch Inactive) */
}
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand::c64(UINT64_MAX));
Operand(PhysReg{tmp + i}, v1), Operand::c32(0xffffffffu),
Operand::c32(0xffffffffu))
.instr;
- perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
+ perm->valu().opsel = 1; /* FI (Fetch Inactive) */
}
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
/* v_add_f16 is smaller because it can use 16bit fp inline constants. */
Instruction* instr = bld.vop2_e64(aco_opcode::v_add_f16, dst, op, Operand::zero());
if (dst.physReg().byte() == 2)
- instr->vop3().opsel = 0x8;
+ instr->valu().opsel = 0x8;
return;
}
op = Operand::c32((int32_t)(int16_t)op.constantValue());
// TODO: this can use VOP1 for vgpr0-127 with assembler support
Instruction* instr = bld.vop1_e64(aco_opcode::v_mov_b16, dst, op);
if (op.physReg().byte() == 2)
- instr->vop3().opsel |= 0x1;
+ instr->valu().opsel |= 0x1;
if (dst.physReg().byte() == 2)
- instr->vop3().opsel |= 0x8;
+ instr->valu().opsel |= 0x8;
}
}
if (dst.physReg().byte() == 2) {
Operand def_lo(dst.physReg().advance(-2), v2b);
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op);
- instr->vop3().opsel = 0;
+ instr->valu().opsel = 0;
} else {
assert(dst.physReg().byte() == 0);
Operand def_hi(dst.physReg().advance(2), v2b);
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
- instr->vop3().opsel = 2;
+ instr->valu().opsel = 2;
}
} else if (can_use_perm) {
uint8_t swiz[] = {4, 5, 6, 7};
Instruction* instr =
bld.vop3(sub ? aco_opcode::v_sub_u16_e64 : aco_opcode::v_add_u16_e64, dst, src0, src1).instr;
if (src0.physReg().byte() == 2)
- instr->vop3().opsel |= 0x1;
+ instr->valu().opsel |= 0x1;
if (src1.physReg().byte() == 2)
- instr->vop3().opsel |= 0x2;
+ instr->valu().opsel |= 0x2;
if (dst.physReg().byte() == 2)
- instr->vop3().opsel |= 0x8;
+ instr->valu().opsel |= 0x8;
}
bool
if (can_use_pack) {
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi);
/* opsel: 0 = select low half, 1 = select high half. [0] = src0, [1] = src1 */
- instr->vop3().opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
+ instr->valu().opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
return;
}
std::size_t operator()(Instruction* instr) const
{
if (instr->isVOP3())
- return hash_murmur_32<VOP3_instruction>(instr);
+ return hash_murmur_32<VALU_instruction>(instr);
if (instr->isDPP16())
return hash_murmur_32<DPP16_instruction>(instr);
return a->pass_flags == b->pass_flags;
if (a->isVOP3()) {
- VOP3_instruction& a3 = a->vop3();
- VOP3_instruction& b3 = b->vop3();
+ VALU_instruction& a3 = a->valu();
+ VALU_instruction& b3 = b->valu();
for (unsigned i = 0; i < 3; i++) {
if (a3.abs[i] != b3.abs[i] || a3.neg[i] != b3.neg[i])
return false;
return true;
}
case Format::VOP3P: {
- VOP3P_instruction& a3P = a->vop3p();
- VOP3P_instruction& b3P = b->vop3p();
+ VALU_instruction& a3P = a->valu();
+ VALU_instruction& b3P = b->valu();
for (unsigned i = 0; i < 3; i++) {
if (a3P.neg_lo[i] != b3P.neg_lo[i] || a3P.neg_hi[i] != b3P.neg_hi[i])
return false;
aco_ptr<Instruction> tmp = std::move(instr);
Format format = asVOP3(tmp->format);
- instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(),
+ instr.reset(create_instruction<VALU_instruction>(tmp->opcode, format, tmp->operands.size(),
tmp->definitions.size()));
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
for (unsigned i = 0; i < instr->definitions.size(); i++) {
return index == 2 ? 64 : 32;
else if (instr->opcode == aco_opcode::v_fma_mix_f32 ||
instr->opcode == aco_opcode::v_fma_mixlo_f16)
- return instr->vop3p().opsel_hi & (1u << index) ? 16 : 32;
+ return instr->valu().opsel_hi & (1u << index) ? 16 : 32;
else if (instr->isVALU() || instr->isSALU())
return instr_info.operand_size[(int)instr->opcode];
else
return;
/* try to fold inline constants */
- VOP3P_instruction* vop3p = &instr->vop3p();
+ VALU_instruction* vop3p = &instr->valu();
bool opsel_lo = (vop3p->opsel_lo >> i) & 1;
bool opsel_hi = (vop3p->opsel_hi >> i) & 1;
return true;
} else if (instr->isVOP3() && sel.size() == 2 &&
can_use_opsel(ctx.program->gfx_level, instr->opcode, idx) &&
- !(instr->vop3().opsel & (1 << idx))) {
+ !(instr->valu().opsel & (1 << idx))) {
return true;
} else if (instr->opcode == aco_opcode::p_extract) {
SubdwordSel instrSel = parse_extract(instr.get());
(instr->operands[!idx].is16bit() ||
instr->operands[!idx].constantValue() <= UINT16_MAX)) {
Instruction* mad =
- create_instruction<VOP3_instruction>(aco_opcode::v_mad_u32_u16, Format::VOP3, 3, 1);
+ create_instruction<VALU_instruction>(aco_opcode::v_mad_u32_u16, Format::VOP3, 3, 1);
mad->definitions[0] = instr->definitions[0];
mad->operands[0] = instr->operands[0];
mad->operands[1] = instr->operands[1];
mad->operands[2] = Operand::zero();
- mad->vop3().opsel = (sel.offset() / 2) << idx;
+ mad->valu().opsel = (sel.offset() / 2) << idx;
instr.reset(mad);
} else if (can_use_SDWA(ctx.program->gfx_level, instr, true) &&
(tmp.type() == RegType::vgpr || ctx.program->gfx_level >= GFX9)) {
static_cast<SDWA_instruction*>(instr.get())->sel[idx] = sel;
} else if (instr->isVOP3()) {
if (sel.offset())
- instr->vop3().opsel |= 1 << idx;
+ instr->valu().opsel |= 1 << idx;
} else if (instr->opcode == aco_opcode::p_extract) {
SubdwordSel instrSel = parse_extract(instr.get());
instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u))) { /* -1.0 */
bool neg1 = instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u);
- VOP3_instruction* vop3 = instr->isVOP3() ? &instr->vop3() : NULL;
+ VALU_instruction* vop3 = instr->isVOP3() ? &instr->valu() : NULL;
if (vop3 && (vop3->abs[!i] || vop3->neg[!i] || vop3->clamp || vop3->omod))
continue;
break;
case aco_opcode::v_med3_f16:
case aco_opcode::v_med3_f32: { /* clamp */
- VOP3_instruction& vop3 = instr->vop3();
+ VALU_instruction& vop3 = instr->valu();
if (vop3.abs[0] || vop3.abs[1] || vop3.abs[2] || vop3.neg[0] || vop3.neg[1] || vop3.neg[2] ||
vop3.omod != 0 || vop3.opsel != 0)
break;
return false;
if (op_instr[i]->isVOP3()) {
- VOP3_instruction& vop3 = op_instr[i]->vop3();
+ VALU_instruction& vop3 = op_instr[i]->valu();
if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 ||
vop3.opsel == 2)
return false;
}
Instruction* new_instr;
if (neg[0] || neg[1] || abs[0] || abs[1] || opsel || num_sgprs > 1) {
- VOP3_instruction* vop3 =
- create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+ VALU_instruction* vop3 =
+ create_instruction<VALU_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
for (unsigned i = 0; i < 2; i++) {
vop3->neg[i] = neg[i];
vop3->abs[i] = abs[i];
vop3->opsel = opsel;
new_instr = static_cast<Instruction*>(vop3);
} else {
- new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
+ new_instr = create_instruction<VALU_instruction>(new_op, Format::VOPC, 2, 1);
}
new_instr->operands[0] = copy_operand(ctx, Operand(op[0]));
new_instr->operands[1] = copy_operand(ctx, Operand(op[1]));
aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
Instruction* new_instr;
if (cmp->isVOP3()) {
- VOP3_instruction* new_vop3 =
- create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
- VOP3_instruction& cmp_vop3 = cmp->vop3();
+ VALU_instruction* new_vop3 =
+ create_instruction<VALU_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+ VALU_instruction& cmp_vop3 = cmp->valu();
memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3.clamp;
new_vop3->opsel = cmp_vop3.opsel;
new_instr = new_vop3;
} else {
- new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
+ new_instr = create_instruction<VALU_instruction>(new_op, Format::VOPC, 2, 1);
}
new_instr->operands[0] = copy_operand(ctx, cmp->operands[0]);
new_instr->operands[1] = copy_operand(ctx, cmp->operands[1]);
return false;
if (nan_test->isVOP3()) {
- VOP3_instruction& vop3 = nan_test->vop3();
+ VALU_instruction& vop3 = nan_test->valu();
if (vop3.neg[0] != vop3.neg[1] || vop3.abs[0] != vop3.abs[1] || vop3.opsel == 1 ||
vop3.opsel == 2)
return false;
aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
Instruction* new_instr;
if (cmp->isVOP3()) {
- VOP3_instruction* new_vop3 =
- create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
- VOP3_instruction& cmp_vop3 = cmp->vop3();
+ VALU_instruction* new_vop3 =
+ create_instruction<VALU_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+ VALU_instruction& cmp_vop3 = cmp->valu();
memcpy(new_vop3->abs, cmp_vop3.abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3.neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3.clamp;
new_vop3->opsel = cmp_vop3.opsel;
new_instr = new_vop3;
} else {
- new_instr = create_instruction<VOPC_instruction>(new_op, Format::VOPC, 2, 1);
+ new_instr = create_instruction<VALU_instruction>(new_op, Format::VOPC, 2, 1);
}
new_instr->operands[0] = copy_operand(ctx, cmp->operands[0]);
new_instr->operands[1] = copy_operand(ctx, cmp->operands[1]);
if (!op2_instr || op2_instr->opcode != op2)
return false;
- VOP3_instruction* op1_vop3 = op1_instr->isVOP3() ? &op1_instr->vop3() : NULL;
- VOP3_instruction* op2_vop3 = op2_instr->isVOP3() ? &op2_instr->vop3() : NULL;
+ VALU_instruction* op1_vop3 = op1_instr->isVOP3() ? &op1_instr->valu() : NULL;
+ VALU_instruction* op2_vop3 = op2_instr->isVOP3() ? &op2_instr->valu() : NULL;
if (op1_instr->isSDWA() || op2_instr->isSDWA())
return false;
Operand operands[3], bool neg[3], bool abs[3], uint8_t opsel, bool clamp,
unsigned omod)
{
- VOP3_instruction* new_instr = create_instruction<VOP3_instruction>(opcode, Format::VOP3, 3, 1);
+ VALU_instruction* new_instr = create_instruction<VALU_instruction>(opcode, Format::VOP3, 3, 1);
memcpy(new_instr->abs, abs, sizeof(bool[3]));
memcpy(new_instr->neg, neg, sizeof(bool[3]));
new_instr->clamp = clamp;
uint8_t opsel = 0, omod = 0;
bool clamp = false;
if (instr->isVOP3())
- clamp = instr->vop3().clamp;
+ clamp = instr->valu().clamp;
ctx.uses[instr->operands[i].tempId()]--;
create_vop3_for_op3(ctx, op, instr, operands, neg, abs, opsel, clamp, omod);
aco_ptr<Instruction> new_instr;
if (instr->operands[!i].isTemp() &&
instr->operands[!i].getTemp().type() == RegType::vgpr) {
- new_instr.reset(create_instruction<VOP2_instruction>(new_op, Format::VOP2, 3, 2));
+ new_instr.reset(create_instruction<VALU_instruction>(new_op, Format::VOP2, 3, 2));
} else if (ctx.program->gfx_level >= GFX10 ||
(instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
new_instr.reset(
- create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
+ create_instruction<VALU_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
} else {
return false;
}
op_instr->operands[0].getTemp().type() == RegType::vgpr &&
op_instr->operands[1].constantEquals(0)) {
aco_ptr<Instruction> new_instr{
- create_instruction<VOP3_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
+ create_instruction<VALU_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
ctx.uses[instr->operands[i].tempId()]--;
new_instr->operands[0] = op_instr->operands[0];
new_instr->operands[1] = instr->operands[!i];
if (instr->operands[!i].isTemp() &&
instr->operands[!i].getTemp().type() == RegType::vgpr) {
new_instr.reset(
- create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1));
+ create_instruction<VALU_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1));
} else if (ctx.program->gfx_level >= GFX10 ||
(instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
- new_instr.reset(create_instruction<VOP3_instruction>(aco_opcode::v_cndmask_b32,
+ new_instr.reset(create_instruction<VALU_instruction>(aco_opcode::v_cndmask_b32,
asVOP3(Format::VOP2), 3, 1));
} else {
return false;
ctx.uses[instr->operands[i].tempId()]--;
aco_opcode mad_op = is_sub ? aco_opcode::v_mad_i32_i24 : aco_opcode::v_mad_u32_u24;
- aco_ptr<VOP3_instruction> new_instr{
- create_instruction<VOP3_instruction>(mad_op, Format::VOP3, 3, 1)};
+ aco_ptr<VALU_instruction> new_instr{
+ create_instruction<VALU_instruction>(mad_op, Format::VOP3, 3, 1)};
for (unsigned op_idx = 0; op_idx < 3; ++op_idx)
new_instr->operands[op_idx] = ops[op_idx];
new_instr->definitions[0] = instr->definitions[0];
}
void
-propagate_swizzles(VOP3P_instruction* instr, uint8_t opsel_lo, uint8_t opsel_hi)
+propagate_swizzles(VALU_instruction* instr, uint8_t opsel_lo, uint8_t opsel_hi)
{
/* propagate swizzles which apply to a result down to the instruction's operands:
* result = a.xy + b.xx -> result.yx = a.yx + b.xx */
void
combine_vop3p(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
- VOP3P_instruction* vop3p = &instr->vop3p();
+ VALU_instruction* vop3p = &instr->valu();
/* apply clamp */
if (instr->opcode == aco_opcode::v_pk_mul_f16 && instr->operands[1].constantEquals(0x3C00) &&
ssa_info& info = ctx.info[instr->operands[0].tempId()];
if (info.is_vop3p() && instr_info.can_use_output_modifiers[(int)info.instr->opcode]) {
- VOP3P_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->vop3p();
+ VALU_instruction* candidate = &ctx.info[instr->operands[0].tempId()].instr->valu();
candidate->clamp = true;
propagate_swizzles(candidate, vop3p->opsel_lo, vop3p->opsel_hi);
instr->definitions[0].swapTemp(candidate->definitions[0]);
if (info.is_vop3p() && info.instr->opcode == aco_opcode::v_pk_mul_f16 &&
info.instr->operands[1].constantEquals(0x3C00)) {
- VOP3P_instruction* fneg = &info.instr->vop3p();
+ VALU_instruction* fneg = &info.instr->valu();
if ((fneg->opsel_lo | fneg->opsel_hi) & 2)
continue;
continue;
/* no clamp allowed between mul and add */
- if (info.instr->vop3p().clamp)
+ if (info.instr->valu().clamp)
continue;
mul_instr = info.instr;
/* turn packed mul+add into v_pk_fma_f16 */
assert(mul_instr->isVOP3P());
aco_opcode mad = fadd ? aco_opcode::v_pk_fma_f16 : aco_opcode::v_pk_mad_u16;
- aco_ptr<VOP3P_instruction> fma{
- create_instruction<VOP3P_instruction>(mad, Format::VOP3P, 3, 1)};
- VOP3P_instruction* mul = &mul_instr->vop3p();
+ aco_ptr<VALU_instruction> fma{create_instruction<VALU_instruction>(mad, Format::VOP3P, 3, 1)};
+ VALU_instruction* mul = &mul_instr->valu();
for (unsigned i = 0; i < 2; i++) {
fma->operands[i] = op[i];
fma->neg_lo[i] = mul->neg_lo[i];
return false;
if (instr->isVOP3())
- return !instr->vop3().omod && !(instr->vop3().opsel & 0x8);
+ return !instr->valu().omod && !(instr->valu().opsel & 0x8);
return instr->format == Format::VOP2;
}
{
bool is_add = instr->opcode != aco_opcode::v_mul_f32 && instr->opcode != aco_opcode::v_fma_f32;
- aco_ptr<VOP3P_instruction> vop3p{
- create_instruction<VOP3P_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1)};
+ aco_ptr<VALU_instruction> vop3p{
+ create_instruction<VALU_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1)};
- vop3p->opsel_lo = instr->isVOP3() ? ((instr->vop3().opsel & 0x7) << (is_add ? 1 : 0)) : 0x0;
+ vop3p->opsel_lo = instr->isVOP3() ? ((instr->valu().opsel & 0x7) << (is_add ? 1 : 0)) : 0x0;
vop3p->opsel_hi = 0x0;
for (unsigned i = 0; i < instr->operands.size(); i++) {
vop3p->operands[is_add + i] = instr->operands[i];
vop3p->neg_lo[1] ^= true;
}
vop3p->definitions[0] = instr->definitions[0];
- vop3p->clamp = instr->isVOP3() && instr->vop3().clamp;
+ vop3p->clamp = instr->isVOP3() && instr->valu().clamp;
instr = std::move(vop3p);
ctx.info[instr->definitions[0].tempId()].label &= label_f2f16 | label_clamp | label_mul;
if (conv->isSDWA() && (conv->sdwa().dst_sel.size() != 4 || conv->sdwa().sel[0].size() != 2 ||
conv->sdwa().clamp || conv->sdwa().omod)) {
continue;
- } else if (conv->isVOP3() && (conv->vop3().clamp || conv->vop3().omod)) {
+ } else if (conv->isVOP3() && (conv->valu().clamp || conv->valu().omod)) {
continue;
} else if (conv->isDPP()) {
continue;
instr->operands[i].setTemp(conv->operands[0].getTemp());
if (conv->definitions[0].isPrecise())
instr->definitions[0].setPrecise(true);
- instr->vop3p().opsel_hi ^= 1u << i;
+ instr->valu().opsel_hi ^= 1u << i;
if (conv->isSDWA() && conv->sdwa().sel[0].offset() == 2)
- instr->vop3p().opsel_lo |= 1u << i;
+ instr->valu().opsel_lo |= 1u << i;
bool neg = conv->valu().neg[0];
bool abs = conv->valu().abs[0];
- if (!instr->vop3p().abs[i]) {
- instr->vop3p().neg[i] ^= neg;
- instr->vop3p().abs[i] = abs;
+ if (!instr->valu().abs[i]) {
+ instr->valu().neg[i] ^= neg;
+ instr->valu().abs[i] = abs;
}
}
}
if (mul_instr->operands[0].isLiteral())
return;
- if (mul_instr->isVOP3() && mul_instr->vop3().clamp)
+ if (mul_instr->isVOP3() && mul_instr->valu().clamp)
return;
if (mul_instr->isSDWA() || mul_instr->isDPP() || mul_instr->isVOP3P())
return;
bool is_neg = ctx.info[instr->definitions[0].tempId()].is_neg();
bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs();
instr.reset(
- create_instruction<VOP3_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
+ create_instruction<VALU_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
instr->operands[0] = mul_instr->operands[0];
instr->operands[1] = mul_instr->operands[1];
instr->definitions[0] = def;
- VOP3_instruction& new_mul = instr->vop3();
+ VALU_instruction& new_mul = instr->valu();
if (mul_instr->isVOP3()) {
- VOP3_instruction& mul = mul_instr->vop3();
+ VALU_instruction& mul = mul_instr->valu();
new_mul.neg[0] = mul.neg[0];
new_mul.neg[1] = mul.neg[1];
new_mul.abs[0] = mul.abs[0];
bool is_add_mix =
(instr->opcode == aco_opcode::v_fma_mix_f32 ||
instr->opcode == aco_opcode::v_fma_mixlo_f16) &&
- !instr->vop3p().neg_lo[0] &&
- ((instr->operands[0].constantEquals(0x3f800000) && (instr->vop3p().opsel_hi & 0x1) == 0) ||
- (instr->operands[0].constantEquals(0x3C00) && (instr->vop3p().opsel_hi & 0x1) &&
- !(instr->vop3p().opsel_lo & 0x1)));
+ !instr->valu().neg_lo[0] &&
+ ((instr->operands[0].constantEquals(0x3f800000) && (instr->valu().opsel_hi & 0x1) == 0) ||
+ (instr->operands[0].constantEquals(0x3C00) && (instr->valu().opsel_hi & 0x1) &&
+ !(instr->valu().opsel_lo & 0x1)));
bool mad32 = instr->opcode == aco_opcode::v_add_f32 || instr->opcode == aco_opcode::v_sub_f32 ||
instr->opcode == aco_opcode::v_subrev_f32;
bool mad16 = instr->opcode == aco_opcode::v_add_f16 || instr->opcode == aco_opcode::v_sub_f16 ||
ssa_info& info = ctx.info[instr->operands[i].tempId()];
/* no clamp/omod allowed between mul and add */
- if (info.instr->isVOP3() && (info.instr->vop3().clamp || info.instr->vop3().omod))
+ if (info.instr->isVOP3() && (info.instr->valu().clamp || info.instr->valu().omod))
continue;
- if (info.instr->isVOP3P() && info.instr->vop3p().clamp)
+ if (info.instr->isVOP3P() && info.instr->valu().clamp)
continue;
/* v_fma_mix_f32/etc can't do omod */
- if (info.instr->isVOP3P() && instr->isVOP3() && instr->vop3().omod)
+ if (info.instr->isVOP3P() && instr->isVOP3() && instr->valu().omod)
continue;
/* don't promote fp16 to fp32 or remove fp32->fp16->fp32 conversions */
if (is_add_mix && info.instr->definitions[0].bytes() == 2)
aco_opcode mad_op = add_instr->definitions[0].bytes() == 2 ? aco_opcode::v_fma_mixlo_f16
: aco_opcode::v_fma_mix_f32;
- mad.reset(create_instruction<VOP3P_instruction>(mad_op, Format::VOP3P, 3, 1));
+ mad.reset(create_instruction<VALU_instruction>(mad_op, Format::VOP3P, 3, 1));
} else {
assert(!opsel_lo);
assert(!opsel_hi);
mad_op = aco_opcode::v_fma_f64;
}
- mad.reset(create_instruction<VOP3_instruction>(mad_op, Format::VOP3, 3, 1));
+ mad.reset(create_instruction<VALU_instruction>(mad_op, Format::VOP3, 3, 1));
}
for (unsigned i = 0; i < 3; i++) {
ctx.uses[instr->operands[i].tempId()]--;
ctx.uses[ctx.info[instr->operands[i].tempId()].temp.id()]++;
- aco_ptr<VOP2_instruction> new_instr{
- create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)};
+ aco_ptr<VALU_instruction> new_instr{
+ create_instruction<VALU_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)};
new_instr->operands[0] = Operand::zero();
new_instr->operands[1] = instr->operands[!i];
new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
if ((instr->opcode == aco_opcode::v_fma_f32 ||
(instr->opcode == aco_opcode::v_mad_f32 && !instr->definitions[0].isPrecise())) &&
- !instr->vop3().omod && ctx.program->gfx_level >= GFX10 &&
+ !instr->valu().omod && ctx.program->gfx_level >= GFX10 &&
util_bitcount(fp16_mask) > std::max<uint32_t>(util_bitcount(literal_mask), 1)) {
assert(ctx.program->dev.fused_mad_mix);
u_foreach_bit (i, fp16_mask)
if (instr->opcode != aco_opcode::v_pk_fma_f16)
return;
- VOP3P_instruction& vop3p = instr->vop3p();
+ VALU_instruction& vop3p = instr->valu();
unsigned literal_swizzle = ~0u;
for (unsigned i = 0; i < instr->operands.size(); i++) {
if (has_dead_literal && info->fp16_mask) {
aco_ptr<Instruction> fma_mix(
- create_instruction<VOP3P_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1));
+ create_instruction<VALU_instruction>(aco_opcode::v_fma_mix_f32, Format::VOP3P, 3, 1));
- fma_mix->vop3p().clamp = instr->vop3().clamp;
- std::copy(std::cbegin(instr->vop3().abs), std::cend(instr->vop3().abs),
- std::begin(fma_mix->vop3p().neg_hi));
- std::copy(std::cbegin(instr->vop3().neg), std::cend(instr->vop3().neg),
- std::begin(fma_mix->vop3p().neg_lo));
+ fma_mix->valu().clamp = instr->valu().clamp;
+ std::copy(std::cbegin(instr->valu().abs), std::cend(instr->valu().abs),
+ std::begin(fma_mix->valu().neg_hi));
+ std::copy(std::cbegin(instr->valu().neg), std::cend(instr->valu().neg),
+ std::begin(fma_mix->valu().neg_lo));
uint32_t literal = 0;
bool second = false;
u_foreach_bit (i, info->fp16_mask) {
float value = uif(ctx.info[instr->operands[i].tempId()].val);
literal |= _mesa_float_to_half(value) << (second * 16);
- fma_mix->vop3p().opsel_lo |= second << i;
- fma_mix->vop3p().opsel_hi |= 1 << i;
+ fma_mix->valu().opsel_lo |= second << i;
+ fma_mix->valu().opsel_hi |= 1 << i;
second = true;
}
new_op = madak ? aco_opcode::v_fmaak_f16 : aco_opcode::v_fmamk_f16;
uint32_t literal = ctx.info[instr->operands[ffs(info->literal_mask) - 1].tempId()].val;
- new_mad.reset(create_instruction<VOP2_instruction>(new_op, Format::VOP2, 3, 1));
+ new_mad.reset(create_instruction<VALU_instruction>(new_op, Format::VOP2, 3, 1));
for (unsigned i = 0; i < 3; i++) {
if (info->literal_mask & (1 << i))
new_mad->operands[i] = Operand::literal32(literal);
opsel[i] = valu.opsel & (1 << i);
}
} else if (instr->isVOP3P() && is_mad_mix) {
- const VOP3P_instruction& vop3p = instr->vop3p();
+ const VALU_instruction& vop3p = instr->valu();
for (unsigned i = 0; i < MIN2(num_operands, 3); ++i) {
abs[i] = vop3p.neg_hi[i];
neg[i] = vop3p.neg_lo[i];
fprintf(output, "|");
if (instr->isVOP3P() && !is_mad_mix) {
- const VOP3P_instruction& vop3 = instr->vop3p();
+ const VALU_instruction& vop3 = instr->valu();
if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) {
fprintf(output, ".%c%c", vop3.opsel_lo & (1 << i) ? 'y' : 'x',
vop3.opsel_hi & (1 << i) ? 'y' : 'x');
/* check if we can use opsel */
if (instr->format == Format::VOP3) {
assert(byte == 2);
- instr->vop3().opsel |= 1 << idx;
+ instr->valu().opsel |= 1 << idx;
return;
}
if (instr->isVINTERP_INREG()) {
return;
}
if (instr->isVOP3P()) {
- assert(byte == 2 && !(instr->vop3p().opsel_lo & (1 << idx)));
- instr->vop3p().opsel_lo |= 1 << idx;
- instr->vop3p().opsel_hi |= 1 << idx;
+ assert(byte == 2 && !(instr->valu().opsel_lo & (1 << idx)));
+ instr->valu().opsel_lo |= 1 << idx;
+ instr->valu().opsel_hi |= 1 << idx;
return;
}
if (instr->opcode == aco_opcode::v_cvt_f32_ubyte0) {
if (instr->format == Format::VOP3) {
assert(reg.byte() == 2);
assert(can_use_opsel(gfx_level, instr->opcode, -1));
- instr->vop3().opsel |= (1 << 3); /* dst in high half */
+ instr->valu().opsel |= (1 << 3); /* dst in high half */
return;
} else if (instr->isVINTERP_INREG()) {
assert(reg.byte() == 2);
return;
}
- static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
- "Invalid direct instruction cast.");
- static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
- "Invalid direct instruction cast.");
instr->format = Format::VOP2;
instr->valu().opsel_hi = 0;
switch (instr->opcode) {
mov.reset(create_instruction<SOP1_instruction>(aco_opcode::s_mov_b32,
Format::SOP1, 1, 1));
else
- mov.reset(create_instruction<VOP1_instruction>(aco_opcode::v_mov_b32,
+ mov.reset(create_instruction<VALU_instruction>(aco_opcode::v_mov_b32,
Format::VOP1, 1, 1));
mov->operands[0] = instr->operands[0];
mov->definitions[0] = Definition(tmp);
/* change the instruction to VOP3 to enable an arbitrary register pair as dst */
aco_ptr<Instruction> tmp = std::move(instr);
Format format = asVOP3(tmp->format);
- instr.reset(create_instruction<VOP3_instruction>(
+ instr.reset(create_instruction<VALU_instruction>(
tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
std::copy(tmp->operands.begin(), tmp->operands.end(), instr->operands.begin());
std::copy(tmp->definitions.begin(), tmp->definitions.end(), instr->definitions.begin());
aco_ptr<Instruction> res;
if (instr->isVOP1()) {
- res.reset(create_instruction<VOP1_instruction>(
+ res.reset(create_instruction<VALU_instruction>(
instr->opcode, instr->format, instr->operands.size(), instr->definitions.size()));
} else if (instr->isSOP1()) {
res.reset(create_instruction<SOP1_instruction>(
return;
} else if (!exec_val->isVOP3()) {
aco_ptr<Instruction> tmp = std::move(exec_val);
- exec_val.reset(create_instruction<VOPC_instruction>(
+ exec_val.reset(create_instruction<VALU_instruction>(
tmp->opcode, tmp->format, tmp->operands.size(), tmp->definitions.size() + 1));
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), exec_val->operands.begin());
std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(),
exec_val->definitions.begin());
} else {
aco_ptr<Instruction> tmp = std::move(exec_val);
- exec_val.reset(create_instruction<VOP3_instruction>(
+ exec_val.reset(create_instruction<VALU_instruction>(
tmp->opcode, tmp->format, tmp->operands.size(), tmp->definitions.size() + 1));
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), exec_val->operands.begin());
std::copy(tmp->definitions.cbegin(), tmp->definitions.cend(),
exec_val->definitions.begin());
- VOP3_instruction& src = tmp->vop3();
- VOP3_instruction& dst = exec_val->vop3();
+ VALU_instruction& src = tmp->valu();
+ VALU_instruction& dst = exec_val->valu();
dst.opsel = src.opsel;
dst.omod = src.omod;
dst.clamp = src.clamp;
/* check opsel */
if (instr->isVOP3()) {
- VOP3_instruction& vop3 = instr->vop3();
+ VALU_instruction& vop3 = instr->valu();
check(vop3.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+",
instr.get());
(instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
"v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
} else if (instr->isVOP3P()) {
- VOP3P_instruction& vop3p = instr->vop3p();
+ VALU_instruction& vop3p = instr->valu();
for (unsigned i = 0; i < instr->operands.size(); i++) {
if (instr->operands[i].hasRegClass() &&
instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
instr->opcode == aco_opcode::v_fma_mix_f32;
- return ((instr->vop3p().opsel_lo >> index) & 1) == (byte >> 1) &&
- ((instr->vop3p().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
+ return ((instr->valu().opsel_lo >> index) & 1) == (byte >> 1) &&
+ ((instr->valu().opsel_hi >> index) & 1) == (fma_mix || (byte >> 1));
}
if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
return true;
{
if (src.bytes() == 2) {
Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
- res->vop3().abs[1] = true;
+ res->valu().abs[1] = true;
return res;
} else {
Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
- res->vop3().abs[1] = true;
+ res->valu().abs[1] = true;
return res;
}
}
//~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
//~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
- aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
+ aco_ptr<VALU_instruction> add3{
+ create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
add3->operands[0] = Operand::zero();
add3->operands[1] = Operand::zero();
add3->operands[2] = Operand::zero();
//~gfx9>> integer addition + clamp ; d1ff8000 02010080
//~gfx10>> integer addition + clamp ; d76d8000 02010080
- aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
+ aco_ptr<VALU_instruction> add3{
+ create_instruction<VALU_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
add3->operands[0] = Operand::zero();
add3->operands[1] = Operand::zero();
add3->operands[2] = Operand::zero();
//! v1: %res1 = v_add_u32 %a, %tmp1
//! p_unit_test 1, %res1
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
- tmp->vop3().clamp = true;
+ tmp->valu().clamp = true;
writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
//! v1: %tmp2 = v_add_u32 %b, %c
//! p_unit_test 2, %res2
tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
- tmp->vop3().clamp = true;
+ tmp->valu().clamp = true;
writeout(2, tmp);
finish_opt_test();
//! p_unit_test 4, %res4
Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp4, b);
- res4->vop3().neg[0] = true;
+ res4->valu().neg[0] = true;
writeout(4, res4);
//! v1: %tmp5 = v_mov_b32 %a row_mirror bound_ctrl:1
//! p_unit_test 5, %res5
Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp5, b);
- res5->vop3().clamp = true;
+ res5->valu().clamp = true;
writeout(5, res5);
//! v1: %res6 = v_add_f32 |%a|, %b row_mirror bound_ctrl:1
auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
tmp6->dpp16().neg[0] = true;
auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1), tmp6, b);
- res6->vop3().abs[0] = true;
+ res6->valu().abs[0] = true;
writeout(6, res6);
//! v1: %res7 = v_subrev_f32 %a, |%b| row_mirror bound_ctrl:1
//! p_unit_test 7, %res7
Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), a, dpp_row_mirror);
auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), b, tmp7);
- res7->vop3().abs[0] = true;
+ res7->valu().abs[0] = true;
writeout(7, res7);
//! v1: %tmp11 = v_mov_b32 -%a row_mirror bound_ctrl:1
//! p_unit_test 4, %res4:v[2]
Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b);
- res4->vop3().neg[0] = true;
+ res4->valu().neg[0] = true;
writeout(4, Operand(res4, reg_v2));
//! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
//! p_unit_test 5, %res5:v[2]
Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b);
- res5->vop3().clamp = true;
+ res5->valu().clamp = true;
writeout(5, Operand(res5, reg_v2));
//! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1
auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
tmp6->dpp16().neg[0] = true;
auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b);
- res6->vop3().abs[0] = true;
+ res6->valu().abs[0] = true;
writeout(6, Operand(res6, reg_v2));
//! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1
//! p_unit_test 7, %res7:v[2]
Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2));
- res7->vop3().abs[0] = true;
+ res7->valu().abs[0] = true;
writeout(7, Operand(res7, reg_v2));
//! v1: %tmp12:v[2] = v_mov_b32 -%a:v[0] row_mirror bound_ctrl:1
//! p_unit_test 0, %res0
Temp byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(8u), Operand::zero());
- VOP3_instruction *mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b)->vop3();
+ VALU_instruction* mul =
+ &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b)->valu();
mul->neg[0] = true;
mul->abs[0] = true;
writeout(0, mul->definitions[0].getTemp());
//! p_unit_test 1, %res1
byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(),
Operand::c32(8u), Operand::zero());
- mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b)->vop3();
+ mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b)->valu();
mul->omod = 2;
writeout(1, mul->definitions[0].getTemp());