Opc == AMDGPU::V_CNDMASK_B64_PSEUDO) {
const MachineOperand *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0);
const MachineOperand *Src1 = TII->getNamedOperand(*MI, AMDGPU::OpName::src1);
- if (Src1->isIdenticalTo(*Src0)) {
+ int Src1ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
+ int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
+ if (Src1->isIdenticalTo(*Src0) &&
+ (Src1ModIdx == -1 || !MI->getOperand(Src1ModIdx).getImm()) &&
+ (Src0ModIdx == -1 || !MI->getOperand(Src0ModIdx).getImm())) {
LLVM_DEBUG(dbgs() << "Folded " << *MI << " into ");
+ auto &NewDesc =
+ TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY : getMovOpc(false));
int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
if (Src2Idx != -1)
MI->RemoveOperand(Src2Idx);
MI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1));
- mutateCopyOp(*MI, TII->get(Src0->isReg() ? (unsigned)AMDGPU::COPY
- : getMovOpc(false)));
+ if (Src1ModIdx != -1)
+ MI->RemoveOperand(Src1ModIdx);
+ if (Src0ModIdx != -1)
+ MI->RemoveOperand(Src0ModIdx);
+ mutateCopyOp(*MI, NewDesc);
LLVM_DEBUG(dbgs() << *MI << '\n');
return true;
}
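A minimal MIR sketch (mine, not part of the patch) of the fold guarded above, for the case where both sources are the same register and both new modifier immediates are zero:
# Before: identical, unmodified sources
%3:vgpr_32 = V_CNDMASK_B32_e64 0, %1, 0, %1, %2, implicit $exec
# After mutateCopyOp drops src2, src1 and both modifier operands:
%3:vgpr_32 = COPY %1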
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
.addReg(SrcCond);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addImm(0)
.addReg(Src0, 0, AMDGPU::sub0)
+ .addImm(0)
.addReg(Src1, 0, AMDGPU::sub0)
.addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addImm(0)
.addReg(Src0, 0, AMDGPU::sub1)
+ .addImm(0)
.addReg(Src1, 0, AMDGPU::sub1)
.addReg(SrcCondCopy);
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(Cond[0]);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
} else if (Cond.size() == 2) {
.addImm(-1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
.addImm(0)
.addImm(-1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
BuildMI(MBB, I, DL, get(AMDGPU::COPY), SReg)
.add(RegOp);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(TrueReg)
+ .addImm(0)
.addReg(FalseReg)
.addReg(SReg);
break;
.addImm(-1)
.addImm(0);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
break;
.addImm(0)
.addImm(-1);
BuildMI(MBB, I, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
+ .addImm(0)
.addReg(FalseReg)
+ .addImm(0)
.addReg(TrueReg)
.addReg(SReg);
llvm_unreachable("Unhandled branch predicate EXECZ");
// Can't shrink instruction with three operands.
// FIXME: v_cndmask_b32 has 3 operands and is shrinkable, but we need to add
// a special case for it. It can only be shrunk if the third operand
- // is vcc. We should handle this the same way we handle vopc, by addding
+ // is vcc, and src0_modifiers and src1_modifiers are not set.
+ // We should handle this the same way we handle vopc, by adding
// a register allocation hint pre-regalloc and then do the shrinking
// post-regalloc.
if (Src2) {
}
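A hedged sketch of the eligibility test the FIXME above describes, using the SIInstrInfo helpers this patch already relies on elsewhere; this is an illustration, not code from the patch, and the surrounding shrink logic is elided:
// Shrinkable to the VOP2 form only if the condition is already in VCC and
// neither source carries neg/abs modifiers (the e32 encoding cannot hold them).
bool CanShrinkCndmask =
    MI.getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
    TII->getNamedOperand(MI, AMDGPU::OpName::src2)->getReg() == AMDGPU::VCC &&
    !TII->hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) &&
    !TII->hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers);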
// Return type of input modifiers operand for specified input operand
-class getSrcMod <ValueType VT> {
+class getSrcMod <ValueType VT, bit EnableF32SrcMods> {
bit isFP = !if(!eq(VT.Value, f16.Value), 1,
           !if(!eq(VT.Value, f32.Value), 1,
           !if(!eq(VT.Value, f64.Value), 1,
           0)));
Operand ret = !if(!eq(VT.Size, 64),
                !if(isFP, FP64InputMods, Int64InputMods),
                !if(isFP,
                  !if(!eq(VT.Value, f16.Value),
                     FP16InputMods,
                     FP32InputMods
                   ),
-                 Int32InputMods)
+                 !if(EnableF32SrcMods, FP32InputMods, Int32InputMods))
                );
}
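For clarity, the effect of the new parameter on some common instantiations (my reading of the class above, not part of the patch):
// getSrcMod<f32, 0>.ret --> FP32InputMods   (unchanged)
// getSrcMod<i32, 0>.ret --> Int32InputMods  (unchanged)
// getSrcMod<i32, 1>.ret --> FP32InputMods   (new; used by VOP2e_I32_I32_I32_I1 below)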
// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
- bit HasIntClamp, bit HasModifiers, bit HasOMod,
+ bit HasIntClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
dag ret =
/* endif */ )
/* NumSrcArgs == 3 */,
!if (!eq(HasModifiers, 1),
- // VOP3 with modifiers
- !if (!eq(HasOMod, 1),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp, omod:$omod),
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- Src1Mod:$src1_modifiers, Src1RC:$src1,
- Src2Mod:$src2_modifiers, Src2RC:$src2,
- clampmod:$clamp))
+ !if (!eq(HasSrc2Mods, 1),
+ // VOP3 with modifiers
+ !if (!eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp, omod:$omod),
+ !if (!eq(HasIntClamp, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
+ clampmod:$clamp),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2))),
+ // VOP3 with modifiers except src2
+ !if (!eq(HasOMod, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2RC:$src2, clampmod:$clamp, omod:$omod),
+ !if (!eq(HasIntClamp, 1),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2RC:$src2, clampmod:$clamp),
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2RC:$src2))))
/* else */,
// VOP3 without modifiers
!if (!eq(HasIntClamp, 1),
int Pattern = 1;
}
-class VOPProfile <list<ValueType> _ArgVT> {
+class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0> {
field list<ValueType> ArgVT = _ArgVT;
+ field bit EnableF32SrcMods = _EnableF32SrcMods;
field ValueType DstVT = ArgVT[0];
field ValueType Src0VT = ArgVT[1];
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
- field Operand Src0Mod = getSrcMod<Src0VT>.ret;
- field Operand Src1Mod = getSrcMod<Src1VT>.ret;
- field Operand Src2Mod = getSrcMod<Src2VT>.ret;
+ field Operand Src0Mod = getSrcMod<Src0VT, EnableF32SrcMods>.ret;
+ field Operand Src1Mod = getSrcMod<Src1VT, EnableF32SrcMods>.ret;
+ field Operand Src2Mod = getSrcMod<Src2VT, EnableF32SrcMods>.ret;
field Operand Src0ModDPP = getSrcModExt<Src0VT>.ret;
field Operand Src1ModDPP = getSrcModExt<Src1VT>.ret;
field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret;
field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
// TODO: Modifiers logic is somewhat adhoc here, to be refined later
- field bit HasModifiers = isModifierType<Src0VT>.ret;
+ // HasModifiers affects the normal and DPP encodings. We take note of
+ // EnableF32SrcMods, which also enables source modifiers for the i32 type.
+ field bit HasModifiers = BitOr<isModifierType<Src0VT>.ret, EnableF32SrcMods>.ret;
+ // HasSrc*FloatMods affects the SDWA encoding. We ignore EnableF32SrcMods.
field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
+ // HasSrc*IntMods affects the SDWA encoding. We ignore EnableF32SrcMods.
field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
- field bit HasClamp = HasModifiers;
+ field bit HasClamp = isModifierType<Src0VT>.ret;
field bit HasSDWAClamp = EmitDst;
field bit HasFPClamp = BitAnd<isFloatType<DstVT>.ret, HasClamp>.ret;
field bit HasIntClamp = !if(isFloatType<DstVT>.ret, 0, HasClamp);
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasIntClamp, HasModifiers, HasOMod, Src0Mod, Src1Mod,
- Src2Mod>.ret;
+ HasIntClamp, HasModifiers, HasSrc2Mods,
+ HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
field dag InsVOP3P = getInsVOP3P<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp,
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
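For concreteness, a worked reading of the new getIns64 selection (my own inference from the code above, not text from the patch): for the VOP2e_I32_I32_I32_I1 profile defined further down, NumSrcArgs is 3, HasModifiers becomes 1 through EnableF32SrcMods, and HasSrc2Mods, HasIntClamp and HasOMod are all 0, so Ins64 resolves to the "modifiers except src2" arm:
// (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
//      Src1Mod:$src1_modifiers, Src1RC:$src1,
//      Src2RC:$src2)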
multiclass SelectPat <ValueType vt, Instruction inst> {
def : GCNPat <
(vt (select i1:$src0, vt:$src1, vt:$src2)),
- (inst $src2, $src1, $src0)
+ (inst (i32 0), $src2, (i32 0), $src1, $src0)
>;
}
def : GCNPat <
(i32 (sext i1:$src0)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src0)
>;
class Ext32Pat <SDNode ext> : GCNPat <
(i32 (ext i1:$src0)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src0)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 1), $src0)
>;
def : Ext32Pat <zext>;
class ZExt_i64_i1_Pat <SDNode ext> : GCNPat <
(i64 (ext i1:$src)),
(REG_SEQUENCE VReg_64,
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
- (S_MOV_B32 (i32 0)), sub1)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 1), $src),
+ sub0, (S_MOV_B32 (i32 0)), sub1)
>;
def : GCNPat <
(i64 (sext i1:$src)),
(REG_SEQUENCE VReg_64,
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0,
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub0,
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src), sub1)
>;
class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : GCNPat <
def : GCNPat <
(f16 (sint_to_fp i1:$src)),
- (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src))
+ (V_CVT_F16_F32_e32 (
+ V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+ $src))
>;
def : GCNPat <
(f16 (uint_to_fp i1:$src)),
- (V_CVT_F16_F32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src))
+ (V_CVT_F16_F32_e32 (
+ V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+ $src))
>;
def : GCNPat <
(f32 (sint_to_fp i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_NEG_ONE),
+ $src)
>;
def : GCNPat <
(f32 (uint_to_fp i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 CONST.FP32_ONE),
+ $src)
>;
def : GCNPat <
(f64 (sint_to_fp i1:$src)),
- (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src))
+ (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1),
+ $src))
>;
def : GCNPat <
(f64 (uint_to_fp i1:$src)),
- (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src))
+ (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 1),
+ $src))
>;
//===----------------------------------------------------------------------===//
ConstrainRegs.insert(SrcReg);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
.addImm(0)
+ .addImm(0)
+ .addImm(0)
.addImm(-1)
.addReg(SrcReg);
DeadCopies.push_back(&MI);
if (!Sel || Sel->getOpcode() != AMDGPU::V_CNDMASK_B32_e64)
return AMDGPU::NoRegister;
+ if (TII->hasModifiersSet(*Sel, AMDGPU::OpName::src0_modifiers) ||
+ TII->hasModifiersSet(*Sel, AMDGPU::OpName::src1_modifiers))
+ return AMDGPU::NoRegister;
+
Op1 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src0);
Op2 = TII->getNamedOperand(*Sel, AMDGPU::OpName::src1);
MachineOperand *CC = TII->getNamedOperand(*Sel, AMDGPU::OpName::src2);
class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
- 0, HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
+ 0, HasModifiers, HasModifiers, HasOMod,
+ Src0Mod, Src1Mod, Src2Mod>.ret;
let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
VGPR_32:$src2, // stub argument
let HasExtSDWA9 = 1;
}
-// Read in from vcc or arbitrary SGPR
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
+// Read in from vcc or arbitrary SGPR.
+// Enable f32 source modifiers on i32 input type.
+def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1], /*EnableF32SrcMods=*/1> {
let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
let Asm32 = "$vdst, $src0, $src1, vcc";
- let Asm64 = "$vdst, $src0, $src1, $src2";
+ let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2";
let AsmSDWA = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmSDWA9 = "$vdst, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
let AsmDPP = "$vdst, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
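A usage sketch mirroring the assembler tests added near the end of this patch; the SI/CI encoding is copied from those tests:
v_cndmask_b32_e64 v1, -|v3|, v5, vcc
// SICI: encoding: [0x01,0x01,0x00,0xd2,0x03,0x0b,0xaa,0x21]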
src0_sel:$src0_sel, src1_sel:$src1_sel);
let InsDPP = (ins DstRCDPP:$old,
- Src0DPP:$src0,
- Src1DPP:$src1,
+ Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let HasExt = 1;
class ZExt_i16_i1_Pat <SDNode ext> : GCNPat <
(i16 (ext i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
+ (V_CNDMASK_B32_e64 (i32 0/*src0mod*/), (i32 0/*src0*/),
+ (i32 0/*src1mod*/), (i32 1/*src1*/),
+ $src)
>;
let Predicates = [Has16BitInsts] in {
def : GCNPat <
(i16 (sext i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
+ (V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), $src)
>;
// Undo sub x, c -> add x, -c canonicalization since c is more likely
class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
// v_div_scale_{f32|f64} do not support input modifiers.
let HasModifiers = 0;
+ let HasClamp = 0;
let HasOMod = 0;
let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
let Asm64 = " $vdst, $sdst, $src0, $src1, $src2";
%13:vgpr_32 = V_OR_B32_e32 %11, %12.sub2, implicit $exec
%14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec
%15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec
- %16:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %15, implicit $exec
+ %16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec
BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
S_ENDPGM 0
successors: %bb.10
%31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
%34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec
- %35:vgpr_32 = V_CNDMASK_B32_e64 0, -1, %34, implicit $exec
+ %35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec
%28:vreg_1 = COPY %35
S_BRANCH %bb.10
$exec = S_OR_B64 $exec, %29, implicit-def $scc
%36:vreg_1 = COPY %28
%37:sreg_64_xexec = V_CMP_NE_U32_e64 0, %36, implicit $exec
- %38:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %37, implicit $exec
+ %38:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %37, implicit $exec
%39:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
undef %40.sub0:vreg_128 = COPY %39
%40.sub1:vreg_128 = COPY %39
%29.sub0:vreg_128 = COPY %1
%30:sreg_64 = V_CMP_NE_U32_e64 0, %28, implicit $exec
%31:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %28, implicit $exec
- dead %32:vgpr_32 = V_CNDMASK_B32_e64 0, -1, killed %31, implicit $exec
+ dead %32:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %31, implicit $exec
%33:vreg_128 = COPY %29
%33.sub1:vreg_128 = COPY undef %32
%34:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
%18:vgpr_32 = V_MIN_F32_e32 1065353216, killed %17, implicit $exec
%19:sreg_64_xexec = V_CMP_NEQ_F32_e64 0, 1065353216, 0, killed %18, 0, implicit $exec
%20:vgpr_32 = V_MOV_B32_e32 2143289344, implicit $exec
- %21:vgpr_32 = V_CNDMASK_B32_e64 0, killed %20, killed %19, implicit $exec
+ %21:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, killed %20, killed %19, implicit $exec
%22:sreg_64 = V_CMP_LT_F32_e64 0, 0, 0, killed %21, 0, implicit $exec
%23:sreg_64 = COPY $exec, implicit-def $exec
%24:sreg_64 = S_AND_B64 %23, %22, implicit-def dead $scc
body: |
bb.0.entry:
%0 = IMPLICIT_DEF
- %1 = V_CNDMASK_B32_e64 0, 0, %0, implicit $exec
- %2 = V_CNDMASK_B32_e64 %1, %1, %0, implicit $exec
+ %1 = V_CNDMASK_B32_e64 0, 0, 0, 0, %0, implicit $exec
+ %2 = V_CNDMASK_B32_e64 0, %1, 0, %1, %0, implicit $exec
%3 = IMPLICIT_DEF
- %4 = V_CNDMASK_B32_e64 %3, %3, %0, implicit $exec
+ %4 = V_CNDMASK_B32_e64 0, %3, 0, %3, %0, implicit $exec
%5 = COPY %1
- %6 = V_CNDMASK_B32_e64 %5, 0, %0, implicit $exec
+ %6 = V_CNDMASK_B32_e64 0, %5, 0, 0, %0, implicit $exec
$vcc = IMPLICIT_DEF
%7 = V_CNDMASK_B32_e32 %3, %3, implicit $exec, implicit $vcc
# GCN-LABEL: name: cluster_cmp_cndmask
# GCN: S_NOP 0, implicit-def $vcc
# GCN-NEXT: %3:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit $exec
-# GCN-NEXT: dead %4:vgpr_32 = V_CNDMASK_B32_e64 %0, %1, %3, implicit $exec
+# GCN-NEXT: dead %4:vgpr_32 = V_CNDMASK_B32_e64 0, %0, 0, %1, %3, implicit $exec
name: cluster_cmp_cndmask
registers:
- { id: 0, class: vgpr_32 }
%1 = V_MOV_B32_e32 0, implicit $exec
%3 = V_CMP_EQ_I32_e64 %0, %1, implicit $exec
S_NOP 0, implicit def $vcc
- %4 = V_CNDMASK_B32_e64 %0, %1, %3, implicit $exec
+ %4 = V_CNDMASK_B32_e64 0, %0, 0, %1, %3, implicit $exec
...
# GCN-LABEL: name: cluster_multi_use_cmp_cndmask
# GCN: %4:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit $exec
-# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 %2, %1, %4, implicit $exec
-# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 %1, %3, %4, implicit $exec
+# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 0, %2, 0, %1, %4, implicit $exec
+# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 0, %1, 0, %3, %4, implicit $exec
name: cluster_multi_use_cmp_cndmask
registers:
- { id: 0, class: vgpr_32 }
%4 = V_CMP_EQ_I32_e64 %0, %1, implicit $exec
S_NOP 0, implicit def $vcc
- %5 = V_CNDMASK_B32_e64 %2, %1, %4, implicit $exec
- %6 = V_CNDMASK_B32_e64 %1, %3, %4, implicit $exec
+ %5 = V_CNDMASK_B32_e64 0, %2, 0, %1, %4, implicit $exec
+ %6 = V_CNDMASK_B32_e64 0, %1, 0, %3, %4, implicit $exec
...
# GCN-LABEL: name: cluster_multi_use_cmp_cndmask2
# GCN: %4:sreg_64_xexec = V_CMP_EQ_I32_e64 %0, %1, implicit $exec
-# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 %2, %1, %4, implicit $exec
+# GCN-NEXT: dead %5:vgpr_32 = V_CNDMASK_B32_e64 0, %2, 0, %1, %4, implicit $exec
# GCN-NEXT: %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 %1, %3, %4, implicit $exec
+# GCN-NEXT: dead %6:vgpr_32 = V_CNDMASK_B32_e64 0, %1, 0, %3, %4, implicit $exec
name: cluster_multi_use_cmp_cndmask2
registers:
- { id: 0, class: vgpr_32 }
%1 = V_MOV_B32_e32 0, implicit $exec
%4 = V_CMP_EQ_I32_e64 %0, %1, implicit $exec
%2 = V_MOV_B32_e32 0, implicit $exec
- %5 = V_CNDMASK_B32_e64 %2, %1, %4, implicit $exec
+ %5 = V_CNDMASK_B32_e64 0, %2, 0, %1, %4, implicit $exec
%3 = V_MOV_B32_e32 0, implicit $exec
- %6 = V_CNDMASK_B32_e64 %1, %3, %4, implicit $exec
+ %6 = V_CNDMASK_B32_e64 0, %1, 0, %3, %4, implicit $exec
...
bb.2:
%1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec
- %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec
+ %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
bb.2:
%1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
- %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec
+ %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
bb.2:
%1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
- %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %1, implicit $exec
+ %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec
DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp)
undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
%2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
$vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
# GCN: name: negated_cond_vop2_redef_vcc1
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
-# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
# GCN-NEXT: $vcc_lo = COPY $sgpr0
# GCN-NEXT: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
$vcc_lo = COPY $sgpr0
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
# GCN: name: negated_cond_vop2_redef_vcc2
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
-# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
# GCN-NEXT: $vcc_hi = COPY $sgpr0
# GCN-NEXT: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
$vcc_hi = COPY $sgpr0
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
# GCN: name: negated_cond_vop3_redef_cmp
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
-# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
# GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
# GCN-NEXT: %2.sub1:sreg_64_xexec = COPY $sgpr0
# GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
%2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
%2.sub1 = COPY $sgpr0
$vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
body: |
bb.0:
$vcc = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, $vcc, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $vcc, implicit $exec
%2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
$vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
body: |
bb.0:
$vcc = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, $vcc, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $vcc, implicit $exec
V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 killed $vcc, $exec, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
# GCN: name: negated_cond_vop3_redef_sel
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
-# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
# GCN-NEXT: %1:vgpr_32 = COPY $vgpr0
# GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
# GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
%1:vgpr_32 = COPY $vgpr0
%2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
$vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
# GCN: name: negated_cond_vop2_used_sel
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
-# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
---
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
# GCN: name: negated_cond_vop2_used_vcc
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
-# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+# GCN-NEXT: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
# GCN-NEXT: V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
# GCN-NEXT: $sgpr0_sgpr1 = COPY $vcc
# GCN-NEXT: $vcc = S_ANDN2_B64 $exec, %0, implicit-def $scc
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
$sgpr0_sgpr1 = COPY $vcc
$vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
# GCN: name: negated_cond_vop3_sel_wrong_subreg1
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
# GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF
-# GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+# GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
# GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec
# GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
%1.sub1 = IMPLICIT_DEF
- %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
%2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec
$vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
# GCN: name: negated_cond_vop3_sel_wrong_subreg2
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
-# GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+# GCN-NEXT: %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
# GCN-NEXT: %1.sub1:vreg_64 = IMPLICIT_DEF
# GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec
# GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
%1.sub1 = IMPLICIT_DEF
%2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub1, 1, implicit $exec
$vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
%1.sub1 = IMPLICIT_DEF
- %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
%2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub0, 1, implicit $exec
$vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
S_CBRANCH_VCCNZ %bb.2, implicit killed $vcc
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1.sub0:vreg_64 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
%1.sub1 = IMPLICIT_DEF
%2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub0, 1, implicit $exec
$vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
# GCN: name: negated_cond_vop3_sel_subreg_overlap
# GCN: %0:sreg_64_xexec = IMPLICIT_DEF
-# GCN-NEXT: %1.sub2:vreg_128 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+# GCN-NEXT: %1.sub2:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
# GCN-NEXT: %1.sub2_sub3:vreg_128 = IMPLICIT_DEF
# GCN-NEXT: %2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub2, 1, implicit $exec
# GCN-NEXT: $vcc = S_AND_B64 %2, $exec, implicit-def dead $scc
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1.sub2:vreg_128 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1.sub2:vreg_128 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
%1.sub2_sub3 = IMPLICIT_DEF
%2:sreg_64_xexec = V_CMP_NE_U32_e64 %1.sub2, 1, implicit $exec
$vcc = S_AND_B64 killed %2, $exec, implicit-def dead $scc
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
bb.1:
V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
...
# GCN: name: negated_cond_vop2_different_blocks_cmp_and
-# GCN: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+# GCN: %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
# GCN: $vcc = S_AND_B64 $exec, %2, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit $vcc
---
body: |
bb.0:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
%2:sreg_64_xexec = V_CMP_NE_U32_e64 %1, 1, implicit $exec
bb.1:
...
# GCN: name: negated_cond_vop2_not_dominated_blocks
-# GCN: V_CNDMASK_B32_e64 0, 1,
+# GCN: V_CNDMASK_B32_e64 0, 0, 0, 1,
# GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
# GCN-NEXT: S_CBRANCH_VCCNZ %bb.4, implicit $vcc
---
bb.1:
%0:sreg_64_xexec = IMPLICIT_DEF
- %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %0, implicit $exec
+ %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %0, implicit $exec
bb.2:
V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
%46 = V_AND_B32_e32 1, killed %45, implicit $exec
%21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0 :: (dereferenceable invariant load 4)
%25 = V_CMP_GE_F32_e64 0, 0, 0, killed %21, 0, implicit $exec
- %26 = V_CNDMASK_B32_e64 0, -1, killed %25, implicit $exec
+ %26 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed %25, implicit $exec
%62 = IMPLICIT_DEF
bb.29:
...
# GCN-LABEL: name: shrink_add_vop3{{$}}
# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_ADD_I32_e64 %19, %17, implicit $exec
-# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
name: shrink_add_vop3
alignment: 0
exposesReturnsTwice: false
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
%29, %9 = V_ADD_I32_e64 %19, %17, implicit $exec
- %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec
+ %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
---
# GCN-LABEL: name: shrink_sub_vop3{{$}}
# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUB_I32_e64 %19, %17, implicit $exec
-# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
name: shrink_sub_vop3
alignment: 0
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
%29, %9 = V_SUB_I32_e64 %19, %17, implicit $exec
- %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec
+ %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
---
# GCN-LABEL: name: shrink_subrev_vop3{{$}}
# GCN: %29:vgpr_32, %9:sreg_64_xexec = V_SUBREV_I32_e64 %19, %17, implicit $exec
-# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
name: shrink_subrev_vop3
alignment: 0
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
%29, %9 = V_SUBREV_I32_e64 %19, %17, implicit $exec
- %24 = V_CNDMASK_B32_e64 0, 1, killed %9, implicit $exec
+ %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
---
# GCN-LABEL: name: check_addc_src2_vop3{{$}}
# GCN: %29:vgpr_32, $vcc = V_ADDC_U32_e64 %19, %17, %9, implicit $exec
-# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
name: check_addc_src2_vop3
alignment: 0
exposesReturnsTwice: false
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
%9 = S_MOV_B64 0
%29, $vcc = V_ADDC_U32_e64 %19, %17, %9, implicit $exec
- %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec
+ %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
---
# GCN-LABEL: name: shrink_addc_vop3{{$}}
# GCN: %29:vgpr_32 = V_ADDC_U32_e32 %19, %17, implicit-def $vcc, implicit $vcc, implicit $exec
-# GCN %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
name: shrink_addc_vop3
alignment: 0
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
$vcc = S_MOV_B64 0
%29, $vcc = V_ADDC_U32_e64 %19, %17, $vcc, implicit $exec
- %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec
+ %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
---
# GCN-LABEL: name: shrink_addc_undef_vcc{{$}}
# GCN: %29:vgpr_32 = V_ADDC_U32_e32 %19, %17, implicit-def $vcc, implicit undef $vcc, implicit $exec
-# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec
+# GCN: %24:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
name: shrink_addc_undef_vcc
alignment: 0
exposesReturnsTwice: false
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, implicit $exec
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, implicit $exec
%29, $vcc = V_ADDC_U32_e64 %19, %17, undef $vcc, implicit $exec
- %24 = V_CNDMASK_B32_e64 0, 1, killed $vcc, implicit $exec
+ %24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, implicit $exec
S_ENDPGM 0
%36:vgpr_32 = V_MAC_F32_e32 0, %33, %36, implicit $exec
%37:vgpr_32 = V_MAD_F32 0, %35, 0, 0, 0, 0, 0, 0, implicit $exec
%38:sreg_64_xexec = V_CMP_NE_U32_e64 0, %5, implicit $exec
- %39:vgpr_32 = V_CNDMASK_B32_e64 0, 1, %38, implicit $exec
+ %39:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %38, implicit $exec
V_CMP_NE_U32_e32 1, %39, implicit-def $vcc, implicit $exec
$vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
%40:vgpr_32 = V_ADD_F32_e32 %36, %37, implicit $exec
$vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
$vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
- $vgpr1 = V_CNDMASK_B32_e64 0, -1, killed $sgpr0_sgpr1, implicit $exec
+ $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec
$sgpr0_sgpr1 = COPY $exec, implicit-def $exec
SI_SPILL_S64_SAVE $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$sgpr2_sgpr3 = S_AND_B64 killed $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
$vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
$vcc = V_CMP_NE_U32_e64 0, $vgpr0, implicit $exec
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 1, killed $vgpr1, implicit $exec
- $vgpr1 = V_CNDMASK_B32_e64 0, -1, killed $sgpr0_sgpr1, implicit $exec
+ $vgpr1 = V_CNDMASK_B32_e64 0, 0, 0, -1, killed $sgpr0_sgpr1, implicit $exec
$sgpr0_sgpr1 = COPY $exec, implicit-def $exec
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def dead $scc
SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (store 8 into %stack.0, align 4, addrspace 5)
$vgpr4 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
$vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile load 4 from `float addrspace(1)* null`, addrspace 1)
$sgpr0_sgpr1 = V_CMP_EQ_U32_e64 3, killed $sgpr4, implicit $exec
- $vgpr3 = V_CNDMASK_B32_e64 -1082130432, 1065353216, killed $sgpr0_sgpr1, implicit $exec
+ $vgpr3 = V_CNDMASK_B32_e64 0, -1082130432, 0, 1065353216, killed $sgpr0_sgpr1, implicit $exec
$vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec, implicit $exec
S_BRANCH %bb.1
// VI: v_add_f32_e64 v1, v3, v5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x02,0x00]
-// TODO: Modifier tests
+// TODO: Modifier tests (v_cndmask done)
v_cndmask_b32 v1, v3, v5, s[4:5]
// SICI: v_cndmask_b32_e64 v1, v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x00]
// SICI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0xaa,0x01]
// VI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0xaa,0x01]
+v_cndmask_b32 v1, -v3, v5, s[4:5]
+// SICI: v_cndmask_b32_e64 v1, -v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd2,0x03,0x0b,0x12,0x20]
+// VI: v_cndmask_b32_e64 v1, -v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x20]
+
+v_cndmask_b32_e64 v1, v3, |v5|, s[4:5]
+// SICI: v_cndmask_b32_e64 v1, v3, |v5|, s[4:5] ; encoding: [0x01,0x02,0x00,0xd2,0x03,0x0b,0x12,0x00]
+// VI: v_cndmask_b32_e64 v1, v3, |v5|, s[4:5] ; encoding: [0x01,0x02,0x00,0xd1,0x03,0x0b,0x12,0x00]
+
+v_cndmask_b32_e64 v1, -abs(v3), v5, vcc
+// SICI: v_cndmask_b32_e64 v1, -|v3|, v5, vcc ; encoding: [0x01,0x01,0x00,0xd2,0x03,0x0b,0xaa,0x21]
+// VI: v_cndmask_b32_e64 v1, -|v3|, v5, vcc ; encoding: [0x01,0x01,0x00,0xd1,0x03,0x0b,0xaa,0x21]
+
//TODO: readlane, writelane
v_add_f32 v1, v3, s5
# VI: v_cndmask_b32_e64 v1, v3, v5, vcc ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0xaa,0x01]
0x01 0x00 0x00 0xd1 0x03 0x0b 0xaa 0x01
+# VI: v_cndmask_b32_e64 v1, -v3, v5, s[4:5] ; encoding: [0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x20]
+0x01,0x00,0x00,0xd1,0x03,0x0b,0x12,0x20
+
+# VI: v_cndmask_b32_e64 v1, v3, |v5|, s[4:5] ; encoding: [0x01,0x02,0x00,0xd1,0x03,0x0b,0x12,0x00]
+0x01,0x02,0x00,0xd1,0x03,0x0b,0x12,0x00
+
+# VI: v_cndmask_b32_e64 v1, -|v3|, v5, vcc ; encoding: [0x01,0x01,0x00,0xd1,0x03,0x0b,0xaa,0x21]
+0x01,0x01,0x00,0xd1,0x03,0x0b,0xaa,0x21
+
# VI: v_add_f32_e64 v1, v3, s5 ; encoding: [0x01,0x00,0x01,0xd1,0x03,0x0b,0x00,0x00]
0x01 0x00 0x01 0xd1 0x03 0x0b 0x00 0x00