return true;
}
+// Check that this is a G_XOR x, -1 (i.e. a logical "not" of x).
+static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {
+  if (MI.getOpcode() != TargetOpcode::G_XOR)
+    return false;
+  // Operand 2 is the second source operand of the XOR; it must fold to the
+  // constant -1 (all ones) for the XOR to act as a negation. A XOR with any
+  // other constant (e.g. 0) is not a "not" and must not match.
+  auto ConstVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
+  return ConstVal && *ConstVal == -1;
+}
+
// Return the use branch instruction, otherwise null if the usage is invalid.
-static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineInstr *&Br,
- MachineBasicBlock *&UncondBrTarget) {
+static MachineInstr *
+verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineInstr *&Br,
+ MachineBasicBlock *&UncondBrTarget, bool &Negated) {
Register CondDef = MI.getOperand(0).getReg();
if (!MRI.hasOneNonDBGUse(CondDef))
return nullptr;
MachineBasicBlock *Parent = MI.getParent();
- MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
- if (UseMI.getParent() != Parent ||
- UseMI.getOpcode() != AMDGPU::G_BRCOND)
+ MachineInstr *UseMI = &*MRI.use_instr_nodbg_begin(CondDef);
+
+ if (isNot(MRI, *UseMI)) {
+ Register NegatedCond = UseMI->getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(NegatedCond))
+ return nullptr;
+
+ // We're deleting the def of this value, so we need to remove it.
+ UseMI->eraseFromParent();
+
+ UseMI = &*MRI.use_instr_nodbg_begin(NegatedCond);
+ Negated = true;
+ }
+
+ if (UseMI->getParent() != Parent || UseMI->getOpcode() != AMDGPU::G_BRCOND)
return nullptr;
// Make sure the cond br is followed by a G_BR, or is the last instruction.
- MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
+ MachineBasicBlock::iterator Next = std::next(UseMI->getIterator());
if (Next == Parent->end()) {
MachineFunction::iterator NextMBB = std::next(Parent->getIterator());
if (NextMBB == Parent->getParent()->end()) // Illegal intrinsic use.
UncondBrTarget = Br->getOperand(0).getMBB();
}
- return &UseMI;
+ return UseMI;
}
bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
case Intrinsic::amdgcn_else: {
MachineInstr *Br = nullptr;
MachineBasicBlock *UncondBrTarget = nullptr;
- if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
+ bool Negated = false;
+ if (MachineInstr *BrCond =
+ verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget, Negated)) {
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
Register Use = MI.getOperand(3).getReg();
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
+
+ if (Negated)
+ std::swap(CondBrTarget, UncondBrTarget);
+
B.setInsertPt(B.getMBB(), BrCond->getIterator());
if (IntrID == Intrinsic::amdgcn_if) {
B.buildInstr(AMDGPU::SI_IF)
case Intrinsic::amdgcn_loop: {
MachineInstr *Br = nullptr;
MachineBasicBlock *UncondBrTarget = nullptr;
- if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget)) {
+ bool Negated = false;
+ if (MachineInstr *BrCond =
+ verifyCFIntrinsic(MI, MRI, Br, UncondBrTarget, Negated)) {
const SIRegisterInfo *TRI
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
MachineBasicBlock *CondBrTarget = BrCond->getOperand(1).getMBB();
Register Reg = MI.getOperand(2).getReg();
+ if (Negated)
+ std::swap(CondBrTarget, UncondBrTarget);
+
B.setInsertPt(B.getMBB(), BrCond->getIterator());
B.buildInstr(AMDGPU::SI_LOOP)
.addUse(Reg)
# ERR: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_different_block)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_brcond_user)
# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_multi_user)
-# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: si_if_not_condition)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_xor_0)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_or_neg1)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2:_(s1) (in function: brcond_si_if_negated_multi_use)
---
...
+# Make sure we only match G_XOR (if), -1 — here the XOR constant is 0, so it
+# must not be folded as a negated branch condition.
---
-name: si_if_not_condition
+name: brcond_si_if_xor_0
body: |
bb.0:
+ successors: %bb.1
+ liveins: $vgpr0, $vgpr1
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s1) = G_ICMP intpred(ne), %0, %1
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %5:_(s1) = G_CONSTANT i1 false
+ %6:_(s1) = G_XOR %3, %5
+ G_BRCOND %6, %bb.2
+ G_BR %bb.3
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ S_NOP 1
+
+ bb.3:
+ S_NOP 2
+...
+
+# Make sure we only match G_XOR (if), -1 — a G_OR (if), -1 must not be folded
+# as a negated branch condition.
+---
+name: brcond_si_if_or_neg1
+body: |
+ bb.0:
+ successors: %bb.1
+ liveins: $vgpr0, $vgpr1
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s1) = G_ICMP intpred(ne), %0, %1
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %5:_(s1) = G_CONSTANT i1 true
+ %6:_(s1) = G_OR %3, %5
+ G_BRCOND %6, %bb.2
+ G_BR %bb.3
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ S_NOP 1
+
+ bb.3:
+ S_NOP 2
+...
+
+---
+name: brcond_si_if_negated_multi_use
+body: |
+ bb.0:
+ successors: %bb.1
liveins: $vgpr0, $vgpr1
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
%5:_(s1) = G_CONSTANT i1 true
%6:_(s1) = G_XOR %3, %5
- G_BRCOND %6, %bb.1
+ S_NOP 0, implicit %6
+ G_BRCOND %6, %bb.2
+ G_BR %bb.3
bb.1:
+ S_NOP 0
+
+ bb.2:
+ S_NOP 1
+ bb.3:
+ S_NOP 2
...
bb.2:
S_NOP 0
...
+
+---
+name: brcond_si_if_negated
+body: |
+ ; WAVE64-LABEL: name: brcond_si_if_negated
+ ; WAVE64: bb.0:
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: G_BR %bb.1
+ ; WAVE64: bb.1:
+ ; WAVE64: successors: %bb.2(0x80000000)
+ ; WAVE64: S_NOP 0
+ ; WAVE64: bb.2:
+ ; WAVE64: S_NOP 1
+ ; WAVE32-LABEL: name: brcond_si_if_negated
+ ; WAVE32: bb.0:
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: G_BR %bb.1
+ ; WAVE32: bb.1:
+ ; WAVE32: successors: %bb.2(0x80000000)
+ ; WAVE32: S_NOP 0
+ ; WAVE32: bb.2:
+ ; WAVE32: S_NOP 1
+ bb.0:
+ successors: %bb.1
+ liveins: $vgpr0, $vgpr1
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s1) = G_ICMP intpred(ne), %0, %1
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %5:_(s1) = G_CONSTANT i1 true
+ %6:_(s1) = G_XOR %3, %5
+ G_BRCOND %6, %bb.2
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ S_NOP 1
+...
+
+---
+name: brcond_si_if_br_negated
+body: |
+ ; WAVE64-LABEL: name: brcond_si_if_br_negated
+ ; WAVE64: bb.0:
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE64: [[ICMP:%[0-9]+]]:sreg_64_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; WAVE64: [[SI_IF:%[0-9]+]]:sreg_64_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: G_BR %bb.3
+ ; WAVE64: bb.1:
+ ; WAVE64: successors: %bb.2(0x80000000)
+ ; WAVE64: S_NOP 0
+ ; WAVE64: bb.2:
+ ; WAVE64: successors: %bb.3(0x80000000)
+ ; WAVE64: S_NOP 1
+ ; WAVE64: bb.3:
+ ; WAVE64: S_NOP 2
+ ; WAVE32-LABEL: name: brcond_si_if_br_negated
+ ; WAVE32: bb.0:
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE32: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[COPY1]]
+ ; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; WAVE32: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s64) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: G_BR %bb.3
+ ; WAVE32: bb.1:
+ ; WAVE32: successors: %bb.2(0x80000000)
+ ; WAVE32: S_NOP 0
+ ; WAVE32: bb.2:
+ ; WAVE32: successors: %bb.3(0x80000000)
+ ; WAVE32: S_NOP 1
+ ; WAVE32: bb.3:
+ ; WAVE32: S_NOP 2
+ bb.0:
+ successors: %bb.1
+ liveins: $vgpr0, $vgpr1
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s1) = G_ICMP intpred(ne), %0, %1
+ %3:_(s1), %4:_(s64) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.if), %2
+ %5:_(s1) = G_CONSTANT i1 true
+ %6:_(s1) = G_XOR %3, %5
+ G_BRCOND %6, %bb.2
+ G_BR %bb.3
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ S_NOP 1
+
+ bb.3:
+ S_NOP 2
+...
+
+---
+name: brcond_si_loop_brcond_negated
+tracksRegLiveness: true
+body: |
+ ; WAVE64-LABEL: name: brcond_si_loop_brcond_negated
+ ; WAVE64: bb.0:
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+ ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+ ; WAVE64: bb.1:
+ ; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; WAVE64: S_NOP 0
+ ; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; WAVE64: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: G_BR %bb.2
+ ; WAVE64: bb.2:
+ ; WAVE64: S_NOP 0
+ ; WAVE32-LABEL: name: brcond_si_loop_brcond_negated
+ ; WAVE32: bb.0:
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+ ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+ ; WAVE32: bb.1:
+ ; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; WAVE32: S_NOP 0
+ ; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; WAVE32: SI_LOOP [[COPY2]](s64), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: G_BR %bb.2
+ ; WAVE32: bb.2:
+ ; WAVE32: S_NOP 0
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s64) = COPY $sgpr0_sgpr1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ S_NOP 0
+ %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+ %4:_(s1) = G_CONSTANT i1 true
+ %5:_(s1) = G_XOR %3, %4
+ G_BRCOND %5, %bb.1
+
+ bb.2:
+ S_NOP 0
+...
+
+---
+name: brcond_si_loop_brcond_br_negated
+tracksRegLiveness: true
+body: |
+ ; WAVE64-LABEL: name: brcond_si_loop_brcond_br_negated
+ ; WAVE64: bb.0:
+ ; WAVE64: successors: %bb.1(0x80000000)
+ ; WAVE64: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+ ; WAVE64: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE64: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE64: [[COPY2:%[0-9]+]]:sreg_64_xexec(s64) = COPY $sgpr0_sgpr1
+ ; WAVE64: bb.1:
+ ; WAVE64: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; WAVE64: S_NOP 0
+ ; WAVE64: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; WAVE64: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE64: G_BR %bb.1
+ ; WAVE64: bb.2:
+ ; WAVE64: S_NOP 0
+ ; WAVE32-LABEL: name: brcond_si_loop_brcond_br_negated
+ ; WAVE32: bb.0:
+ ; WAVE32: successors: %bb.1(0x80000000)
+ ; WAVE32: liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+ ; WAVE32: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; WAVE32: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32_xm0_xexec(s64) = COPY $sgpr0_sgpr1
+ ; WAVE32: bb.1:
+ ; WAVE32: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; WAVE32: S_NOP 0
+ ; WAVE32: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
+ ; WAVE32: SI_LOOP [[COPY2]](s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ ; WAVE32: G_BR %bb.1
+ ; WAVE32: bb.2:
+ ; WAVE32: S_NOP 0
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $sgpr0_sgpr1
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s64) = COPY $sgpr0_sgpr1
+
+ bb.1:
+ successors: %bb.1, %bb.2
+ S_NOP 0
+ %3:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), %2
+ %4:_(s1) = G_CONSTANT i1 true
+ %5:_(s1) = G_XOR %3, %4
+ G_BRCOND %5, %bb.2
+ G_BR %bb.1
+
+ bb.2:
+ S_NOP 0
+...