/// \returns Whether the tracked divergence state of \p DivVal changed.
bool markDivergent(const InstructionT &I);
bool markDivergent(ConstValueRefT DivVal);
- bool markDefsDivergent(const InstructionT &Instr,
- bool AllDefsDivergent = true);
+ bool markDefsDivergent(const InstructionT &Instr);
/// \brief Propagate divergence to all instructions in the region.
/// Divergence is seeded by calls to \p markDivergent.
/// Get the total number of register banks.
unsigned getNumRegBanks() const { return NumRegBanks; }
+ /// Returns true if the register bank is considered divergent.
+ virtual bool isDivergentRegBank(const RegisterBank *RB) const {
+ return false;
+ }
+
/// Get a register bank that covers \p RC.
///
/// \pre \p RC is a user-defined register class (as opposed as one
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
return false;
}
+ /// Returns true if the register is considered uniform.
+ virtual bool isUniformReg(const MachineRegisterInfo &MRI,
+ const RegisterBankInfo &RBI, Register Reg) const {
+ return false;
+ }
+
/// Physical registers that may be modified within a function but are
/// guaranteed to be restored before any uses. This is useful for targets that
/// have call sequences where a GOT register may be updated by the caller
template <>
bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
- const Instruction &Instr, bool AllDefsDivergent) {
+ const Instruction &Instr) {
return markDivergent(&Instr);
}
template <>
bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::markDefsDivergent(
- const MachineInstr &Instr, bool AllDefsDivergent) {
+ const MachineInstr &Instr) {
bool insertedDivergent = false;
const auto &MRI = F.getRegInfo();
+ const auto &RBI = *F.getSubtarget().getRegBankInfo();
const auto &TRI = *MRI.getTargetRegisterInfo();
for (auto &op : Instr.operands()) {
if (!op.isReg() || !op.isDef())
if (!op.getReg().isVirtual())
continue;
assert(!op.getSubReg());
- if (!AllDefsDivergent) {
- auto *RC = MRI.getRegClassOrNull(op.getReg());
- if (RC && !TRI.isDivergentRegClass(RC))
- continue;
- }
+ if (TRI.isUniformReg(MRI, RBI, op.getReg()))
+ continue;
insertedDivergent |= markDivergent(op.getReg());
}
return insertedDivergent;
}
if (uniformity == InstructionUniformity::NeverUniform) {
- markDefsDivergent(instr, /* AllDefsDivergent = */ false);
+ if (markDivergent(instr))
+ Worklist.push_back(&instr);
}
}
}
return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
}
+bool AMDGPURegisterBankInfo::isDivergentRegBank(const RegisterBank *RB) const {
+ return RB != &AMDGPU::SGPRRegBank;
+}
+
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
const RegisterBank &Src,
unsigned Size) const {
public:
AMDGPURegisterBankInfo(const GCNSubtarget &STI);
+ bool isDivergentRegBank(const RegisterBank *RB) const override;
+
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
unsigned Size) const override;
let Size = 8;
let isBranch = 1;
let hasSideEffects = 1;
+ let IsNeverUniform = 1;
}
} // End isTerminator = 1
return MCRegister();
}
+bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
+ const RegisterBankInfo &RBI,
+ Register Reg) const {
+ auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
+ if (!RB)
+ return false;
+
+ return !RBI.isDivergentRegBank(RB);
+}
+
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const {
const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC);
return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
}
+ // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
+ // (such as VCC) which hold a wave-wide vector of boolean values. Examining
+  // just the register class is not sufficient; it needs to be combined with a
+ // value type. The next predicate isUniformReg() does this correctly.
bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
return !isSGPRClass(RC);
}
+ bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
+ Register Reg) const override;
+
ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const;
bb.0:
liveins: $vgpr0
; CHECK-LABEL: MachineUniformityInfo for function: asm_sgpr
- ; FIXME: This is backwards
- ; CHECK: DIVERGENT: %1
+ ; CHECK-NOT: DIVERGENT: %1
%0:_(s32) = COPY $vgpr0
%2:vgpr_32 = COPY %0(s32)
%3:sreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
%5:vreg_64 = COPY %3
%6:vreg_64 = COPY %3
- ; CHECK: DIVERGENT
- ; CHECK-SAME: FLAT_ATOMIC_SWAP_RTN
+ ; CHECK: DIVERGENT{{.*}}FLAT_ATOMIC_SWAP_RTN
%4:vgpr_32 = FLAT_ATOMIC_SWAP_RTN killed %5, %2, 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst (s32))
- ; CHECK: DIVERGENT
- ; CHECK-SAME: FLAT_ATOMIC_SWAP_RTN
+ ; CHECK: DIVERGENT{{.*}}FLAT_ATOMIC_SWAP_RTN
%7:vgpr_32 = FLAT_ATOMIC_SWAP_RTN killed %6, %2, 0, 1, implicit $exec, implicit $flat_scr ; No memoperands
$vgpr0 = COPY %4
SI_RETURN implicit $vgpr0
%5:sreg_64 = REG_SEQUENCE %3, %subreg.sub0, %2, %subreg.sub1
%7:vreg_64 = COPY %4
%8:vreg_64 = COPY %5
- ; CHECK: DIVERGENT
- ; CHECK-SAME: FLAT_ATOMIC_CMPSWAP_RTN
+ ; CHECK: DIVERGENT{{.*}}FLAT_ATOMIC_CMPSWAP_RTN
%6:vgpr_32 = FLAT_ATOMIC_CMPSWAP_RTN killed %7, killed %8, 0, 1, implicit $exec, implicit $flat_scr :: (load store seq_cst seq_cst (s32))
%9:sreg_64_xexec = V_CMP_EQ_U32_e64 %6, %2, implicit $exec
%10:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
%0:vgpr_32 = IMPLICIT_DEF
%3:sreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
%5:vreg_64 = COPY %3
- ; CHECK: DIVERGENT
- ; CHECK-SAME: GLOBAL_ATOMIC_INC_RTN
+ ; CHECK: DIVERGENT{{.*}}GLOBAL_ATOMIC_INC_RTN
%4:vgpr_32 = GLOBAL_ATOMIC_INC_RTN killed %5, %2, 0, 1, implicit $exec :: (load store (s32), addrspace 1)
$vgpr0 = COPY %4
SI_RETURN implicit $vgpr0
%5:sreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
%7:vreg_64 = COPY %5
%8:vreg_64 = COPY %4
- ; CHECK: DIVERGENT
- ; CHECK-SAME: GLOBAL_ATOMIC_INC_X2_RTN
+ ; CHECK: DIVERGENT{{.*}}GLOBAL_ATOMIC_INC_X2_RTN
%6:vreg_64 = GLOBAL_ATOMIC_INC_X2_RTN killed %7, killed %8, 0, 1, implicit $exec :: (load store (s64), addrspace 1)
%9:vgpr_32 = COPY %6.sub1
%10:vgpr_32 = COPY %6.sub0
%0:vgpr_32 = IMPLICIT_DEF
%3:sreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
%5:vreg_64 = COPY %3
- ; CHECK: DIVERGENT
- ; CHECK-SAME: GLOBAL_ATOMIC_DEC_RTN
+ ; CHECK: DIVERGENT{{.*}}GLOBAL_ATOMIC_DEC_RTN
%4:vgpr_32 = GLOBAL_ATOMIC_DEC_RTN killed %5, %2, 0, 1, implicit $exec :: (load store (s32), addrspace 1)
$vgpr0 = COPY %4
SI_RETURN implicit $vgpr0
%5:sreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
%7:vreg_64 = COPY %5
%8:vreg_64 = COPY %4
- ; CHECK: DIVERGENT
- ; CHECK-SAME: GLOBAL_ATOMIC_DEC_X2_RTN
+ ; CHECK: DIVERGENT{{.*}}GLOBAL_ATOMIC_DEC_X2_RTN
%6:vreg_64 = GLOBAL_ATOMIC_DEC_X2_RTN killed %7, killed %8, 0, 1, implicit $exec :: (load store (s64), addrspace 1)
%9:vgpr_32 = COPY %6.sub1
%10:vgpr_32 = COPY %6.sub0
+++ /dev/null
-# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
-
----
-# CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge
-# CHECK-LABEL: BLOCK bb.0
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:vgpr_32(s32) = COPY $vgpr0
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:sreg_64 = V_CMP_GT_I32_e64
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:sreg_64 = V_CMP_LT_I32_e64
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:vreg_1 = COPY
-# CHECK: DIVERGENT: %{{[0-9]*}}:sreg_64 = SI_IF
-# CHECK: DIVERGENT: S_BRANCH %bb.1
-# CHECK-LABEL: BLOCK bb.2
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:sreg_32 = PHI %{{[0-9]*}}:sreg_32, %bb.0, %{{[0-9]*}}:sreg_32, %bb.1
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:vreg_1 = PHI %{{[0-9]*}}:vreg_1, %bb.0, %{{[0-9]*}}:sreg_64, %bb.1
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:sreg_64 = COPY %{{[0-9]*}}:vreg_1
-# CHECK: DIVERGENT: %{{[0-9]*}}:sreg_64 = SI_IF %{{[0-9]*}}:sreg_64, %bb.4
-# CHECK: DIVERGENT: S_BRANCH %bb.3
-# CHECK-LABEL: BLOCK bb.3
-# CHECK-LABEL: BLOCK bb.4
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:vgpr_32 = PHI %{{[0-9]*}}:sreg_32, %bb.2, %{{[0-9]*}}:sreg_32, %bb.3
-
-name: hidden_diverge
-tracksRegLiveness: true
-body: |
- bb.0:
- successors: %bb.1(0x40000000), %bb.2(0x40000000)
- liveins: $vgpr0, $sgpr0_sgpr1
-
- %11:sgpr_64(p4) = COPY $sgpr0_sgpr1
- %10:vgpr_32(s32) = COPY $vgpr0
- %15:sreg_64_xexec = S_LOAD_DWORDX2_IMM %11(p4), 36, 0
- %16:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %11(p4), 44, 0
- %17:sreg_32 = COPY %15.sub1
- %18:sreg_32 = COPY %15.sub0
- %19:sgpr_96 = REG_SEQUENCE killed %18, %subreg.sub0, killed %17, %subreg.sub1, killed %16, %subreg.sub2
- %0:sgpr_96 = COPY %19
- %20:sreg_32 = S_MOV_B32 -1
- %21:sreg_64 = V_CMP_GT_I32_e64 %10(s32), killed %20, implicit $exec
- %22:sreg_32 = S_MOV_B32 0
- %23:sreg_64 = V_CMP_LT_I32_e64 %10(s32), killed %22, implicit $exec
- %1:vreg_1 = COPY %21
- %14:sreg_32 = IMPLICIT_DEF
- %2:sreg_64 = SI_IF killed %23, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
- S_BRANCH %bb.1
-
- bb.1:
- successors: %bb.2(0x80000000)
-
- %24:sreg_32 = COPY %0.sub0
- %3:sreg_32 = COPY %0.sub1
- %25:sreg_32 = S_MOV_B32 0
- S_CMP_LT_I32 killed %24, killed %25, implicit-def $scc
- %26:sreg_64 = COPY $scc
- %4:sreg_64 = COPY %26
-
- bb.2:
- successors: %bb.3(0x40000000), %bb.4(0x40000000)
-
- %5:sreg_32 = PHI %14, %bb.0, %3, %bb.1
- %6:vreg_1 = PHI %1, %bb.0, %4, %bb.1
- SI_END_CF %2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
- %27:sreg_64 = COPY %6
- %7:sreg_64 = SI_IF %27, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
- S_BRANCH %bb.3
-
- bb.3:
- successors: %bb.4(0x80000000)
-
- %8:sreg_32 = COPY %0.sub2
-
- bb.4:
- %9:vgpr_32 = PHI %5, %bb.2, %8, %bb.3
- SI_END_CF %7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
- %28:sreg_64 = IMPLICIT_DEF
- %29:vreg_64 = COPY %28
- GLOBAL_STORE_DWORD killed %29, %9, 0, 0, implicit $exec
- S_ENDPGM 0
-
-...
--- /dev/null
+# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+# CHECK-LABEL: MachineUniformityInfo for function: hidden_loop_diverge
+
+# CHECK-LABEL: BLOCK bb.0
+# CHECK-NOT: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt), %{{[0-9]*}}:_(s32), %{{[0-9]*}}:_
+# CHECK-NOT: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.3
+# CHECK-NOT: DIVERGENT: G_BR %bb.1
+
+# CHECK-LABEL: BLOCK bb.1
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt), %{{[0-9]*}}:_(s32), %{{[0-9]*}}:_
+# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.3
+# CHECK: DIVERGENT: G_BR %bb.2
+
+# CHECK-LABEL: BLOCK bb.2
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(sgt), %{{[0-9]*}}:_(s32), %{{[0-9]*}}:_
+# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.4
+# CHECK: DIVERGENT: G_BR %bb.1
+
+# CHECK-LABEL: BLOCK bb.3
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.0, %{{[0-9]*}}:_(s32), %bb.1
+# CHECK-NOT: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.4
+# CHECK-NOT: DIVERGENT: G_BR %bb.5
+
+# CHECK-LABEL: BLOCK bb.4
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.3, %{{[0-9]*}}:_(s32), %bb.2
+
+# CHECK-LABEL: BLOCK bb.5
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.3, %{{[0-9]*}}:_(s32), %bb.4
+
+---
+name: hidden_loop_diverge
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.3, %bb.1
+ liveins: $sgpr4_sgpr5
+
+ %0:_(s32) = G_IMPLICIT_DEF
+ %20:_(s32) = G_IMPLICIT_DEF
+ %21:_(s32) = G_CONSTANT i32 42
+ %22:_(s32) = G_IMPLICIT_DEF
+ %1:_(s32) = G_CONSTANT i32 0
+ %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
+ %3:_(s1) = G_ICMP intpred(slt), %0(s32), %1
+ G_BRCOND %3(s1), %bb.3 ; Uniform branch
+ G_BR %bb.1
+ bb.1:
+ successors: %bb.3, %bb.2
+
+ %4:_(s32) = G_PHI %1(s32), %bb.0, %7(s32), %bb.2
+ %5:_(s1) = G_ICMP intpred(slt), %1(s32), %2(s32)
+ G_BRCOND %5(s1), %bb.3 ; Divergent exit
+ G_BR %bb.2
+ bb.2:
+ successors: %bb.4, %bb.1
+
+ %6:_(s32) = G_CONSTANT i32 1
+ %7:_(s32) = G_ADD %6(s32), %4(s32)
+ %8:_(s1) = G_ICMP intpred(sgt), %2(s32), %1(s32)
+ G_BRCOND %8(s1), %bb.4 ; Divergent exit
+ G_BR %bb.1
+ bb.3:
+ successors: %bb.4, %bb.5
+
+ %9:_(s32) = G_PHI %20(s32), %bb.0, %4(s32), %bb.1 ; Temporal divergent phi
+ G_BRCOND %3(s1), %bb.4
+ G_BR %bb.5
+
+ bb.4:
+ successors: %bb.5
+
+ %10:_(s32) = G_PHI %21(s32), %bb.3, %22(s32), %bb.2 ; Temporal divergent phi
+ G_BR %bb.5
+ bb.5:
+ %11:_(s32) = G_PHI %20(s32), %bb.3, %22(s32), %bb.4
+...
+++ /dev/null
-# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
-
-# CHECK-LABEL: MachineUniformityInfo for function: irreducible
-# CHECK: CYCLES ASSSUMED DIVERGENT:
-# CHECK: depth=1: entries(bb.2 bb.1) bb.3 bb.5 bb.4
-# CHECK: CYCLES WITH DIVERGENT EXIT:
-# CHECK-DAG: depth=1: entries(bb.2 bb.1) bb.3 bb.5 bb.4
-# CHECK-DAG: depth=2: entries(bb.3 bb.1) bb.5 bb.4
-
----
-name: irreducible
-tracksRegLiveness: true
-machineFunctionInfo:
- isEntryFunction: true
-body: |
- bb.0:
- successors: %bb.1, %bb.2
- liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
-
- %0:sreg_32 = IMPLICIT_DEF
- %2:vgpr_32 = COPY $vgpr0
- %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- S_CMP_EQ_U32 %0, 0, implicit-def $scc
- S_CBRANCH_SCC1 %bb.1, implicit $scc
- S_BRANCH %bb.2
-
- bb.1:
- %28:vgpr_32 = PHI %3, %bb.0, %49, %bb.5
- %29:vgpr_32 = V_ADD_U32_e64 %28, 1, 0, implicit $exec
- S_BRANCH %bb.3
-
- bb.2:
- %38:vgpr_32 = PHI %3, %bb.0, %49, %bb.4
- %39:vgpr_32 = V_ADD_U32_e64 %38, 2, 0, implicit $exec
-
- bb.3:
- %49:vgpr_32 = PHI %29, %bb.1, %39, %bb.2
-
- bb.4:
- successors: %bb.2, %bb.5
-
- %50:vgpr_32 = V_AND_B32_e32 3, %2, implicit $exec
- %51:sreg_64 = V_CMP_EQ_U32_e64 %50, 2, implicit $exec
- %52:sreg_64 = SI_IF killed %51:sreg_64, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-
- bb.5:
- successors: %bb.1, %bb.6
- %61:sreg_64 = V_CMP_EQ_U32_e64 %50, 1, implicit $exec
- %62:sreg_64 = SI_IF killed %61:sreg_64, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
-
- bb.6:
- S_ENDPGM 0
-...
bb.0:
; CHECK-LABEL: MachineUniformityInfo for function: writelane
; CHECK: DIVERGENT: %4
- ; CHECK: DIVERGENT: %5
+
+ ; Note how %5 is the result of a vector compare, but it is reported as
+ ; uniform because it is stored in an sreg.
+ ; CHECK-NOT: DIVERGENT: %5
+
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:sgpr_32 = V_READFIRSTLANE_B32 %0, implicit $exec
+++ /dev/null
-# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
-# CHECK-LABEL: MachineUniformityInfo for function: hidden_loop_diverge
-
-# CHECK-LABEL: BLOCK bb.0
-# CHECK-NOT: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt), %{{[0-9]*}}:_(s32), %{{[0-9]*}}:_
-# CHECK-NOT: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.3
-# CHECK-NOT: DIVERGENT: G_BR %bb.1
-
-# CHECK-LABEL: BLOCK bb.1
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(slt), %{{[0-9]*}}:_(s32), %{{[0-9]*}}:_
-# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.3
-# CHECK: DIVERGENT: G_BR %bb.2
-
-# CHECK-LABEL: BLOCK bb.2
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s1) = G_ICMP intpred(sgt), %{{[0-9]*}}:_(s32), %{{[0-9]*}}:_
-# CHECK: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.4
-# CHECK: DIVERGENT: G_BR %bb.1
-
-# CHECK-LABEL: BLOCK bb.3
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.0, %{{[0-9]*}}:_(s32), %bb.1
-# CHECK-NOT: DIVERGENT: G_BRCOND %{{[0-9]*}}:_(s1), %bb.4
-# CHECK-NOT: DIVERGENT: G_BR %bb.5
-
-# CHECK-LABEL: BLOCK bb.4
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.3, %{{[0-9]*}}:_(s32), %bb.2
-
-# CHECK-LABEL: BLOCK bb.5
-# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_PHI %{{[0-9]*}}:_(s32), %bb.3, %{{[0-9]*}}:_(s32), %bb.4
-
----
-name: hidden_loop_diverge
-tracksRegLiveness: true
-body: |
- bb.0:
- successors: %bb.3, %bb.1
- liveins: $sgpr4_sgpr5
-
- %0:_(s32) = G_IMPLICIT_DEF
- %20:_(s32) = G_IMPLICIT_DEF
- %21:_(s32) = G_CONSTANT i32 42
- %22:_(s32) = G_IMPLICIT_DEF
- %1:_(s32) = G_CONSTANT i32 0
- %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
- %3:_(s1) = G_ICMP intpred(slt), %0(s32), %1
- G_BRCOND %3(s1), %bb.3 ; Uniform branch
- G_BR %bb.1
- bb.1:
- successors: %bb.3, %bb.2
-
- %4:_(s32) = G_PHI %1(s32), %bb.0, %7(s32), %bb.2
- %5:_(s1) = G_ICMP intpred(slt), %1(s32), %2(s32)
- G_BRCOND %5(s1), %bb.3
- G_BR %bb.2
- bb.2:
- successors: %bb.4, %bb.1
-
- %6:_(s32) = G_CONSTANT i32 1
- %7:_(s32) = G_ADD %6(s32), %4(s32)
- %8:_(s1) = G_ICMP intpred(sgt), %2(s32), %1(s32)
- G_BRCOND %8(s1), %bb.4
- G_BR %bb.1
- bb.3:
- successors: %bb.4, %bb.5
-
- %9:_(s32) = G_PHI %20(s32), %bb.0, %4(s32), %bb.1 ; Temporal divergent phi
- G_BRCOND %3(s1), %bb.4
- G_BR %bb.5
-
- bb.4:
- successors: %bb.5
-
- %10:_(s32) = G_PHI %21(s32), %bb.3, %22(s32), %bb.2
- G_BR %bb.5
- bb.5:
- %11:_(s32) = G_PHI %20(s32), %bb.3, %22(s32), %bb.4
-...
--- /dev/null
+# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+
+---
+# CHECK-LABEL: MachineUniformityInfo for function: temporal_diverge
+name: temporal_diverge
+alignment: 1
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 3, class: _ }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: sgpr_32 }
+ - { id: 6, class: sgpr_32 }
+liveins:
+ - { reg: '$sgpr0_sgpr1', virtual-reg: '%3' }
+ - { reg: '$vgpr0', virtual-reg: '%4' }
+ - { reg: '$sgpr2', virtual-reg: '%5' }
+ - { reg: '$sgpr3', virtual-reg: '%6' }
+body: |
+ bb.1:
+ liveins: $sgpr0_sgpr1
+
+ %15:_(s64) = G_CONSTANT i64 0
+
+ bb.2:
+ successors: %bb.3, %bb.2
+
+ %11:_(s64) = G_PHI %12(s64), %bb.2, %15(s64), %bb.1
+ %18:_(s1) = G_CONSTANT i1 false
+ %12:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %18(s1), %11(s64)
+ ; CHECK: DIVERGENT: SI_LOOP
+ SI_LOOP %12(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.3
+
+ bb.3:
+ ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
+ %14:_(s64) = G_PHI %12(s64), %bb.2
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s64)
+ S_ENDPGM 0
+
+...
+---
+# CHECK-LABEL: MachineUniformityInfo for function: phi_at_exit
+name: phi_at_exit
+alignment: 1
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 3, class: _ }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: sgpr_32 }
+ - { id: 6, class: sgpr_32 }
+liveins:
+ - { reg: '$sgpr0_sgpr1', virtual-reg: '%3' }
+ - { reg: '$vgpr0', virtual-reg: '%4' }
+ - { reg: '$sgpr2', virtual-reg: '%5' }
+ - { reg: '$sgpr3', virtual-reg: '%6' }
+body: |
+ bb.1:
+ successors: %bb.2, %bb.3
+ liveins: $sgpr0_sgpr1
+
+ %3:_(p4) = COPY $sgpr0_sgpr1
+ %7:_(p4) = COPY %3(p4)
+ %8:_(s64) = G_CONSTANT i64 40
+ %9:_(p4) = G_PTR_ADD %7, %8(s64)
+ %10:_(s32) = G_LOAD %9(p4) :: (load (s32), addrspace 4)
+ %11:_(s32) = G_CONSTANT i32 0
+ %12:_(s1) = G_ICMP intpred(sge), %10(s32), %11
+ G_BRCOND %12(s1), %bb.3
+ G_BR %bb.2
+
+ bb.2:
+ %24:_(s64) = G_CONSTANT i64 0
+ %14:_(s1) = G_CONSTANT i1 false
+ G_BR %bb.4
+
+ bb.3:
+ G_BR %bb.6
+
+ bb.4:
+ successors: %bb.5, %bb.4
+
+ %15:_(s64) = G_PHI %24(s64), %bb.2, %16(s64), %bb.4
+ %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
+ ; CHECK: DIVERGENT: SI_LOOP
+ SI_LOOP %16(s64), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.5
+
+ bb.5:
+ ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
+ %18:_(s64) = G_PHI %16(s64), %bb.4
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+ G_BR %bb.3
+
+ bb.6:
+ S_ENDPGM 0
+
+...
+---
+# CHECK-LABEL: MachineUniformityInfo for function: phi_after_exit
+name: phi_after_exit
+alignment: 1
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 3, class: _ }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: sgpr_32 }
+ - { id: 6, class: sgpr_32 }
+liveins:
+ - { reg: '$sgpr0_sgpr1', virtual-reg: '%3' }
+ - { reg: '$vgpr0', virtual-reg: '%4' }
+ - { reg: '$sgpr2', virtual-reg: '%5' }
+ - { reg: '$sgpr3', virtual-reg: '%6' }
+body: |
+ bb.1:
+ successors: %bb.2, %bb.3
+ liveins: $sgpr0_sgpr1
+
+ %3:_(p4) = COPY $sgpr0_sgpr1
+ %7:_(p4) = COPY %3(p4)
+ %8:_(s64) = G_CONSTANT i64 40
+ %9:_(p4) = G_PTR_ADD %7, %8(s64)
+ %10:_(s32) = G_LOAD %9(p4) :: (dereferenceable invariant load (s32), addrspace 4)
+ %11:_(s32) = G_CONSTANT i32 0
+ %12:_(s1) = G_ICMP intpred(sge), %10(s32), %11
+ G_BRCOND %12(s1), %bb.3
+ G_BR %bb.2
+
+ bb.2:
+ %24:_(s64) = G_CONSTANT i64 0
+ %14:_(s1) = G_CONSTANT i1 false
+ G_BR %bb.4
+
+ bb.3:
+ G_BR %bb.6
+
+ bb.4:
+ successors: %bb.5, %bb.4
+
+ %15:_(s64) = G_PHI %24(s64), %bb.2, %16(s64), %bb.4
+ %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %14(s1), %15(s64)
+ ; CHECK: DIVERGENT: SI_LOOP
+ SI_LOOP %16(s64), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.5
+
+ bb.5:
+ ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
+ %18:_(s64) = G_PHI %16(s64), %bb.4
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+ G_BR %bb.3
+
+ bb.6:
+ S_ENDPGM 0
+
+...
+---
+# CHECK-LABEL: MachineUniformityInfo for function: temporal_diverge_inloop
+name: temporal_diverge_inloop
+alignment: 1
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 3, class: _ }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: sgpr_32 }
+ - { id: 6, class: sgpr_32 }
+liveins:
+ - { reg: '$sgpr0_sgpr1', virtual-reg: '%3' }
+ - { reg: '$vgpr0', virtual-reg: '%4' }
+ - { reg: '$sgpr2', virtual-reg: '%5' }
+ - { reg: '$sgpr3', virtual-reg: '%6' }
+body: |
+ bb.1:
+ liveins: $sgpr0_sgpr1
+
+ %3:_(p4) = COPY $sgpr0_sgpr1
+ %7:_(p4) = COPY %3(p4)
+ %8:_(s64) = G_CONSTANT i64 40
+ %9:_(p4) = G_PTR_ADD %7, %8(s64)
+ %10:_(s32) = G_LOAD %9(p4) :: (dereferenceable invariant load (s32), addrspace 4)
+ %12:_(s32) = G_CONSTANT i32 0
+ %13:_(s1) = G_ICMP intpred(slt), %10(s32), %12
+
+ bb.2:
+ %25:_(s64) = G_CONSTANT i64 0
+
+ bb.3:
+ successors: %bb.4, %bb.3
+
+ %15:_(s64) = G_PHI %25(s64), %bb.2, %16(s64), %bb.3
+ %24:_(s1) = G_CONSTANT i1 false
+ %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
+ ; CHECK: DIVERGENT: SI_LOOP
+ SI_LOOP %16(s64), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.4
+
+ bb.4:
+ ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
+ successors: %bb.5, %bb.2
+
+ %18:_(s64) = G_PHI %16(s64), %bb.3
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+ G_BRCOND %13(s1), %bb.2
+ G_BR %bb.5
+
+ bb.5:
+ S_ENDPGM 0
+
+...
+---
+# CHECK-LABEL: MachineUniformityInfo for function: temporal_uniform_indivloop
+name: temporal_uniform_indivloop
+alignment: 1
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 3, class: _ }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: sgpr_32 }
+ - { id: 6, class: sgpr_32 }
+liveins:
+ - { reg: '$sgpr0_sgpr1', virtual-reg: '%3' }
+ - { reg: '$vgpr0', virtual-reg: '%4' }
+ - { reg: '$sgpr2', virtual-reg: '%5' }
+ - { reg: '$sgpr3', virtual-reg: '%6' }
+body: |
+ bb.1:
+ liveins: $sgpr0_sgpr1
+
+ %3:_(p4) = COPY $sgpr0_sgpr1
+ %19:_(s64) = G_CONSTANT i64 0
+ %7:_(p4) = COPY %3(p4)
+ %8:_(s64) = G_CONSTANT i64 40
+ %9:_(p4) = G_PTR_ADD %7, %8(s64)
+ %10:_(s32) = G_LOAD %9(p4) :: (dereferenceable invariant load (s32), addrspace 4)
+ %12:_(s32) = G_CONSTANT i32 0
+ %13:_(s1) = G_ICMP intpred(sge), %10(s32), %12
+
+ bb.2:
+ %15:_(s64) = G_PHI %16(s64), %bb.4, %19(s64), %bb.1
+ %24:_(s1) = G_CONSTANT i1 true
+ %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %15(s64)
+
+ bb.3:
+ successors: %bb.4, %bb.3
+
+ G_BRCOND %13(s1), %bb.3
+ G_BR %bb.4
+
+ bb.4:
+ successors: %bb.5, %bb.2
+
+ ; CHECK: DIVERGENT: SI_LOOP
+ SI_LOOP %16(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.5
+
+ bb.5:
+ ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
+ %18:_(s64) = G_PHI %16(s64), %bb.4
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+ S_ENDPGM 0
+
+...
+---
+# CHECK-LABEL: MachineUniformityInfo for function: temporal_diverge_loopuser
+name: temporal_diverge_loopuser
+alignment: 1
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 3, class: _ }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: sgpr_32 }
+ - { id: 6, class: sgpr_32 }
+liveins:
+ - { reg: '$sgpr0_sgpr1', virtual-reg: '%3' }
+ - { reg: '$vgpr0', virtual-reg: '%4' }
+ - { reg: '$sgpr2', virtual-reg: '%5' }
+ - { reg: '$sgpr3', virtual-reg: '%6' }
+body: |
+ bb.1:
+ liveins: $sgpr0_sgpr1
+
+ %3:_(p4) = COPY $sgpr0_sgpr1
+ %19:_(s64) = G_CONSTANT i64 0
+
+ bb.2:
+ successors: %bb.3, %bb.2
+
+ %10:_(s64) = G_PHI %11(s64), %bb.2, %19(s64), %bb.1
+ %24:_(s1) = G_CONSTANT i1 false
+ %11:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %24(s1), %10(s64)
+ ; CHECK: DIVERGENT: SI_LOOP
+ SI_LOOP %11(s64), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.3
+
+ bb.3:
+ ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
+ ; CHECK-NOT: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
+ %13:_(s64) = G_PHI %11(s64), %bb.2
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %13(s64)
+ %14:_(p4) = COPY %3(p4)
+ %15:_(s64) = G_CONSTANT i64 40
+ %16:_(p4) = G_PTR_ADD %14, %15(s64)
+ %17:_(s32) = G_LOAD %16(p4) :: (dereferenceable invariant load (s32), addrspace 4)
+ %25:_(s32) = G_CONSTANT i32 0
+ %18:_(s1) = G_ICMP intpred(slt), %17(s32), %25
+
+ bb.4:
+ successors: %bb.5, %bb.4
+
+ G_BRCOND %18(s1), %bb.4
+ G_BR %bb.5
+
+ bb.5:
+ S_ENDPGM 0
+
+...
+---
+# CHECK-LABEL: MachineUniformityInfo for function: temporal_diverge_loopuser_nested
+name: temporal_diverge_loopuser_nested
+alignment: 1
+legalized: true
+tracksRegLiveness: true
+registers:
+ - { id: 3, class: _ }
+ - { id: 4, class: vgpr_32 }
+ - { id: 5, class: sgpr_32 }
+ - { id: 6, class: sgpr_32 }
+liveins:
+ - { reg: '$sgpr0_sgpr1', virtual-reg: '%3' }
+ - { reg: '$vgpr0', virtual-reg: '%4' }
+ - { reg: '$sgpr2', virtual-reg: '%5' }
+ - { reg: '$sgpr3', virtual-reg: '%6' }
+body: |
+ bb.1:
+ liveins: $sgpr0_sgpr1
+
+ %3:_(p4) = COPY $sgpr0_sgpr1
+ %7:_(p4) = COPY %3(p4)
+ %8:_(s64) = G_CONSTANT i64 40
+ %9:_(p4) = G_PTR_ADD %7, %8(s64)
+ %10:_(s32) = G_LOAD %9(p4) :: (dereferenceable invariant load (s32), addrspace 4)
+ %12:_(s32) = G_CONSTANT i32 0
+ %13:_(s1) = G_ICMP intpred(sge), %10(s32), %12
+
+ bb.2:
+ %23:_(s64) = G_CONSTANT i64 0
+
+ bb.3:
+ successors: %bb.4, %bb.3
+
+ %15:_(s64) = G_PHI %23(s64), %bb.2, %16(s64), %bb.3
+ %25:_(s1) = G_CONSTANT i1 false
+ %16:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %25(s1), %15(s64)
+ ; CHECK: DIVERGENT: SI_LOOP
+ SI_LOOP %16(s64), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
+ G_BR %bb.4
+
+ bb.4:
+ ; CHECK: DIVERGENT: %{{[0-9]+}}: %{{[0-9]+}}:_(s64) = G_PHI
+ %18:_(s64) = G_PHI %16(s64), %bb.3
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %18(s64)
+
+ bb.5:
+
+ bb.6:
+ successors: %bb.8, %bb.5
+
+ G_BRCOND %13(s1), %bb.8
+ G_BR %bb.5
+
+ bb.7:
+ S_ENDPGM 0
+
+ bb.8:
+ successors: %bb.7, %bb.2
+
+ %24:_(s1) = G_CONSTANT i1 false
+ G_BRCOND %24(s1), %bb.7
+ G_BR %bb.2
+
+...
--- /dev/null
+# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+
+# This test was generated using SelectionDAG, where the compilation flow does
+# not match the assumptions made in MachineUA. For now, this test mostly serves
+# the purpose of catching in any crash when invoking MachineUA. The test should
+# be deleted when it is clear that it is not actually testing anything useful.
+
+---
+# CHECK-LABEL: MachineUniformityInfo for function: hidden_diverge
+# CHECK-LABEL: BLOCK bb.0
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:vgpr_32(s32) = COPY $vgpr0
+# CHECK-LABEL: BLOCK bb.2
+# CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:vreg_1 = PHI %{{[0-9]*}}:vreg_1, %bb.0, %{{[0-9]*}}:sreg_64, %bb.1
+# CHECK-LABEL: BLOCK bb.3
+# CHECK-LABEL: BLOCK bb.4
+
+name: hidden_diverge
+tracksRegLiveness: true
+body: |
+ bb.0:
+ successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ liveins: $vgpr0, $sgpr0_sgpr1
+
+ %11:sgpr_64(p4) = COPY $sgpr0_sgpr1
+ %10:vgpr_32(s32) = COPY $vgpr0
+ %15:sreg_64_xexec = S_LOAD_DWORDX2_IMM %11(p4), 36, 0
+ %16:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %11(p4), 44, 0
+ %17:sreg_32 = COPY %15.sub1
+ %18:sreg_32 = COPY %15.sub0
+ %19:sgpr_96 = REG_SEQUENCE killed %18, %subreg.sub0, killed %17, %subreg.sub1, killed %16, %subreg.sub2
+ %0:sgpr_96 = COPY %19
+ %20:sreg_32 = S_MOV_B32 -1
+ %21:sreg_64 = V_CMP_GT_I32_e64 %10(s32), killed %20, implicit $exec
+ %22:sreg_32 = S_MOV_B32 0
+ %23:sreg_64 = V_CMP_LT_I32_e64 %10(s32), killed %22, implicit $exec
+ %1:vreg_1 = COPY %21
+ %14:sreg_32 = IMPLICIT_DEF
+ %2:sreg_64 = SI_IF killed %23, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ %24:sreg_32 = COPY %0.sub0
+ %3:sreg_32 = COPY %0.sub1
+ %25:sreg_32 = S_MOV_B32 0
+ S_CMP_LT_I32 killed %24, killed %25, implicit-def $scc
+ %26:sreg_64 = COPY $scc
+ %4:sreg_64 = COPY %26
+
+ bb.2:
+ successors: %bb.3(0x40000000), %bb.4(0x40000000)
+
+ %5:sreg_32 = PHI %14, %bb.0, %3, %bb.1
+ %6:vreg_1 = PHI %1, %bb.0, %4, %bb.1
+ SI_END_CF %2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ %27:sreg_64 = COPY %6
+ %7:sreg_64 = SI_IF %27, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ successors: %bb.4(0x80000000)
+
+ %8:sreg_32 = COPY %0.sub2
+
+ bb.4:
+ %9:vgpr_32 = PHI %5, %bb.2, %8, %bb.3
+ SI_END_CF %7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+ %28:sreg_64 = IMPLICIT_DEF
+ %29:vreg_64 = COPY %28
+ GLOBAL_STORE_DWORD killed %29, %9, 0, 0, implicit $exec
+ S_ENDPGM 0
+
+...
--- /dev/null
+# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
+
+# This test was generated using SelectionDAG, where the compilation flow does
+# not match the assumptions made in MachineUA. For now, this test mostly serves
+# the purpose of catching any crash when invoking MachineUA. The test should
+# be deleted when it is clear that it is not actually testing anything useful.
+
+# CHECK-LABEL: MachineUniformityInfo for function: irreducible
+
+---
+name: irreducible
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+body: |
+ bb.0:
+ successors: %bb.1, %bb.2
+ liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr14, $sgpr15, $sgpr16
+
+ %0:sreg_32 = IMPLICIT_DEF
+ %2:vgpr_32 = COPY $vgpr0
+ %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ S_CMP_EQ_U32 %0, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit $scc
+ S_BRANCH %bb.2
+
+ bb.1:
+ %28:vgpr_32 = PHI %3, %bb.0, %49, %bb.5
+ %29:vgpr_32 = V_ADD_U32_e64 %28, 1, 0, implicit $exec
+ S_BRANCH %bb.3
+
+ bb.2:
+ %38:vgpr_32 = PHI %3, %bb.0, %49, %bb.4
+ %39:vgpr_32 = V_ADD_U32_e64 %38, 2, 0, implicit $exec
+
+ bb.3:
+ %49:vgpr_32 = PHI %29, %bb.1, %39, %bb.2
+
+ bb.4:
+ successors: %bb.2, %bb.5
+
+ %50:vgpr_32 = V_AND_B32_e32 3, %2, implicit $exec
+ %51:sreg_64 = V_CMP_EQ_U32_e64 %50, 2, implicit $exec
+ %52:sreg_64 = SI_IF killed %51:sreg_64, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.5:
+ successors: %bb.1, %bb.6
+ %61:sreg_64 = V_CMP_EQ_U32_e64 %50, 1, implicit $exec
+ %62:sreg_64 = SI_IF killed %61:sreg_64, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+ bb.6:
+ S_ENDPGM 0
+...
; CHECK: DIVERGENT: br i1 %div.exitx,
X:
+; CHECK: DIVERGENT: %div.user =
%div.user = add i32 %uni.inc, 5
br i1 %uni.cond, label %G, label %Y
Y:
+; CHECK: DIVERGENT: %div.alsouser =
%div.alsouser = add i32 %uni.inc, 5
ret void
}
H:
%uni.merge.h = phi i32 [ 0, %G ], [ %uni.inc, %H ]
%uni.inc = add i32 %uni.merge.h, 1
- br i1 %uni.cond, label %X, label %H ; divergent branch
+ br i1 %uni.cond, label %X, label %H
X:
%uni.user = add i32 %uni.inc, 5
br label %G
G:
+; C HECK: DIVERGENT: %div.user =
%div.user = add i32 %uni.inc, 5
br i1 %uni.cond, label %G, label %Y
; CHECK: DIVERGENT: %div.user =
ret void
}
-; temporal-divergent use of value carried by divergent loop, user is inside sibling loop, defs and use are carried by a uniform loop
+; temporal-divergent use of value carried by divergent loop, user is inside
+; sibling loop, defs and use are carried by a uniform loop
define amdgpu_kernel void @temporal_diverge_loopuser_nested(i32 %n, i32 %a, i32 %b) #0 {
; CHECK-LABEL: for function 'temporal_diverge_loopuser_nested':
; CHECK-NOT: DIVERGENT: %uni.