#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
unsigned SkipThreshold = 0;
+ MachineDominatorTree *MDT = nullptr;
bool shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const;
- bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);
+ bool dominatesAllReachable(MachineBasicBlock &MBB);
+ void skipIfDead(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ DebugLoc DL);
- void kill(MachineInstr &MI);
-
- MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ bool kill(MachineInstr &MI);
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
char SIInsertSkips::ID = 0;
-INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE,
- "SI insert s_cbranch_execz instructions", false, false)
+INITIALIZE_PASS_BEGIN(SIInsertSkips, DEBUG_TYPE,
+ "SI insert s_cbranch_execz instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SIInsertSkips, DEBUG_TYPE,
+ "SI insert s_cbranch_execz instructions", false, false)
char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID;
return false;
}
-bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction *MF = MBB.getParent();
-
- if (MF->getFunction().getCallingConv() != CallingConv::AMDGPU_PS ||
- !shouldSkip(MBB, MBB.getParent()->back()))
- return false;
-
- MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());
-
- const DebugLoc &DL = MI.getDebugLoc();
-
- // If the exec mask is non-zero, skip the next two instructions
- BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addMBB(&NextBB);
+/// Check whether \p MBB dominates all blocks that are reachable from it.
+///
+/// Used by the caller as the legality condition for the early-exit insertion
+/// after a kill: if some reachable block were not dominated by \p MBB, a wave
+/// could reach it without having executed the kill in \p MBB.
+bool SIInsertSkips::dominatesAllReachable(MachineBasicBlock &MBB) {
+ // depth_first walks the successor graph starting at MBB, so every block a
+ // wave can reach after MBB is tested; one failed dominance query rejects.
+ for (MachineBasicBlock *Other : depth_first(&MBB)) {
+ if (!MDT->dominates(&MBB, Other))
+ return false;
+ }
+ return true;
+}
- MachineBasicBlock::iterator Insert = SkipBB->begin();
+/// Insert an "if exec=0 { null export; s_endpgm }" sequence before the given
+/// iterator. Only applies to pixel shaders.
+void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL) {
+ MachineFunction *MF = MBB.getParent();
+ assert(MF->getFunction().getCallingConv() == CallingConv::AMDGPU_PS);
+
+ // Currently, SI_KILL_*_TERMINATOR is expected to occur only as the last
+ // terminator of a basic block. If this ever changes, we need to optionally
+ // split MBB here.
+ assert(I == MBB.end());
+
+ // It is possible for an SI_KILL_*_TERMINATOR to sit at the bottom of a
+ // basic block that has no further successors (e.g., there was an
+ // `unreachable` there in IR). This can happen with original source of the
+ // form:
+ //
+ // if (uniform_condition) {
+ // write_to_memory();
+ // discard;
+ // }
+ //
+ // In this case, we write the "null export; s_endpgm" skip code in the
+ // already-existing basic block.
+ auto NextBBI = std::next(MBB.getIterator());
+ // NOTE(review): this assumes MBB has a layout successor; if MBB were the
+ // last block in the function, NextBBI == MF->end() and &*NextBBI below
+ // would be invalid — confirm callers guarantee a following block exists.
+ bool NoSuccessor = llvm::find(MBB.successors(), &*NextBBI) == MBB.succ_end();
+ MachineBasicBlock *SkipBB;
+
+ if (NoSuccessor) {
+ SkipBB = &MBB;
+ } else {
+ // Create a new basic block that will contain the "null export; s_endpgm"
+ // and set up the branching to go around it.
+ SkipBB = MF->CreateMachineBasicBlock();
+ MF->insert(NextBBI, SkipBB);
- // Exec mask is zero: Export to NULL target...
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE))
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addImm(1) // vm
- .addImm(0) // compr
- .addImm(0); // en
+ // Live waves (exec != 0) branch over SkipBB to the next layout block.
+ BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&*NextBBI);
+ MBB.addSuccessor(SkipBB);
- // ... and terminate wavefront.
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
+ // Keep the dominator tree valid: MBB is SkipBB's sole predecessor, so it
+ // is also its immediate dominator (the pass preserves MachineDominatorTree).
+ MDT->addNewBlock(SkipBB, &MBB);
+ }
- return true;
+ // Generate "null export; s_endpgm".
+ BuildMI(SkipBB, DL, TII->get(AMDGPU::EXP_DONE))
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addImm(1) // vm
+ .addImm(0) // compr
+ .addImm(0); // en
+ BuildMI(SkipBB, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
}
-void SIInsertSkips::kill(MachineInstr &MI) {
+/// Translate a SI_KILL_*_TERMINATOR into exec-manipulating instructions.
+/// Return true unless the terminator is a no-op.
+bool SIInsertSkips::kill(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
I.addImm(0); // omod
}
- break;
+ return true;
}
case AMDGPU::SI_KILL_I1_TERMINATOR: {
const MachineFunction *MF = MI.getParent()->getParent();
int64_t Imm = Op.getImm();
assert(Imm == 0 || Imm == -1);
- if (Imm == KillVal)
+ if (Imm == KillVal) {
BuildMI(MBB, &MI, DL, TII->get(ST.isWave32() ? AMDGPU::S_MOV_B32
: AMDGPU::S_MOV_B64), Exec)
.addImm(0);
- break;
+ return true;
+ }
+ return false;
}
unsigned Opcode = KillVal ? AMDGPU::S_ANDN2_B64 : AMDGPU::S_AND_B64;
BuildMI(MBB, &MI, DL, TII->get(Opcode), Exec)
.addReg(Exec)
.add(Op);
- break;
+ return true;
}
default:
llvm_unreachable("invalid opcode, expected SI_KILL_*_TERMINATOR");
}
}
-MachineBasicBlock *SIInsertSkips::insertSkipBlock(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
- MachineFunction *MF = MBB.getParent();
-
- MachineBasicBlock *SkipBB = MF->CreateMachineBasicBlock();
- MachineFunction::iterator MBBI(MBB);
- ++MBBI;
-
- MF->insert(MBBI, SkipBB);
- MBB.addSuccessor(SkipBB);
-
- return SkipBB;
-}
-
// Returns true if a branch over the block was inserted.
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
MachineBasicBlock &SrcMBB) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
SkipThreshold = SkipThresholdFlag;
- bool HaveKill = false;
- bool MadeChange = false;
-
- // Track depth of exec mask, divergent branches.
- SmallVector<MachineBasicBlock *, 16> ExecBranchStack;
-
- MachineFunction::iterator NextBB;
-
MachineBasicBlock *EmptyMBBAtEnd = nullptr;
+ SmallVector<MachineInstr *, 4> KillInstrs;
+ bool MadeChange = false;
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; BI = NextBB) {
- NextBB = std::next(BI);
- MachineBasicBlock &MBB = *BI;
- bool HaveSkipBlock = false;
-
- if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
- // Reached convergence point for last divergent branch.
- ExecBranchStack.pop_back();
- }
-
- if (HaveKill && ExecBranchStack.empty()) {
- HaveKill = false;
-
- // TODO: Insert skip if exec is 0?
- }
-
+ for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I, Next;
for (I = MBB.begin(); I != MBB.end(); I = Next) {
Next = std::next(I);
-
MachineInstr &MI = *I;
switch (MI.getOpcode()) {
- case AMDGPU::S_CBRANCH_EXECZ:
- ExecBranchStack.push_back(MI.getOperand(0).getMBB());
- break;
case AMDGPU::SI_MASK_BRANCH:
- ExecBranchStack.push_back(MI.getOperand(0).getMBB());
MadeChange |= skipMaskBranch(MI, MBB);
break;
// Optimize out branches to the next block.
// FIXME: Shouldn't this be handled by BranchFolding?
if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
+ assert(&MI == &MBB.back());
MI.eraseFromParent();
- } else if (HaveSkipBlock) {
- // Remove the given unconditional branch when a skip block has been
- // inserted after the current one and let skip the two instructions
- // performing the kill if the exec mask is non-zero.
- MI.eraseFromParent();
+ MadeChange = true;
}
break;
case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
- case AMDGPU::SI_KILL_I1_TERMINATOR:
+ case AMDGPU::SI_KILL_I1_TERMINATOR: {
MadeChange = true;
- kill(MI);
-
- if (ExecBranchStack.empty()) {
- if (NextBB != BE && skipIfDead(MI, *NextBB)) {
- HaveSkipBlock = true;
- NextBB = std::next(BI);
- BE = MF.end();
- }
+ bool CanKill = kill(MI);
+
+ // Check if we can add an early "if exec=0 { end shader }".
+ //
+ // Note that we _always_ do this if it is correct, even if the kill
+ // happens fairly late in the shader, because the null export should
+ // generally still be cheaper than normal export(s).
+ //
+ // TODO: The dominatesAllReachable check is conservative: if the
+ // dominance is only missing due to _uniform_ branches, we could
+ // in fact insert the early-exit as well.
+ if (CanKill &&
+ MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS &&
+ dominatesAllReachable(MBB)) {
+ // Mark the instruction for kill-if-dead insertion. We delay this
+ // change because it modifies the CFG.
+ KillInstrs.push_back(&MI);
} else {
- HaveKill = true;
+ MI.eraseFromParent();
}
-
- MI.eraseFromParent();
break;
+ }
case AMDGPU::SI_RETURN_TO_EPILOG:
// FIXME: Should move somewhere else
// Graphics shaders returning non-void shouldn't contain S_ENDPGM,
// because external bytecode will be appended at the end.
- if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
+ if (&MBB != &MF.back() || &MI != &MBB.back()) {
// SI_RETURN_TO_EPILOG is not the last instruction. Add an empty block at
// the end and jump there.
if (!EmptyMBBAtEnd) {
}
MBB.addSuccessor(EmptyMBBAtEnd);
- BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
.addMBB(EmptyMBBAtEnd);
- I->eraseFromParent();
+ MI.eraseFromParent();
}
break;
}
}
+ for (MachineInstr *Kill : KillInstrs) {
+ skipIfDead(*Kill->getParent(), std::next(Kill->getIterator()),
+ Kill->getDebugLoc());
+ Kill->eraseFromParent();
+ }
+ KillInstrs.clear();
+
return MadeChange;
}
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: s_mov_b64 exec, 0
+; CHECK-NEXT: s_cbranch_execnz BB1_2
; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT: exp null off, off, off, off done vm
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB1_2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
call void @llvm.amdgcn.kill(i1 false)
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg_x2:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: s_mov_b64 exec, 0
-; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT: s_cbranch_execnz BB2_2
+; CHECK: exp null
+; CHECK-NEXT: s_endpgm
+; CHECK: BB2_2:
; CHECK-NEXT: s_mov_b64 exec, 0
-; CHECK-NEXT: ; %bb.2:
+; CHECK-NEXT: s_cbranch_execnz BB2_4
+; CHECK: exp null
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB2_4:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
call void @llvm.amdgcn.kill(i1 false)
; CHECK-LABEL: {{^}}test_kill_depth_var:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
-; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT: s_cbranch_execnz BB3_2
+; CHECK: exp null
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB3_2:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
%cmp = fcmp olt float %x, 0.0
; CHECK-LABEL: {{^}}test_kill_depth_var_x2_same:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
-; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT: s_cbranch_execnz BB4_2
+; CHECK: exp null
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB4_2:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
-; CHECK-NEXT: ; %bb.2:
+; CHECK-NEXT: s_cbranch_execnz BB4_4
+; CHECK: exp null
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB4_4:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
%cmp = fcmp olt float %x, 0.0
ret void
}
+; FIXME: Ideally only one early-exit would be emitted
; CHECK-LABEL: {{^}}test_kill_depth_var_x2:
; CHECK-NEXT: ; %bb.0:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v0
-; CHECK-NEXT: ; %bb.1:
+; CHECK-NEXT: s_cbranch_execnz BB5_2
+; CHECK: exp null
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB5_2:
; CHECK-NEXT: v_cmpx_gt_f32_e32 vcc, 0, v1
-; CHECK-NEXT: ; %bb.2:
+; CHECK-NEXT: s_cbranch_execnz BB5_4
+; CHECK: exp null
+; CHECK-NEXT: s_endpgm
+; CHECK-NEXT: BB5_4:
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
%cmp.x = fcmp olt float %x, 0.0
; CHECK: v_nop_e64
; CHECK: v_cmpx_gt_f32_e32 vcc, 0, v7
-; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
-; CHECK-NEXT: ; %bb.2:
-; CHECK-NEXT: exp null off, off, off, off done vm
-; CHECK-NEXT: s_endpgm
-; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
-; CHECK-NEXT: s_endpgm
-define amdgpu_ps void @test_kill_control_flow(i32 inreg %arg) #0 {
+; TODO: We could do an early-exit here (the branch above is uniform!)
+; CHECK-NOT: exp null
+
+; CHECK: v_mov_b32_e32 v0, 1.0
+define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
entry:
%cmp = icmp eq i32 %arg, 0
br i1 %cmp, label %bb, label %exit
br label %exit
exit:
- ret void
+ ret float 1.0
}
; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
; CHECK: v_mov_b32_e64 v8, -1
; CHECK: ;;#ASMEND
; CHECK: v_cmpx_gt_f32_e32 vcc, 0, v7
-; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
-; CHECK-NEXT: ; %bb.2:
-; CHECK-NEXT: exp null off, off, off, off done vm
-; CHECK-NEXT: s_endpgm
+; TODO: We could do an early-exit here (the branch above is uniform!)
+; CHECK-NOT: exp null
-; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
; CHECK: buffer_store_dword v8
; CHECK: v_mov_b32_e64 v9, -2
; CHECK-LABEL: {{^}}complex_loop:
; CHECK: s_mov_b64 exec, 0
-; The following is an error, since it happens nested inside the loop:
-; CHECK-NEXT: s_cbranch_execnz
-; CHECK-NEXT: ; %bb.{{[0-9]+}}
-; CHECK-NEXT: exp null
+; CHECK-NOT: exp null
define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
.entry:
%flaga = icmp sgt i32 %cmpa, 0