[AMDGPU] Merge SIRemoveShortExecBranches into SIPreEmitPeephole

author Carl Ritson <carl.ritson@amd.com>

Sat, 20 Mar 2021 01:29:08 +0000 (10:29 +0900)

committer Carl Ritson <carl.ritson@amd.com>

Sat, 20 Mar 2021 02:26:42 +0000 (11:26 +0900)
author Carl Ritson <carl.ritson@amd.com>
Sat, 20 Mar 2021 01:29:08 +0000 (10:29 +0900)
committer Carl Ritson <carl.ritson@amd.com>
Sat, 20 Mar 2021 02:26:42 +0000 (11:26 +0900)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h

index cdd59fe..4f9f888 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -204,9 +204,6 @@ extern char &SIWholeQuadModeID;
  void initializeSILowerControlFlowPass(PassRegistry &);
  extern char &SILowerControlFlowID;
  
-void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
-extern char &SIRemoveShortExecBranchesID;
-
  void initializeSIPreEmitPeepholePass(PassRegistry &);
  extern char &SIPreEmitPeepholeID;
  
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

index 9db4e8c..2b42f9e 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -249,7 +249,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
    initializeSIModeRegisterPass(*PR);
    initializeSIWholeQuadModePass(*PR);
    initializeSILowerControlFlowPass(*PR);
-  initializeSIRemoveShortExecBranchesPass(*PR);
    initializeSIPreEmitPeepholePass(*PR);
    initializeSIInsertSkipsPass(*PR);
    initializeSIMemoryLegalizerPass(*PR);
@@ -1215,7 +1214,6 @@ void GCNPassConfig::addPreEmitPass() {
    if (getOptLevel() > CodeGenOpt::None)
      addPass(&SIInsertHardClausesID);
  
-  addPass(&SIRemoveShortExecBranchesID);
    addPass(&SIInsertSkipsPassID);
    addPass(&SIPreEmitPeepholeID);
    // The hazard recognizer that runs as part of the post-ra scheduler does not
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt

index 7aa2568..03b0c0f 100644 (file)
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -137,7 +137,6 @@ add_llvm_target(AMDGPUCodeGen
    SIPreEmitPeephole.cpp
    SIProgramInfo.cpp
    SIRegisterInfo.cpp
-  SIRemoveShortExecBranches.cpp
    SIShrinkInstructions.cpp
    SIWholeQuadMode.cpp
    GCNILPSched.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

index 5f10fef..93d33fd 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -21,6 +21,14 @@ using namespace llvm;
  
  #define DEBUG_TYPE "si-pre-emit-peephole"
  
+// Backing storage for -amdgpu-skip-threshold. mustRetainExeczBranch() keeps
+// an execz skip branch once the region it jumps over reaches this many
+// instructions.
+static unsigned SkipThreshold;
+
+// Hidden command-line option (default 12). cl::location makes the parsed
+// value land directly in SkipThreshold above.
+static cl::opt<unsigned, true> SkipThresholdFlag(
+    "amdgpu-skip-threshold", cl::Hidden,
+    cl::desc(
+        "Number of instructions before jumping over divergent control flow"),
+    cl::location(SkipThreshold), cl::init(12));
+
  namespace {
  
  class SIPreEmitPeephole : public MachineFunctionPass {
@@ -30,6 +38,13 @@ private:
  
    bool optimizeVccBranch(MachineInstr &MI) const;
    bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const;
+  bool getBlockDestinations(MachineBasicBlock &SrcMBB,
+                            MachineBasicBlock *&TrueMBB,
+                            MachineBasicBlock *&FalseMBB,
+                            SmallVectorImpl<MachineOperand> &Cond);
+  bool mustRetainExeczBranch(const MachineBasicBlock &From,
+                             const MachineBasicBlock &To) const;
+  bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
  
  public:
    static char ID;
@@ -258,6 +273,74 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
    return true;
  }
  
+/// Determine the two destinations of the branch terminating \p SrcMBB.
+///
+/// \p TrueMBB, \p FalseMBB and \p Cond are filled in by
+/// TII->analyzeBranch(). When the analysis succeeds but leaves \p FalseMBB
+/// null, it is set to the block that follows \p SrcMBB in layout order.
+/// NOTE(review): getNextNode() is presumably null when \p SrcMBB is the last
+/// block of the function; callers should only dereference \p FalseMBB when a
+/// following block is known to exist.
+///
+/// \returns true if the terminators could be analyzed, false otherwise.
+bool SIPreEmitPeephole::getBlockDestinations(
+    MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
+    MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
+  // analyzeBranch() reports failure by returning true.
+  const bool Analyzed = !TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond);
+
+  // No explicit false target: fall through to the layout successor.
+  if (Analyzed && FalseMBB == nullptr)
+    FalseMBB = SrcMBB.getNextNode();
+
+  return Analyzed;
+}
+
+/// Decide whether the execz branch that skips the blocks laid out from
+/// \p From up to (but not including) \p To has to be kept.
+///
+/// The walk follows function layout order and stops at \p To or at the end
+/// of the function, whichever comes first.
+///
+/// \returns true if any skipped instruction is a conditional branch, has
+/// unwanted effects when EXEC is empty, is an SMRD/VMEM/FLAT/DS access or an
+/// S_WAITCNT, or if the region contains at least SkipThreshold non-meta
+/// instructions; false if none of these apply.
+bool SIPreEmitPeephole::mustRetainExeczBranch(
+    const MachineBasicBlock &From, const MachineBasicBlock &To) const {
+  unsigned NumInstr = 0;
+  const MachineFunction *MF = From.getParent();
+
+  for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
+       MBBI != End && MBBI != ToI; ++MBBI) {
+    for (const MachineInstr &MI : *MBBI) {
+      // When a uniform loop is inside non-uniform control flow, the branch
+      // leaving the loop might never be taken when EXEC = 0.
+      // Hence we should retain cbranch out of the loop lest it become infinite.
+      if (MI.isConditionalBranch())
+        return true;
+
+      // Meta instructions (debug values, CFI directives, ...) produce no
+      // machine code. Skip them so that they neither count toward the
+      // threshold nor make the result depend on debug info being present.
+      if (MI.isMetaInstruction())
+        continue;
+
+      if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
+        return true;
+
+      // These instructions are potentially expensive even if EXEC = 0.
+      if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
+          TII->isDS(MI) || MI.getOpcode() == AMDGPU::S_WAITCNT)
+        return true;
+
+      ++NumInstr;
+      if (NumInstr >= SkipThreshold)
+        return true;
+    }
+  }
+
+  return false;
+}
+
+/// Erase the execz skip branch \p MI at the end of \p SrcMBB when it is a
+/// forward branch and mustRetainExeczBranch() does not require keeping it.
+/// The branch target is also dropped from \p SrcMBB's successor list.
+///
+/// \returns true if the skip branch instruction is removed.
+bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
+                                          MachineBasicBlock &SrcMBB) {
+  MachineBasicBlock *Taken = nullptr;
+  MachineBasicBlock *FallThrough = nullptr;
+  SmallVector<MachineOperand, 1> BranchCond;
+
+  if (!getBlockDestinations(SrcMBB, Taken, FallThrough, BranchCond))
+    return false;
+
+  // Consider only the forward branches. This test must come first: the
+  // fall-through block is only inspected once the target is known to lie
+  // after SrcMBB.
+  if (SrcMBB.getNumber() >= Taken->getNumber())
+    return false;
+
+  if (mustRetainExeczBranch(*FallThrough, *Taken))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
+  MI.eraseFromParent();
+  SrcMBB.removeSuccessor(Taken);
+
+  return true;
+}
+
  bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
    const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
    TII = ST.getInstrInfo();
@@ -265,10 +348,12 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
    MachineBasicBlock *EmptyMBBAtEnd = nullptr;
    bool Changed = false;
  
+  MF.RenumberBlocks();
+
    for (MachineBasicBlock &MBB : MF) {
      MachineBasicBlock::iterator MBBE = MBB.getFirstTerminator();
      MachineBasicBlock::iterator TermI = MBBE;
-    // Check first terminator for VCC branches to optimize
+    // Check first terminator for branches to optimize
      if (TermI != MBB.end()) {
        MachineInstr &MI = *TermI;
        switch (MI.getOpcode()) {
@@ -276,6 +361,9 @@ bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
        case AMDGPU::S_CBRANCH_VCCNZ:
          Changed |= optimizeVccBranch(MI);
          continue;
+      case AMDGPU::S_CBRANCH_EXECZ:
+        Changed |= removeExeczBranch(MI, MBB);
+        continue;
        default:
          break;
        }
diff --git a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp b/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp

deleted file mode 100644 (file)

index 104dea8..0000000
--- a/llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass optmizes the s_cbranch_execz instructions.
-/// The pass removes this skip instruction for short branches,
-/// if there is no unwanted sideeffect in the fallthrough code sequence.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/CommandLine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-remove-short-exec-branches"
-
-static unsigned SkipThreshold;
-
-static cl::opt<unsigned, true> SkipThresholdFlag(
-    "amdgpu-skip-threshold", cl::Hidden,
-    cl::desc(
-        "Number of instructions before jumping over divergent control flow"),
-    cl::location(SkipThreshold), cl::init(12));
-
-namespace {
-
-class SIRemoveShortExecBranches : public MachineFunctionPass {
-private:
-  const SIInstrInfo *TII = nullptr;
-  bool getBlockDestinations(MachineBasicBlock &SrcMBB,
-                            MachineBasicBlock *&TrueMBB,
-                            MachineBasicBlock *&FalseMBB,
-                            SmallVectorImpl<MachineOperand> &Cond);
-  bool mustRetainExeczBranch(const MachineBasicBlock &From,
-                             const MachineBasicBlock &To) const;
-  bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
-
-public:
-  static char ID;
-
-  SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
-    initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
-                "SI remove short exec branches", false, false)
-
-char SIRemoveShortExecBranches::ID = 0;
-
-char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
-
-bool SIRemoveShortExecBranches::getBlockDestinations(
-    MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
-    MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
-  if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
-    return false;
-
-  if (!FalseMBB)
-    FalseMBB = SrcMBB.getNextNode();
-
-  return true;
-}
-
-bool SIRemoveShortExecBranches::mustRetainExeczBranch(
-    const MachineBasicBlock &From, const MachineBasicBlock &To) const {
-  unsigned NumInstr = 0;
-  const MachineFunction *MF = From.getParent();
-
-  for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
-       MBBI != End && MBBI != ToI; ++MBBI) {
-    const MachineBasicBlock &MBB = *MBBI;
-
-    for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
-         I != E; ++I) {
-      // When a uniform loop is inside non-uniform control flow, the branch
-      // leaving the loop might never be taken when EXEC = 0.
-      // Hence we should retain cbranch out of the loop lest it become infinite.
-      if (I->isConditionalBranch())
-        return true;
-
-      if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
-        return true;
-
-      if (TII->isKillTerminator(I->getOpcode()))
-        return true;
-
-      // These instructions are potentially expensive even if EXEC = 0.
-      if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
-          TII->isDS(*I) || I->getOpcode() == AMDGPU::S_WAITCNT)
-        return true;
-
-      ++NumInstr;
-      if (NumInstr >= SkipThreshold)
-        return true;
-    }
-  }
-
-  return false;
-}
-
-// Returns true if the skip branch instruction is removed.
-bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
-                                                  MachineBasicBlock &SrcMBB) {
-  MachineBasicBlock *TrueMBB = nullptr;
-  MachineBasicBlock *FalseMBB = nullptr;
-  SmallVector<MachineOperand, 1> Cond;
-
-  if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
-    return false;
-
-  // Consider only the forward branches.
-  if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
-      mustRetainExeczBranch(*FalseMBB, *TrueMBB))
-    return false;
-
-  LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
-  MI.eraseFromParent();
-  SrcMBB.removeSuccessor(TrueMBB);
-
-  return true;
-}
-
-bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
-  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
-  TII = ST.getInstrInfo();
-  MF.RenumberBlocks();
-  bool Changed = false;
-
-  for (MachineBasicBlock &MBB : MF) {
-    MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
-    if (MBBI == MBB.end())
-      continue;
-
-    MachineInstr &MI = *MBBI;
-    switch (MI.getOpcode()) {
-    case AMDGPU::S_CBRANCH_EXECZ:
-      Changed = removeExeczBranch(MI, MBB);
-      break;
-    default:
-      break;
-    }
-  }
-
-  return Changed;
-}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll

index f0d7606..1b8689d 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
@@ -166,12 +166,13 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
  ; SI-NEXT:    s_xor_b64 s[2:3], vcc, -1
  ; SI-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
  ; SI-NEXT:    s_xor_b64 s[2:3], exec, s[4:5]
+; SI-NEXT:    s_cbranch_execz BB2_3
  ; SI-NEXT:  ; %bb.1: ; %.demote
  ; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; SI-NEXT:    s_cbranch_scc0 BB2_4
  ; SI-NEXT:  ; %bb.2: ; %.demote
  ; SI-NEXT:    s_mov_b64 exec, 0
-; SI-NEXT:  ; %bb.3: ; %.continue
+; SI-NEXT:  BB2_3: ; %.continue
  ; SI-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
  ; SI-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
@@ -192,12 +193,13 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
  ; GFX9-NEXT:    s_xor_b64 s[2:3], vcc, -1
  ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
  ; GFX9-NEXT:    s_xor_b64 s[2:3], exec, s[4:5]
+; GFX9-NEXT:    s_cbranch_execz BB2_3
  ; GFX9-NEXT:  ; %bb.1: ; %.demote
  ; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB2_4
  ; GFX9-NEXT:  ; %bb.2: ; %.demote
  ; GFX9-NEXT:    s_mov_b64 exec, 0
-; GFX9-NEXT:  ; %bb.3: ; %.continue
+; GFX9-NEXT:  BB2_3: ; %.continue
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
  ; GFX9-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
@@ -218,12 +220,13 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
  ; GFX10-32-NEXT:    s_xor_b32 s1, vcc_lo, -1
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s2, s1
  ; GFX10-32-NEXT:    s_xor_b32 s1, exec_lo, s2
+; GFX10-32-NEXT:    s_cbranch_execz BB2_3
  ; GFX10-32-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-32-NEXT:    s_andn2_b32 s0, s0, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB2_4
  ; GFX10-32-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-32-NEXT:    s_mov_b32 exec_lo, 0
-; GFX10-32-NEXT:  ; %bb.3: ; %.continue
+; GFX10-32-NEXT:  BB2_3: ; %.continue
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
  ; GFX10-32-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
  ; GFX10-32-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
@@ -244,12 +247,13 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
  ; GFX10-64-NEXT:    s_xor_b64 s[2:3], vcc, -1
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
  ; GFX10-64-NEXT:    s_xor_b64 s[2:3], exec, s[4:5]
+; GFX10-64-NEXT:    s_cbranch_execz BB2_3
  ; GFX10-64-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB2_4
  ; GFX10-64-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-64-NEXT:    s_mov_b64 exec, 0
-; GFX10-64-NEXT:  ; %bb.3: ; %.continue
+; GFX10-64-NEXT:  BB2_3: ; %.continue
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX10-64-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
  ; GFX10-64-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
@@ -284,13 +288,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
  ; SI-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; SI-NEXT:    s_xor_b64 s[14:15], exec, s[14:15]
+; SI-NEXT:    s_cbranch_execz BB3_3
  ; SI-NEXT:  ; %bb.1: ; %.demote
  ; SI-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; SI-NEXT:    s_cbranch_scc0 BB3_4
  ; SI-NEXT:  ; %bb.2: ; %.demote
  ; SI-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; SI-NEXT:    s_and_b64 exec, exec, s[16:17]
-; SI-NEXT:  ; %bb.3: ; %.continue
+; SI-NEXT:  BB3_3: ; %.continue
  ; SI-NEXT:    s_or_b64 exec, exec, s[14:15]
  ; SI-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
  ; SI-NEXT:    s_waitcnt vmcnt(0)
@@ -312,13 +317,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX9-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
  ; GFX9-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; GFX9-NEXT:    s_xor_b64 s[14:15], exec, s[14:15]
+; GFX9-NEXT:    s_cbranch_execz BB3_3
  ; GFX9-NEXT:  ; %bb.1: ; %.demote
  ; GFX9-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB3_4
  ; GFX9-NEXT:  ; %bb.2: ; %.demote
  ; GFX9-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; GFX9-NEXT:    s_and_b64 exec, exec, s[16:17]
-; GFX9-NEXT:  ; %bb.3: ; %.continue
+; GFX9-NEXT:  BB3_3: ; %.continue
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[14:15]
  ; GFX9-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
  ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -340,13 +346,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX10-32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v1
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s13, vcc_lo
  ; GFX10-32-NEXT:    s_xor_b32 s13, exec_lo, s13
+; GFX10-32-NEXT:    s_cbranch_execz BB3_3
  ; GFX10-32-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-32-NEXT:    s_andn2_b32 s12, s12, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB3_4
  ; GFX10-32-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-32-NEXT:    s_wqm_b32 s28, s12
  ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s28
-; GFX10-32-NEXT:  ; %bb.3: ; %.continue
+; GFX10-32-NEXT:  BB3_3: ; %.continue
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s13
  ; GFX10-32-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
  ; GFX10-32-NEXT:    s_waitcnt vmcnt(0)
@@ -368,13 +375,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX10-64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; GFX10-64-NEXT:    s_xor_b64 s[28:29], exec, s[14:15]
+; GFX10-64-NEXT:    s_cbranch_execz BB3_3
  ; GFX10-64-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-64-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB3_4
  ; GFX10-64-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-64-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[16:17]
-; GFX10-64-NEXT:  ; %bb.3: ; %.continue
+; GFX10-64-NEXT:  BB3_3: ; %.continue
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[28:29]
  ; GFX10-64-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
  ; GFX10-64-NEXT:    s_waitcnt vmcnt(0)
@@ -416,13 +424,14 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
  ; SI-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; SI-NEXT:    s_xor_b64 s[14:15], exec, s[14:15]
+; SI-NEXT:    s_cbranch_execz BB4_3
  ; SI-NEXT:  ; %bb.1: ; %.demote
  ; SI-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; SI-NEXT:    s_cbranch_scc0 BB4_4
  ; SI-NEXT:  ; %bb.2: ; %.demote
  ; SI-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; SI-NEXT:    s_and_b64 exec, exec, s[16:17]
-; SI-NEXT:  ; %bb.3: ; %.continue
+; SI-NEXT:  BB4_3: ; %.continue
  ; SI-NEXT:    s_or_b64 exec, exec, s[14:15]
  ; SI-NEXT:    v_add_f32_e32 v0, v0, v0
  ; SI-NEXT:    s_and_b64 exec, exec, s[12:13]
@@ -444,13 +453,14 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX9-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
  ; GFX9-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; GFX9-NEXT:    s_xor_b64 s[14:15], exec, s[14:15]
+; GFX9-NEXT:    s_cbranch_execz BB4_3
  ; GFX9-NEXT:  ; %bb.1: ; %.demote
  ; GFX9-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB4_4
  ; GFX9-NEXT:  ; %bb.2: ; %.demote
  ; GFX9-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; GFX9-NEXT:    s_and_b64 exec, exec, s[16:17]
-; GFX9-NEXT:  ; %bb.3: ; %.continue
+; GFX9-NEXT:  BB4_3: ; %.continue
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[14:15]
  ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v0
  ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
@@ -472,13 +482,14 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX10-32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s13, vcc_lo
  ; GFX10-32-NEXT:    s_xor_b32 s13, exec_lo, s13
+; GFX10-32-NEXT:    s_cbranch_execz BB4_3
  ; GFX10-32-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-32-NEXT:    s_andn2_b32 s12, s12, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB4_4
  ; GFX10-32-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-32-NEXT:    s_wqm_b32 s28, s12
  ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s28
-; GFX10-32-NEXT:  ; %bb.3: ; %.continue
+; GFX10-32-NEXT:  BB4_3: ; %.continue
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s13
  ; GFX10-32-NEXT:    v_add_f32_e32 v0, v0, v0
  ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s12
@@ -500,13 +511,14 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX10-64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; GFX10-64-NEXT:    s_xor_b64 s[28:29], exec, s[14:15]
+; GFX10-64-NEXT:    s_cbranch_execz BB4_3
  ; GFX10-64-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-64-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB4_4
  ; GFX10-64-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-64-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[16:17]
-; GFX10-64-NEXT:  ; %bb.3: ; %.continue
+; GFX10-64-NEXT:  BB4_3: ; %.continue
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[28:29]
  ; GFX10-64-NEXT:    v_add_f32_e32 v0, v0, v0
  ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[12:13]
@@ -660,13 +672,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
  ; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SI-NEXT:    s_cbranch_execz BB6_3
  ; SI-NEXT:  ; %bb.1: ; %.demote0
  ; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; SI-NEXT:    s_cbranch_scc0 BB6_7
  ; SI-NEXT:  ; %bb.2: ; %.demote0
  ; SI-NEXT:    s_wqm_b64 s[6:7], s[0:1]
  ; SI-NEXT:    s_and_b64 exec, exec, s[6:7]
-; SI-NEXT:  ; %bb.3: ; %.continue0
+; SI-NEXT:  BB6_3: ; %.continue0
  ; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
  ; SI-NEXT:    s_mov_b64 s[4:5], s[0:1]
  ; SI-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0, s[4:5]
@@ -682,12 +695,13 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; SI-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
  ; SI-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
  ; SI-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
+; SI-NEXT:    s_cbranch_execz BB6_6
  ; SI-NEXT:  ; %bb.4: ; %.demote1
  ; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; SI-NEXT:    s_cbranch_scc0 BB6_7
  ; SI-NEXT:  ; %bb.5: ; %.demote1
  ; SI-NEXT:    s_mov_b64 exec, 0
-; SI-NEXT:  ; %bb.6: ; %.continue1
+; SI-NEXT:  BB6_6: ; %.continue1
  ; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
  ; SI-NEXT:    v_mov_b32_e32 v0, s2
  ; SI-NEXT:    v_mov_b32_e32 v1, s3
@@ -706,13 +720,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; GFX9-NEXT:    s_and_saveexec_b64 s[2:3], vcc
  ; GFX9-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
+; GFX9-NEXT:    s_cbranch_execz BB6_3
  ; GFX9-NEXT:  ; %bb.1: ; %.demote0
  ; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX9-NEXT:  ; %bb.2: ; %.demote0
  ; GFX9-NEXT:    s_wqm_b64 s[4:5], s[0:1]
  ; GFX9-NEXT:    s_and_b64 exec, exec, s[4:5]
-; GFX9-NEXT:  ; %bb.3: ; %.continue0
+; GFX9-NEXT:  BB6_3: ; %.continue0
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX9-NEXT:    s_mov_b64 s[2:3], s[0:1]
  ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
@@ -728,12 +743,13 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX9-NEXT:    s_xor_b64 s[2:3], s[2:3], -1
  ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
  ; GFX9-NEXT:    s_xor_b64 s[2:3], exec, s[4:5]
+; GFX9-NEXT:    s_cbranch_execz BB6_6
  ; GFX9-NEXT:  ; %bb.4: ; %.demote1
  ; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX9-NEXT:  ; %bb.5: ; %.demote1
  ; GFX9-NEXT:    s_mov_b64 exec, 0
-; GFX9-NEXT:  ; %bb.6: ; %.continue1
+; GFX9-NEXT:  BB6_6: ; %.continue1
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3c00
  ; GFX9-NEXT:    v_bfrev_b32_e32 v1, 60
@@ -752,13 +768,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX10-32-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
  ; GFX10-32-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX10-32-NEXT:    s_cbranch_execz BB6_3
  ; GFX10-32-NEXT:  ; %bb.1: ; %.demote0
  ; GFX10-32-NEXT:    s_andn2_b32 s0, s0, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX10-32-NEXT:  ; %bb.2: ; %.demote0
  ; GFX10-32-NEXT:    s_wqm_b32 s2, s0
  ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s2
-; GFX10-32-NEXT:  ; %bb.3: ; %.continue0
+; GFX10-32-NEXT:  BB6_3: ; %.continue0
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
  ; GFX10-32-NEXT:    s_mov_b32 s1, s0
  ; GFX10-32-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0, s1
@@ -772,12 +789,13 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX10-32-NEXT:    s_xor_b32 s1, s1, -1
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s2, s1
  ; GFX10-32-NEXT:    s_xor_b32 s1, exec_lo, s2
+; GFX10-32-NEXT:    s_cbranch_execz BB6_6
  ; GFX10-32-NEXT:  ; %bb.4: ; %.demote1
  ; GFX10-32-NEXT:    s_andn2_b32 s0, s0, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX10-32-NEXT:  ; %bb.5: ; %.demote1
  ; GFX10-32-NEXT:    s_mov_b32 exec_lo, 0
-; GFX10-32-NEXT:  ; %bb.6: ; %.continue1
+; GFX10-32-NEXT:  BB6_6: ; %.continue1
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
  ; GFX10-32-NEXT:    v_mov_b32_e32 v0, 0x3c00
  ; GFX10-32-NEXT:    v_bfrev_b32_e32 v1, 60
@@ -796,13 +814,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX10-64-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
  ; GFX10-64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
+; GFX10-64-NEXT:    s_cbranch_execz BB6_3
  ; GFX10-64-NEXT:  ; %bb.1: ; %.demote0
  ; GFX10-64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX10-64-NEXT:  ; %bb.2: ; %.demote0
  ; GFX10-64-NEXT:    s_wqm_b64 s[4:5], s[0:1]
  ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[4:5]
-; GFX10-64-NEXT:  ; %bb.3: ; %.continue0
+; GFX10-64-NEXT:  BB6_3: ; %.continue0
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX10-64-NEXT:    s_mov_b64 s[2:3], s[0:1]
  ; GFX10-64-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
@@ -816,12 +835,13 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX10-64-NEXT:    s_xor_b64 s[2:3], s[2:3], -1
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
  ; GFX10-64-NEXT:    s_xor_b64 s[2:3], exec, s[4:5]
+; GFX10-64-NEXT:    s_cbranch_execz BB6_6
  ; GFX10-64-NEXT:  ; %bb.4: ; %.demote1
  ; GFX10-64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX10-64-NEXT:  ; %bb.5: ; %.demote1
  ; GFX10-64-NEXT:    s_mov_b64 exec, 0
-; GFX10-64-NEXT:  ; %bb.6: ; %.continue1
+; GFX10-64-NEXT:  BB6_6: ; %.continue1
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX10-64-NEXT:    v_mov_b32_e32 v0, 0x3c00
  ; GFX10-64-NEXT:    v_bfrev_b32_e32 v1, 60
@@ -883,13 +903,14 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
  ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
  ; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SI-NEXT:    s_cbranch_execz BB7_3
  ; SI-NEXT:  ; %bb.1: ; %.demote0
  ; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; SI-NEXT:    s_cbranch_scc0 BB7_9
  ; SI-NEXT:  ; %bb.2: ; %.demote0
  ; SI-NEXT:    s_wqm_b64 s[8:9], s[0:1]
  ; SI-NEXT:    s_and_b64 exec, exec, s[8:9]
-; SI-NEXT:  ; %bb.3: ; %.continue0.preheader
+; SI-NEXT:  BB7_3: ; %.continue0.preheader
  ; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
  ; SI-NEXT:    s_mov_b64 s[4:5], 0
  ; SI-NEXT:    v_mov_b32_e32 v0, s6
@@ -948,13 +969,14 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
  ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; GFX9-NEXT:    s_and_saveexec_b64 s[2:3], vcc
  ; GFX9-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
+; GFX9-NEXT:    s_cbranch_execz BB7_3
  ; GFX9-NEXT:  ; %bb.1: ; %.demote0
  ; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB7_9
  ; GFX9-NEXT:  ; %bb.2: ; %.demote0
  ; GFX9-NEXT:    s_wqm_b64 s[6:7], s[0:1]
  ; GFX9-NEXT:    s_and_b64 exec, exec, s[6:7]
-; GFX9-NEXT:  ; %bb.3: ; %.continue0.preheader
+; GFX9-NEXT:  BB7_3: ; %.continue0.preheader
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX9-NEXT:    s_mov_b64 s[2:3], 0
  ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
@@ -1013,13 +1035,14 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
  ; GFX10-32-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s2, vcc_lo
  ; GFX10-32-NEXT:    s_xor_b32 s2, exec_lo, s2
+; GFX10-32-NEXT:    s_cbranch_execz BB7_3
  ; GFX10-32-NEXT:  ; %bb.1: ; %.demote0
  ; GFX10-32-NEXT:    s_andn2_b32 s0, s0, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB7_9
  ; GFX10-32-NEXT:  ; %bb.2: ; %.demote0
  ; GFX10-32-NEXT:    s_wqm_b32 s3, s0
  ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s3
-; GFX10-32-NEXT:  ; %bb.3: ; %.continue0.preheader
+; GFX10-32-NEXT:  BB7_3: ; %.continue0.preheader
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s2
  ; GFX10-32-NEXT:    v_mov_b32_e32 v0, s1
  ; GFX10-32-NEXT:    s_branch BB7_5
@@ -1075,13 +1098,14 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
  ; GFX10-64-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
  ; GFX10-64-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GFX10-64-NEXT:    s_cbranch_execz BB7_3
  ; GFX10-64-NEXT:  ; %bb.1: ; %.demote0
  ; GFX10-64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB7_9
  ; GFX10-64-NEXT:  ; %bb.2: ; %.demote0
  ; GFX10-64-NEXT:    s_wqm_b64 s[6:7], s[0:1]
  ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[6:7]
-; GFX10-64-NEXT:  ; %bb.3: ; %.continue0.preheader
+; GFX10-64-NEXT:  BB7_3: ; %.continue0.preheader
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[4:5]
  ; GFX10-64-NEXT:    v_mov_b32_e32 v0, s2
  ; GFX10-64-NEXT:    s_mov_b64 s[2:3], 0
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir

index 6ce629a..7b37990 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir
@@ -1,5 +1,5 @@
  # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs  %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=1 -verify-machineinstrs  %s -o - | FileCheck %s
  
  ---
  
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir

index 5424ad3..95b5373 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir
@@ -1,5 +1,5 @@
  # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs  %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=1 -verify-machineinstrs  %s -o - | FileCheck %s
  # Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0
  
  ---
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir

index 9283244..97c8b50 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=3 %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=3 %s -o - | FileCheck %s
  
  ---
  
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll

index 0b0fb98..9edd1a3 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
@@ -167,12 +167,13 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
  ; SI-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v0
  ; SI-NEXT:    s_and_saveexec_b64 s[4:5], s[0:1]
  ; SI-NEXT:    s_xor_b64 s[0:1], exec, s[4:5]
+; SI-NEXT:    s_cbranch_execz BB2_3
  ; SI-NEXT:  ; %bb.1: ; %.demote
  ; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
  ; SI-NEXT:    s_cbranch_scc0 BB2_4
  ; SI-NEXT:  ; %bb.2: ; %.demote
  ; SI-NEXT:    s_mov_b64 exec, 0
-; SI-NEXT:  ; %bb.3: ; %.continue
+; SI-NEXT:  BB2_3: ; %.continue
  ; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
  ; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
  ; SI-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
@@ -194,12 +195,13 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
  ; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v0
  ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], s[0:1]
  ; GFX9-NEXT:    s_xor_b64 s[0:1], exec, s[4:5]
+; GFX9-NEXT:    s_cbranch_execz BB2_3
  ; GFX9-NEXT:  ; %bb.1: ; %.demote
  ; GFX9-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB2_4
  ; GFX9-NEXT:  ; %bb.2: ; %.demote
  ; GFX9-NEXT:    s_mov_b64 exec, 0
-; GFX9-NEXT:  ; %bb.3: ; %.continue
+; GFX9-NEXT:  BB2_3: ; %.continue
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[0:1]
  ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
  ; GFX9-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
@@ -221,12 +223,13 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
  ; GFX10-32-NEXT:    v_cmp_eq_u32_e64 s0, 1, v0
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s2, s0
  ; GFX10-32-NEXT:    s_xor_b32 s0, exec_lo, s2
+; GFX10-32-NEXT:    s_cbranch_execz BB2_3
  ; GFX10-32-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-32-NEXT:    s_andn2_b32 s1, s1, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB2_4
  ; GFX10-32-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-32-NEXT:    s_mov_b32 exec_lo, 0
-; GFX10-32-NEXT:  ; %bb.3: ; %.continue
+; GFX10-32-NEXT:  BB2_3: ; %.continue
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s0
  ; GFX10-32-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
  ; GFX10-32-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
@@ -248,12 +251,13 @@ define amdgpu_ps void @branch(float %arg0, float %arg1) {
  ; GFX10-64-NEXT:    v_cmp_eq_u32_e64 s[0:1], 1, v0
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[4:5], s[0:1]
  ; GFX10-64-NEXT:    s_xor_b64 s[0:1], exec, s[4:5]
+; GFX10-64-NEXT:    s_cbranch_execz BB2_3
  ; GFX10-64-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-64-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB2_4
  ; GFX10-64-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-64-NEXT:    s_mov_b64 exec, 0
-; GFX10-64-NEXT:  ; %bb.3: ; %.continue
+; GFX10-64-NEXT:  BB2_3: ; %.continue
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[0:1]
  ; GFX10-64-NEXT:    v_cndmask_b32_e64 v0, 0, 1.0, vcc
  ; GFX10-64-NEXT:    exp mrt1 v0, v0, v0, v0 done vm
@@ -289,13 +293,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
  ; SI-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; SI-NEXT:    s_xor_b64 s[14:15], exec, s[14:15]
+; SI-NEXT:    s_cbranch_execz BB3_3
  ; SI-NEXT:  ; %bb.1: ; %.demote
  ; SI-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; SI-NEXT:    s_cbranch_scc0 BB3_4
  ; SI-NEXT:  ; %bb.2: ; %.demote
  ; SI-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; SI-NEXT:    s_and_b64 exec, exec, s[16:17]
-; SI-NEXT:  ; %bb.3: ; %.continue
+; SI-NEXT:  BB3_3: ; %.continue
  ; SI-NEXT:    s_or_b64 exec, exec, s[14:15]
  ; SI-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
  ; SI-NEXT:    s_waitcnt vmcnt(0)
@@ -317,13 +322,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX9-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
  ; GFX9-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; GFX9-NEXT:    s_xor_b64 s[14:15], exec, s[14:15]
+; GFX9-NEXT:    s_cbranch_execz BB3_3
  ; GFX9-NEXT:  ; %bb.1: ; %.demote
  ; GFX9-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB3_4
  ; GFX9-NEXT:  ; %bb.2: ; %.demote
  ; GFX9-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; GFX9-NEXT:    s_and_b64 exec, exec, s[16:17]
-; GFX9-NEXT:  ; %bb.3: ; %.continue
+; GFX9-NEXT:  BB3_3: ; %.continue
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[14:15]
  ; GFX9-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
  ; GFX9-NEXT:    s_waitcnt vmcnt(0)
@@ -345,13 +351,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX10-32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v1
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s13, vcc_lo
  ; GFX10-32-NEXT:    s_xor_b32 s13, exec_lo, s13
+; GFX10-32-NEXT:    s_cbranch_execz BB3_3
  ; GFX10-32-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-32-NEXT:    s_andn2_b32 s12, s12, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB3_4
  ; GFX10-32-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-32-NEXT:    s_wqm_b32 s28, s12
  ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s28
-; GFX10-32-NEXT:  ; %bb.3: ; %.continue
+; GFX10-32-NEXT:  BB3_3: ; %.continue
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s13
  ; GFX10-32-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
  ; GFX10-32-NEXT:    s_waitcnt vmcnt(0)
@@ -373,13 +380,14 @@ define amdgpu_ps <4 x float> @wqm_demote_1(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX10-64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; GFX10-64-NEXT:    s_xor_b64 s[28:29], exec, s[14:15]
+; GFX10-64-NEXT:    s_cbranch_execz BB3_3
  ; GFX10-64-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-64-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB3_4
  ; GFX10-64-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-64-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[16:17]
-; GFX10-64-NEXT:  ; %bb.3: ; %.continue
+; GFX10-64-NEXT:  BB3_3: ; %.continue
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[28:29]
  ; GFX10-64-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
  ; GFX10-64-NEXT:    s_waitcnt vmcnt(0)
@@ -421,13 +429,14 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
  ; SI-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; SI-NEXT:    s_xor_b64 s[14:15], exec, s[14:15]
+; SI-NEXT:    s_cbranch_execz BB4_3
  ; SI-NEXT:  ; %bb.1: ; %.demote
  ; SI-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; SI-NEXT:    s_cbranch_scc0 BB4_4
  ; SI-NEXT:  ; %bb.2: ; %.demote
  ; SI-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; SI-NEXT:    s_and_b64 exec, exec, s[16:17]
-; SI-NEXT:  ; %bb.3: ; %.continue
+; SI-NEXT:  BB4_3: ; %.continue
  ; SI-NEXT:    s_or_b64 exec, exec, s[14:15]
  ; SI-NEXT:    v_add_f32_e32 v0, v0, v0
  ; SI-NEXT:    s_and_b64 exec, exec, s[12:13]
@@ -449,13 +458,14 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX9-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
  ; GFX9-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; GFX9-NEXT:    s_xor_b64 s[14:15], exec, s[14:15]
+; GFX9-NEXT:    s_cbranch_execz BB4_3
  ; GFX9-NEXT:  ; %bb.1: ; %.demote
  ; GFX9-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB4_4
  ; GFX9-NEXT:  ; %bb.2: ; %.demote
  ; GFX9-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; GFX9-NEXT:    s_and_b64 exec, exec, s[16:17]
-; GFX9-NEXT:  ; %bb.3: ; %.continue
+; GFX9-NEXT:  BB4_3: ; %.continue
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[14:15]
  ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v0
  ; GFX9-NEXT:    s_and_b64 exec, exec, s[12:13]
@@ -477,13 +487,14 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX10-32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s13, vcc_lo
  ; GFX10-32-NEXT:    s_xor_b32 s13, exec_lo, s13
+; GFX10-32-NEXT:    s_cbranch_execz BB4_3
  ; GFX10-32-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-32-NEXT:    s_andn2_b32 s12, s12, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB4_4
  ; GFX10-32-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-32-NEXT:    s_wqm_b32 s28, s12
  ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s28
-; GFX10-32-NEXT:  ; %bb.3: ; %.continue
+; GFX10-32-NEXT:  BB4_3: ; %.continue
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s13
  ; GFX10-32-NEXT:    v_add_f32_e32 v0, v0, v0
  ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s12
@@ -505,13 +516,14 @@ define amdgpu_ps <4 x float> @wqm_demote_2(<8 x i32> inreg %rsrc, <4 x i32> inre
  ; GFX10-64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[14:15], vcc
  ; GFX10-64-NEXT:    s_xor_b64 s[28:29], exec, s[14:15]
+; GFX10-64-NEXT:    s_cbranch_execz BB4_3
  ; GFX10-64-NEXT:  ; %bb.1: ; %.demote
  ; GFX10-64-NEXT:    s_andn2_b64 s[12:13], s[12:13], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB4_4
  ; GFX10-64-NEXT:  ; %bb.2: ; %.demote
  ; GFX10-64-NEXT:    s_wqm_b64 s[16:17], s[12:13]
  ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[16:17]
-; GFX10-64-NEXT:  ; %bb.3: ; %.continue
+; GFX10-64-NEXT:  BB4_3: ; %.continue
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[28:29]
  ; GFX10-64-NEXT:    v_add_f32_e32 v0, v0, v0
  ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[12:13]
@@ -659,13 +671,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
  ; SI-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
+; SI-NEXT:    s_cbranch_execz BB6_3
  ; SI-NEXT:  ; %bb.1: ; %.demote0
  ; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; SI-NEXT:    s_cbranch_scc0 BB6_7
  ; SI-NEXT:  ; %bb.2: ; %.demote0
  ; SI-NEXT:    s_wqm_b64 s[4:5], s[0:1]
  ; SI-NEXT:    s_and_b64 exec, exec, s[4:5]
-; SI-NEXT:  ; %bb.3: ; %.continue0
+; SI-NEXT:  BB6_3: ; %.continue0
  ; SI-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; SI-NEXT:    s_mov_b64 s[2:3], s[0:1]
  ; SI-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
@@ -681,12 +694,13 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; SI-NEXT:    s_or_b64 s[2:3], s[2:3], vcc
  ; SI-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
  ; SI-NEXT:    s_xor_b64 s[2:3], exec, s[4:5]
+; SI-NEXT:    s_cbranch_execz BB6_6
  ; SI-NEXT:  ; %bb.4: ; %.demote1
  ; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; SI-NEXT:    s_cbranch_scc0 BB6_7
  ; SI-NEXT:  ; %bb.5: ; %.demote1
  ; SI-NEXT:    s_mov_b64 exec, 0
-; SI-NEXT:  ; %bb.6: ; %.continue1
+; SI-NEXT:  BB6_6: ; %.continue1
  ; SI-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; SI-NEXT:    v_bfrev_b32_e32 v0, 60
  ; SI-NEXT:    v_mov_b32_e32 v1, 0x3c00
@@ -705,13 +719,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; GFX9-NEXT:    s_and_saveexec_b64 s[2:3], vcc
  ; GFX9-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
+; GFX9-NEXT:    s_cbranch_execz BB6_3
  ; GFX9-NEXT:  ; %bb.1: ; %.demote0
  ; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX9-NEXT:  ; %bb.2: ; %.demote0
  ; GFX9-NEXT:    s_wqm_b64 s[4:5], s[0:1]
  ; GFX9-NEXT:    s_and_b64 exec, exec, s[4:5]
-; GFX9-NEXT:  ; %bb.3: ; %.continue0
+; GFX9-NEXT:  BB6_3: ; %.continue0
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX9-NEXT:    s_mov_b64 s[2:3], s[0:1]
  ; GFX9-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
@@ -727,12 +742,13 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX9-NEXT:    s_or_b64 s[2:3], s[2:3], vcc
  ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
  ; GFX9-NEXT:    s_xor_b64 s[2:3], exec, s[4:5]
+; GFX9-NEXT:    s_cbranch_execz BB6_6
  ; GFX9-NEXT:  ; %bb.4: ; %.demote1
  ; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX9-NEXT:  ; %bb.5: ; %.demote1
  ; GFX9-NEXT:    s_mov_b64 exec, 0
-; GFX9-NEXT:  ; %bb.6: ; %.continue1
+; GFX9-NEXT:  BB6_6: ; %.continue1
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3c00
  ; GFX9-NEXT:    v_bfrev_b32_e32 v1, 60
@@ -751,13 +767,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX10-32-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
  ; GFX10-32-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX10-32-NEXT:    s_cbranch_execz BB6_3
  ; GFX10-32-NEXT:  ; %bb.1: ; %.demote0
  ; GFX10-32-NEXT:    s_andn2_b32 s0, s0, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX10-32-NEXT:  ; %bb.2: ; %.demote0
  ; GFX10-32-NEXT:    s_wqm_b32 s2, s0
  ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s2
-; GFX10-32-NEXT:  ; %bb.3: ; %.continue0
+; GFX10-32-NEXT:  BB6_3: ; %.continue0
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
  ; GFX10-32-NEXT:    s_mov_b32 s1, s0
  ; GFX10-32-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0, s1
@@ -771,12 +788,13 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX10-32-NEXT:    s_or_b32 s1, s1, vcc_lo
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s2, s1
  ; GFX10-32-NEXT:    s_xor_b32 s1, exec_lo, s2
+; GFX10-32-NEXT:    s_cbranch_execz BB6_6
  ; GFX10-32-NEXT:  ; %bb.4: ; %.demote1
  ; GFX10-32-NEXT:    s_andn2_b32 s0, s0, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX10-32-NEXT:  ; %bb.5: ; %.demote1
  ; GFX10-32-NEXT:    s_mov_b32 exec_lo, 0
-; GFX10-32-NEXT:  ; %bb.6: ; %.continue1
+; GFX10-32-NEXT:  BB6_6: ; %.continue1
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
  ; GFX10-32-NEXT:    v_mov_b32_e32 v0, 0x3c00
  ; GFX10-32-NEXT:    v_bfrev_b32_e32 v1, 60
@@ -795,13 +813,14 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX10-64-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
  ; GFX10-64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
+; GFX10-64-NEXT:    s_cbranch_execz BB6_3
  ; GFX10-64-NEXT:  ; %bb.1: ; %.demote0
  ; GFX10-64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX10-64-NEXT:  ; %bb.2: ; %.demote0
  ; GFX10-64-NEXT:    s_wqm_b64 s[4:5], s[0:1]
  ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[4:5]
-; GFX10-64-NEXT:  ; %bb.3: ; %.continue0
+; GFX10-64-NEXT:  BB6_3: ; %.continue0
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX10-64-NEXT:    s_mov_b64 s[2:3], s[0:1]
  ; GFX10-64-NEXT:    v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
@@ -815,12 +834,13 @@ define amdgpu_ps void @wqm_deriv(<2 x float> %input, float %arg, i32 %index) {
  ; GFX10-64-NEXT:    s_or_b64 s[2:3], s[2:3], vcc
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
  ; GFX10-64-NEXT:    s_xor_b64 s[2:3], exec, s[4:5]
+; GFX10-64-NEXT:    s_cbranch_execz BB6_6
  ; GFX10-64-NEXT:  ; %bb.4: ; %.demote1
  ; GFX10-64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB6_7
  ; GFX10-64-NEXT:  ; %bb.5: ; %.demote1
  ; GFX10-64-NEXT:    s_mov_b64 exec, 0
-; GFX10-64-NEXT:  ; %bb.6: ; %.continue1
+; GFX10-64-NEXT:  BB6_6: ; %.continue1
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[2:3]
  ; GFX10-64-NEXT:    v_mov_b32_e32 v0, 0x3c00
  ; GFX10-64-NEXT:    v_bfrev_b32_e32 v1, 60
@@ -875,13 +895,14 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
  ; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
  ; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SI-NEXT:    s_cbranch_execz BB7_3
  ; SI-NEXT:  ; %bb.1: ; %.demote0
  ; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; SI-NEXT:    s_cbranch_scc0 BB7_9
  ; SI-NEXT:  ; %bb.2: ; %.demote0
  ; SI-NEXT:    s_wqm_b64 s[6:7], s[0:1]
  ; SI-NEXT:    s_and_b64 exec, exec, s[6:7]
-; SI-NEXT:  ; %bb.3: ; %.continue0.preheader
+; SI-NEXT:  BB7_3: ; %.continue0.preheader
  ; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
  ; SI-NEXT:    s_mov_b64 s[4:5], 0
  ; SI-NEXT:    s_branch BB7_5
@@ -940,13 +961,14 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
  ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
  ; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GFX9-NEXT:    s_cbranch_execz BB7_3
  ; GFX9-NEXT:  ; %bb.1: ; %.demote0
  ; GFX9-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX9-NEXT:    s_cbranch_scc0 BB7_9
  ; GFX9-NEXT:  ; %bb.2: ; %.demote0
  ; GFX9-NEXT:    s_wqm_b64 s[6:7], s[0:1]
  ; GFX9-NEXT:    s_and_b64 exec, exec, s[6:7]
-; GFX9-NEXT:  ; %bb.3: ; %.continue0.preheader
+; GFX9-NEXT:  BB7_3: ; %.continue0.preheader
  ; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
  ; GFX9-NEXT:    s_mov_b64 s[4:5], 0
  ; GFX9-NEXT:    s_branch BB7_5
@@ -1005,13 +1027,14 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
  ; GFX10-32-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
  ; GFX10-32-NEXT:    s_and_saveexec_b32 s2, vcc_lo
  ; GFX10-32-NEXT:    s_xor_b32 s2, exec_lo, s2
+; GFX10-32-NEXT:    s_cbranch_execz BB7_3
  ; GFX10-32-NEXT:  ; %bb.1: ; %.demote0
  ; GFX10-32-NEXT:    s_andn2_b32 s0, s0, exec_lo
  ; GFX10-32-NEXT:    s_cbranch_scc0 BB7_9
  ; GFX10-32-NEXT:  ; %bb.2: ; %.demote0
  ; GFX10-32-NEXT:    s_wqm_b32 s3, s0
  ; GFX10-32-NEXT:    s_and_b32 exec_lo, exec_lo, s3
-; GFX10-32-NEXT:  ; %bb.3: ; %.continue0.preheader
+; GFX10-32-NEXT:  BB7_3: ; %.continue0.preheader
  ; GFX10-32-NEXT:    s_or_b32 exec_lo, exec_lo, s2
  ; GFX10-32-NEXT:    s_mov_b32 s2, 0
  ; GFX10-32-NEXT:    s_branch BB7_5
@@ -1067,13 +1090,14 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
  ; GFX10-64-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
  ; GFX10-64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
  ; GFX10-64-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GFX10-64-NEXT:    s_cbranch_execz BB7_3
  ; GFX10-64-NEXT:  ; %bb.1: ; %.demote0
  ; GFX10-64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX10-64-NEXT:    s_cbranch_scc0 BB7_9
  ; GFX10-64-NEXT:  ; %bb.2: ; %.demote0
  ; GFX10-64-NEXT:    s_wqm_b64 s[6:7], s[0:1]
  ; GFX10-64-NEXT:    s_and_b64 exec, exec, s[6:7]
-; GFX10-64-NEXT:  ; %bb.3: ; %.continue0.preheader
+; GFX10-64-NEXT:  BB7_3: ; %.continue0.preheader
  ; GFX10-64-NEXT:    s_or_b64 exec, exec, s[4:5]
  ; GFX10-64-NEXT:    s_mov_b64 s[4:5], 0
  ; GFX10-64-NEXT:    s_branch BB7_5
diff --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-gpr-idx-mode.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-gpr-idx-mode.mir

index 0f0d210..3dddb0f 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-gpr-idx-mode.mir
+++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-gpr-idx-mode.mir
@@ -1,5 +1,5 @@
  # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=10 -verify-machineinstrs  %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs  %s -o - | FileCheck %s
  # Make sure mandatory skips are not removed around mode defs.
  # FIXME: -amdgpu-skip-threshold seems to be backwards.
  
diff --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir

index ee72fa9..58b1ab9 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
+++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir
@@ -1,5 +1,5 @@
  # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=10 -verify-machineinstrs  %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs  %s -o - | FileCheck %s
  # Make sure mandatory skips are not removed around mode defs.
  # FIXME: -amdgpu-skip-threshold seems to be backwards.
  
diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir

index 5979720..4c53c51 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
+++ b/llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir
@@ -1,5 +1,5 @@
  # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=1000000 -o -  %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-pre-emit-peephole -amdgpu-skip-threshold=1000000 -o -  %s | FileCheck %s
  
  ---
  name: skip_branch_taildup_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll

index f535e28..690fe5a 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1002,13 +1002,14 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2,
  ; SI-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
  ; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
  ; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; SI-NEXT:    s_cbranch_execz BB13_3
  ; SI-NEXT:  ; %bb.1: ; %bb3
  ; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
  ; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
  ; SI-NEXT:    s_cbranch_scc0 BB13_6
  ; SI-NEXT:  ; %bb.2: ; %bb3
  ; SI-NEXT:    s_andn2_b64 exec, exec, vcc
-; SI-NEXT:  ; %bb.3: ; %bb4
+; SI-NEXT:  BB13_3: ; %bb4
  ; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
  ; SI-NEXT:    s_mov_b32 s1, s0
  ; SI-NEXT:    s_mov_b32 s2, s0
@@ -1043,13 +1044,14 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2,
  ; GFX10-WAVE64-NEXT:    s_mov_b32 s0, 0
  ; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
  ; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
+; GFX10-WAVE64-NEXT:    s_cbranch_execz BB13_3
  ; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb3
  ; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
  ; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
  ; GFX10-WAVE64-NEXT:    s_cbranch_scc0 BB13_6
  ; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb3
  ; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
-; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb4
+; GFX10-WAVE64-NEXT:  BB13_3: ; %bb4
  ; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
  ; GFX10-WAVE64-NEXT:    s_mov_b32 s1, s0
  ; GFX10-WAVE64-NEXT:    s_mov_b32 s2, s0
@@ -1082,13 +1084,14 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2,
  ; GFX10-WAVE32-NEXT:    s_mov_b32 s0, 0
  ; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s2, vcc_lo
  ; GFX10-WAVE32-NEXT:    s_xor_b32 s2, exec_lo, s2
+; GFX10-WAVE32-NEXT:    s_cbranch_execz BB13_3
  ; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb3
  ; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
  ; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
  ; GFX10-WAVE32-NEXT:    s_cbranch_scc0 BB13_6
  ; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb3
  ; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
-; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb4
+; GFX10-WAVE32-NEXT:  BB13_3: ; %bb4
  ; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s2
  ; GFX10-WAVE32-NEXT:    s_mov_b32 s1, s0
  ; GFX10-WAVE32-NEXT:    s_mov_b32 s2, s0
@@ -1154,12 +1157,13 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
  ; SI-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
  ; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
  ; SI-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
+; SI-NEXT:    s_cbranch_execz BB14_3
  ; SI-NEXT:  ; %bb.1: ; %kill
  ; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; SI-NEXT:    s_cbranch_scc0 BB14_6
  ; SI-NEXT:  ; %bb.2: ; %kill
  ; SI-NEXT:    s_mov_b64 exec, 0
-; SI-NEXT:  ; %bb.3: ; %Flow
+; SI-NEXT:  BB14_3: ; %Flow
  ; SI-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
  ; SI-NEXT:    ; implicit-def: $vgpr2
  ; SI-NEXT:    s_xor_b64 exec, exec, s[0:1]
@@ -1190,12 +1194,13 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
  ; GFX10-WAVE64-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
  ; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
  ; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
+; GFX10-WAVE64-NEXT:    s_cbranch_execz BB14_3
  ; GFX10-WAVE64-NEXT:  ; %bb.1: ; %kill
  ; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
  ; GFX10-WAVE64-NEXT:    s_cbranch_scc0 BB14_6
  ; GFX10-WAVE64-NEXT:  ; %bb.2: ; %kill
  ; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
-; GFX10-WAVE64-NEXT:  ; %bb.3: ; %Flow
+; GFX10-WAVE64-NEXT:  BB14_3: ; %Flow
  ; GFX10-WAVE64-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
  ; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr2
  ; GFX10-WAVE64-NEXT:    s_xor_b64 exec, exec, s[0:1]
@@ -1226,12 +1231,13 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
  ; GFX10-WAVE32-NEXT:    v_cmp_ge_f32_e32 vcc_lo, 0, v1
  ; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
  ; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
+; GFX10-WAVE32-NEXT:    s_cbranch_execz BB14_3
  ; GFX10-WAVE32-NEXT:  ; %bb.1: ; %kill
  ; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
  ; GFX10-WAVE32-NEXT:    s_cbranch_scc0 BB14_6
  ; GFX10-WAVE32-NEXT:  ; %bb.2: ; %kill
  ; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
-; GFX10-WAVE32-NEXT:  ; %bb.3: ; %Flow
+; GFX10-WAVE32-NEXT:  BB14_3: ; %Flow
  ; GFX10-WAVE32-NEXT:    s_or_saveexec_b32 s0, s1
  ; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr2
  ; GFX10-WAVE32-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll

index 9f42347..e5a019e 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
+++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
@@ -108,25 +108,26 @@ define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(floa
    ; GCN:   liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
    ; GCN:   $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
    ; GCN: bb.4.Flow1:
-  ; GCN:   successors: %bb.5(0x40000000)
+  ; GCN:   successors: %bb.5(0x40000000), %bb.7(0x40000000)
    ; GCN:   liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
    ; GCN:   renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
    ; GCN:   $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+  ; GCN:   S_CBRANCH_EXECZ %bb.7, implicit $exec
    ; GCN: bb.5.kill0:
-  ; GCN:   successors: %bb.8(0x40000000), %bb.7(0x40000000)
+  ; GCN:   successors: %bb.6(0x40000000), %bb.8(0x40000000)
    ; GCN:   liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
    ; GCN:   dead renamable $sgpr0_sgpr1 = S_ANDN2_B64 killed renamable $sgpr0_sgpr1, $exec, implicit-def $scc
-  ; GCN:   S_CBRANCH_SCC0 %bb.7, implicit $scc
-  ; GCN: bb.8.kill0:
-  ; GCN:   successors: %bb.6(0x80000000)
+  ; GCN:   S_CBRANCH_SCC0 %bb.8, implicit $scc
+  ; GCN: bb.6.kill0:
+  ; GCN:   successors: %bb.7(0x80000000)
    ; GCN:   liveins: $sgpr2_sgpr3, $scc
    ; GCN:   $exec = S_MOV_B64 0
-  ; GCN: bb.6.end:
+  ; GCN: bb.7.end:
    ; GCN:   successors: %bb.9(0x80000000)
    ; GCN:   liveins: $sgpr2_sgpr3
    ; GCN:   $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
    ; GCN:   S_BRANCH %bb.9
-  ; GCN: bb.7:
+  ; GCN: bb.8:
    ; GCN:   $exec = S_MOV_B64 0
    ; GCN:   EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
    ; GCN:   S_ENDPGM 0
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn

index 6b50aa5..3693e70 100644 (file)
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
@@ -222,7 +222,6 @@ static_library("LLVMAMDGPUCodeGen") {
      "SIPreEmitPeephole.cpp",
      "SIProgramInfo.cpp",
      "SIRegisterInfo.cpp",
-    "SIRemoveShortExecBranches.cpp",
      "SIShrinkInstructions.cpp",
      "SIWholeQuadMode.cpp",
    ]
author	Carl Ritson <carl.ritson@amd.com>
	Sat, 20 Mar 2021 01:29:08 +0000 (10:29 +0900)
committer	Carl Ritson <carl.ritson@amd.com>
	Sat, 20 Mar 2021 02:26:42 +0000 (11:26 +0900)
llvm/lib/Target/AMDGPU/AMDGPU.h		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/CMakeLists.txt		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIRemoveShortExecBranches.cpp	[deleted file]	patch \| blob \| history
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/insert-skips-flat-vmem-ds.mir		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/insert-skips-gws.mir		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/insert-skips-ignored-insts.mir		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-gpr-idx-mode.mir		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/skip-branch-taildup-ret.mir		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/skip-if-dead.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll		patch \| blob \| history
llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn		patch \| blob \| history