void initializeSILowerControlFlowPass(PassRegistry &);
extern char &SILowerControlFlowID;
-void initializeSIRemoveShortExecBranchesPass(PassRegistry &);
-extern char &SIRemoveShortExecBranchesID;
-
void initializeSIPreEmitPeepholePass(PassRegistry &);
extern char &SIPreEmitPeepholeID;
initializeSIModeRegisterPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
- initializeSIRemoveShortExecBranchesPass(*PR);
initializeSIPreEmitPeepholePass(*PR);
initializeSIInsertSkipsPass(*PR);
initializeSIMemoryLegalizerPass(*PR);
if (getOptLevel() > CodeGenOpt::None)
addPass(&SIInsertHardClausesID);
- addPass(&SIRemoveShortExecBranchesID);
addPass(&SIInsertSkipsPassID);
addPass(&SIPreEmitPeepholeID);
// The hazard recognizer that runs as part of the post-ra scheduler does not
SIPreEmitPeephole.cpp
SIProgramInfo.cpp
SIRegisterInfo.cpp
- SIRemoveShortExecBranches.cpp
SIShrinkInstructions.cpp
SIWholeQuadMode.cpp
GCNILPSched.cpp
#define DEBUG_TYPE "si-pre-emit-peephole"
+// Maximum number of instructions we are willing to execute in a skipped-over
+// region with EXEC = 0 before it becomes cheaper to keep the s_cbranch_execz.
+// Exposed as -amdgpu-skip-threshold; cl::location binds the flag to this
+// plain static so the hot path reads an unsigned, not a cl::opt.
+static unsigned SkipThreshold;
+
+static cl::opt<unsigned, true> SkipThresholdFlag(
+    "amdgpu-skip-threshold", cl::Hidden,
+    cl::desc(
+        "Number of instructions before jumping over divergent control flow"),
+    cl::location(SkipThreshold), cl::init(12));
+
namespace {
class SIPreEmitPeephole : public MachineFunctionPass {
bool optimizeVccBranch(MachineInstr &MI) const;
bool optimizeSetGPR(MachineInstr &First, MachineInstr &MI) const;
+ bool getBlockDestinations(MachineBasicBlock &SrcMBB,
+ MachineBasicBlock *&TrueMBB,
+ MachineBasicBlock *&FalseMBB,
+ SmallVectorImpl<MachineOperand> &Cond);
+ bool mustRetainExeczBranch(const MachineBasicBlock &From,
+ const MachineBasicBlock &To) const;
+ bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
public:
static char ID;
return true;
}
+// Populate TrueMBB/FalseMBB/Cond for SrcMBB's terminator via
+// TargetInstrInfo::analyzeBranch. Note the return convention is the inverse
+// of analyzeBranch's: this returns true when the branch WAS analyzable.
+bool SIPreEmitPeephole::getBlockDestinations(
+    MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
+    MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
+  if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
+    return false;
+
+  // analyzeBranch leaves FalseMBB null for a fall-through; resolve it to the
+  // layout successor so callers always get a concrete block.
+  if (!FalseMBB)
+    FalseMBB = SrcMBB.getNextNode();
+
+  return true;
+}
+
+// Scan the layout-ordered blocks in [From, To) and decide whether the
+// s_cbranch_execz guarding them must be kept: returns true when falling
+// through the region with EXEC = 0 would be incorrect (unwanted side
+// effects, a conditional branch that could loop forever) or likely slower
+// than taking the branch (expensive instructions, or more than SkipThreshold
+// instructions executed with no active lanes).
+bool SIPreEmitPeephole::mustRetainExeczBranch(
+    const MachineBasicBlock &From, const MachineBasicBlock &To) const {
+  unsigned NumInstr = 0;
+  const MachineFunction *MF = From.getParent();
+
+  for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
+       MBBI != End && MBBI != ToI; ++MBBI) {
+    const MachineBasicBlock &MBB = *MBBI;
+
+    for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+      // When a uniform loop is inside non-uniform control flow, the branch
+      // leaving the loop might never be taken when EXEC = 0.
+      // Hence we should retain cbranch out of the loop lest it become infinite.
+      if (I->isConditionalBranch())
+        return true;
+
+      if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
+        return true;
+
+      // NOTE(review): the deleted SIRemoveShortExecBranches pass also bailed
+      // out on TII->isKillTerminator(...) here; that check is dropped in this
+      // merge — presumably because this pass now runs after SIInsertSkips has
+      // lowered kill pseudos. Confirm that ordering assumption holds.
+
+      // These instructions are potentially expensive even if EXEC = 0.
+      if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
+          TII->isDS(*I) || I->getOpcode() == AMDGPU::S_WAITCNT)
+        return true;
+
+      ++NumInstr;
+      // Beyond the threshold, taking the branch beats executing the region
+      // with no active lanes.
+      if (NumInstr >= SkipThreshold)
+        return true;
+    }
+  }
+
+  return false;
+}
+
+// Attempt to delete the s_cbranch_execz MI terminating SrcMBB when the
+// skipped-over region is short and harmless to execute with EXEC = 0.
+// Returns true if the skip branch instruction is removed.
+bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
+                                          MachineBasicBlock &SrcMBB) {
+  MachineBasicBlock *TrueMBB = nullptr;
+  MachineBasicBlock *FalseMBB = nullptr;
+  SmallVector<MachineOperand, 1> Cond;
+
+  if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
+    return false;
+
+  // Consider only the forward branches. Comparing block numbers is valid
+  // because runOnMachineFunction calls MF.RenumberBlocks() beforehand, so
+  // numbers follow layout order.
+  if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
+      mustRetainExeczBranch(*FalseMBB, *TrueMBB))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
+  MI.eraseFromParent();
+  // Keep the CFG consistent: the branch target is no longer a successor.
+  SrcMBB.removeSuccessor(TrueMBB);
+
+  return true;
+}
+
bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
MachineBasicBlock *EmptyMBBAtEnd = nullptr;
bool Changed = false;
+ MF.RenumberBlocks();
+
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator MBBE = MBB.getFirstTerminator();
MachineBasicBlock::iterator TermI = MBBE;
- // Check first terminator for VCC branches to optimize
+ // Check first terminator for branches to optimize
if (TermI != MBB.end()) {
MachineInstr &MI = *TermI;
switch (MI.getOpcode()) {
case AMDGPU::S_CBRANCH_VCCNZ:
Changed |= optimizeVccBranch(MI);
continue;
+ case AMDGPU::S_CBRANCH_EXECZ:
+ Changed |= removeExeczBranch(MI, MBB);
+ continue;
default:
break;
}
+++ /dev/null
-//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This pass optmizes the s_cbranch_execz instructions.
-/// The pass removes this skip instruction for short branches,
-/// if there is no unwanted sideeffect in the fallthrough code sequence.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Support/CommandLine.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "si-remove-short-exec-branches"
-
-static unsigned SkipThreshold;
-
-static cl::opt<unsigned, true> SkipThresholdFlag(
- "amdgpu-skip-threshold", cl::Hidden,
- cl::desc(
- "Number of instructions before jumping over divergent control flow"),
- cl::location(SkipThreshold), cl::init(12));
-
-namespace {
-
-class SIRemoveShortExecBranches : public MachineFunctionPass {
-private:
- const SIInstrInfo *TII = nullptr;
- bool getBlockDestinations(MachineBasicBlock &SrcMBB,
- MachineBasicBlock *&TrueMBB,
- MachineBasicBlock *&FalseMBB,
- SmallVectorImpl<MachineOperand> &Cond);
- bool mustRetainExeczBranch(const MachineBasicBlock &From,
- const MachineBasicBlock &To) const;
- bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
-
-public:
- static char ID;
-
- SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
- initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-};
-
-} // End anonymous namespace.
-
-INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
- "SI remove short exec branches", false, false)
-
-char SIRemoveShortExecBranches::ID = 0;
-
-char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;
-
-bool SIRemoveShortExecBranches::getBlockDestinations(
- MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
- MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
- if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
- return false;
-
- if (!FalseMBB)
- FalseMBB = SrcMBB.getNextNode();
-
- return true;
-}
-
-bool SIRemoveShortExecBranches::mustRetainExeczBranch(
- const MachineBasicBlock &From, const MachineBasicBlock &To) const {
- unsigned NumInstr = 0;
- const MachineFunction *MF = From.getParent();
-
- for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
- MBBI != End && MBBI != ToI; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
-
- for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- // When a uniform loop is inside non-uniform control flow, the branch
- // leaving the loop might never be taken when EXEC = 0.
- // Hence we should retain cbranch out of the loop lest it become infinite.
- if (I->isConditionalBranch())
- return true;
-
- if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
- return true;
-
- if (TII->isKillTerminator(I->getOpcode()))
- return true;
-
- // These instructions are potentially expensive even if EXEC = 0.
- if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
- TII->isDS(*I) || I->getOpcode() == AMDGPU::S_WAITCNT)
- return true;
-
- ++NumInstr;
- if (NumInstr >= SkipThreshold)
- return true;
- }
- }
-
- return false;
-}
-
-// Returns true if the skip branch instruction is removed.
-bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
- MachineBasicBlock &SrcMBB) {
- MachineBasicBlock *TrueMBB = nullptr;
- MachineBasicBlock *FalseMBB = nullptr;
- SmallVector<MachineOperand, 1> Cond;
-
- if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
- return false;
-
- // Consider only the forward branches.
- if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
- mustRetainExeczBranch(*FalseMBB, *TrueMBB))
- return false;
-
- LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
- MI.eraseFromParent();
- SrcMBB.removeSuccessor(TrueMBB);
-
- return true;
-}
-
-bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- TII = ST.getInstrInfo();
- MF.RenumberBlocks();
- bool Changed = false;
-
- for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
- if (MBBI == MBB.end())
- continue;
-
- MachineInstr &MI = *MBBI;
- switch (MI.getOpcode()) {
- case AMDGPU::S_CBRANCH_EXECZ:
- Changed = removeExeczBranch(MI, MBB);
- break;
- default:
- break;
- }
- }
-
- return Changed;
-}
; SI-NEXT: s_xor_b64 s[2:3], vcc, -1
; SI-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
; SI-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
+; SI-NEXT: s_cbranch_execz BB2_3
; SI-NEXT: ; %bb.1: ; %.demote
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 BB2_4
; SI-NEXT: ; %bb.2: ; %.demote
; SI-NEXT: s_mov_b64 exec, 0
-; SI-NEXT: ; %bb.3: ; %.continue
+; SI-NEXT: BB2_3: ; %.continue
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
; SI-NEXT: exp mrt1 v0, v0, v0, v0 done vm
; GFX9-NEXT: s_xor_b64 s[2:3], vcc, -1
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
+; GFX9-NEXT: s_cbranch_execz BB2_3
; GFX9-NEXT: ; %bb.1: ; %.demote
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX9-NEXT: s_cbranch_scc0 BB2_4
; GFX9-NEXT: ; %bb.2: ; %.demote
; GFX9-NEXT: s_mov_b64 exec, 0
-; GFX9-NEXT: ; %bb.3: ; %.continue
+; GFX9-NEXT: BB2_3: ; %.continue
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
; GFX9-NEXT: exp mrt1 v0, v0, v0, v0 done vm
; GFX10-32-NEXT: s_xor_b32 s1, vcc_lo, -1
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s1
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s2
+; GFX10-32-NEXT: s_cbranch_execz BB2_3
; GFX10-32-NEXT: ; %bb.1: ; %.demote
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB2_4
; GFX10-32-NEXT: ; %bb.2: ; %.demote
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
-; GFX10-32-NEXT: ; %bb.3: ; %.continue
+; GFX10-32-NEXT: BB2_3: ; %.continue
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
; GFX10-32-NEXT: exp mrt1 v0, v0, v0, v0 done vm
; GFX10-64-NEXT: s_xor_b64 s[2:3], vcc, -1
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
+; GFX10-64-NEXT: s_cbranch_execz BB2_3
; GFX10-64-NEXT: ; %bb.1: ; %.demote
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB2_4
; GFX10-64-NEXT: ; %bb.2: ; %.demote
; GFX10-64-NEXT: s_mov_b64 exec, 0
-; GFX10-64-NEXT: ; %bb.3: ; %.continue
+; GFX10-64-NEXT: BB2_3: ; %.continue
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
; GFX10-64-NEXT: exp mrt1 v0, v0, v0, v0 done vm
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; SI-NEXT: s_and_saveexec_b64 s[14:15], vcc
; SI-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
+; SI-NEXT: s_cbranch_execz BB3_3
; SI-NEXT: ; %bb.1: ; %.demote
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; SI-NEXT: s_cbranch_scc0 BB3_4
; SI-NEXT: ; %bb.2: ; %.demote
; SI-NEXT: s_wqm_b64 s[16:17], s[12:13]
; SI-NEXT: s_and_b64 exec, exec, s[16:17]
-; SI-NEXT: ; %bb.3: ; %.continue
+; SI-NEXT: BB3_3: ; %.continue
; SI-NEXT: s_or_b64 exec, exec, s[14:15]
; SI-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; SI-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX9-NEXT: s_and_saveexec_b64 s[14:15], vcc
; GFX9-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
+; GFX9-NEXT: s_cbranch_execz BB3_3
; GFX9-NEXT: ; %bb.1: ; %.demote
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; GFX9-NEXT: s_cbranch_scc0 BB3_4
; GFX9-NEXT: ; %bb.2: ; %.demote
; GFX9-NEXT: s_wqm_b64 s[16:17], s[12:13]
; GFX9-NEXT: s_and_b64 exec, exec, s[16:17]
-; GFX9-NEXT: ; %bb.3: ; %.continue
+; GFX9-NEXT: BB3_3: ; %.continue
; GFX9-NEXT: s_or_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10-32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v1
; GFX10-32-NEXT: s_and_saveexec_b32 s13, vcc_lo
; GFX10-32-NEXT: s_xor_b32 s13, exec_lo, s13
+; GFX10-32-NEXT: s_cbranch_execz BB3_3
; GFX10-32-NEXT: ; %bb.1: ; %.demote
; GFX10-32-NEXT: s_andn2_b32 s12, s12, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB3_4
; GFX10-32-NEXT: ; %bb.2: ; %.demote
; GFX10-32-NEXT: s_wqm_b32 s28, s12
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s28
-; GFX10-32-NEXT: ; %bb.3: ; %.continue
+; GFX10-32-NEXT: BB3_3: ; %.continue
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s13
; GFX10-32-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
; GFX10-64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX10-64-NEXT: s_and_saveexec_b64 s[14:15], vcc
; GFX10-64-NEXT: s_xor_b64 s[28:29], exec, s[14:15]
+; GFX10-64-NEXT: s_cbranch_execz BB3_3
; GFX10-64-NEXT: ; %bb.1: ; %.demote
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB3_4
; GFX10-64-NEXT: ; %bb.2: ; %.demote
; GFX10-64-NEXT: s_wqm_b64 s[16:17], s[12:13]
; GFX10-64-NEXT: s_and_b64 exec, exec, s[16:17]
-; GFX10-64-NEXT: ; %bb.3: ; %.continue
+; GFX10-64-NEXT: BB3_3: ; %.continue
; GFX10-64-NEXT: s_or_b64 exec, exec, s[28:29]
; GFX10-64-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[14:15], vcc
; SI-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
+; SI-NEXT: s_cbranch_execz BB4_3
; SI-NEXT: ; %bb.1: ; %.demote
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; SI-NEXT: s_cbranch_scc0 BB4_4
; SI-NEXT: ; %bb.2: ; %.demote
; SI-NEXT: s_wqm_b64 s[16:17], s[12:13]
; SI-NEXT: s_and_b64 exec, exec, s[16:17]
-; SI-NEXT: ; %bb.3: ; %.continue
+; SI-NEXT: BB4_3: ; %.continue
; SI-NEXT: s_or_b64 exec, exec, s[14:15]
; SI-NEXT: v_add_f32_e32 v0, v0, v0
; SI-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[14:15], vcc
; GFX9-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
+; GFX9-NEXT: s_cbranch_execz BB4_3
; GFX9-NEXT: ; %bb.1: ; %.demote
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; GFX9-NEXT: s_cbranch_scc0 BB4_4
; GFX9-NEXT: ; %bb.2: ; %.demote
; GFX9-NEXT: s_wqm_b64 s[16:17], s[12:13]
; GFX9-NEXT: s_and_b64 exec, exec, s[16:17]
-; GFX9-NEXT: ; %bb.3: ; %.continue
+; GFX9-NEXT: BB4_3: ; %.continue
; GFX9-NEXT: s_or_b64 exec, exec, s[14:15]
; GFX9-NEXT: v_add_f32_e32 v0, v0, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX10-32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-32-NEXT: s_and_saveexec_b32 s13, vcc_lo
; GFX10-32-NEXT: s_xor_b32 s13, exec_lo, s13
+; GFX10-32-NEXT: s_cbranch_execz BB4_3
; GFX10-32-NEXT: ; %bb.1: ; %.demote
; GFX10-32-NEXT: s_andn2_b32 s12, s12, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB4_4
; GFX10-32-NEXT: ; %bb.2: ; %.demote
; GFX10-32-NEXT: s_wqm_b32 s28, s12
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s28
-; GFX10-32-NEXT: ; %bb.3: ; %.continue
+; GFX10-32-NEXT: BB4_3: ; %.continue
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s13
; GFX10-32-NEXT: v_add_f32_e32 v0, v0, v0
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-64-NEXT: s_and_saveexec_b64 s[14:15], vcc
; GFX10-64-NEXT: s_xor_b64 s[28:29], exec, s[14:15]
+; GFX10-64-NEXT: s_cbranch_execz BB4_3
; GFX10-64-NEXT: ; %bb.1: ; %.demote
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB4_4
; GFX10-64-NEXT: ; %bb.2: ; %.demote
; GFX10-64-NEXT: s_wqm_b64 s[16:17], s[12:13]
; GFX10-64-NEXT: s_and_b64 exec, exec, s[16:17]
-; GFX10-64-NEXT: ; %bb.3: ; %.continue
+; GFX10-64-NEXT: BB4_3: ; %.continue
; GFX10-64-NEXT: s_or_b64 exec, exec, s[28:29]
; GFX10-64-NEXT: v_add_f32_e32 v0, v0, v0
; GFX10-64-NEXT: s_and_b64 exec, exec, s[12:13]
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; SI-NEXT: s_cbranch_execz BB6_3
; SI-NEXT: ; %bb.1: ; %.demote0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 BB6_7
; SI-NEXT: ; %bb.2: ; %.demote0
; SI-NEXT: s_wqm_b64 s[6:7], s[0:1]
; SI-NEXT: s_and_b64 exec, exec, s[6:7]
-; SI-NEXT: ; %bb.3: ; %.continue0
+; SI-NEXT: BB6_3: ; %.continue0
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_mov_b64 s[4:5], s[0:1]
; SI-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[4:5]
; SI-NEXT: s_xor_b64 s[4:5], s[4:5], -1
; SI-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
; SI-NEXT: s_xor_b64 s[4:5], exec, s[6:7]
+; SI-NEXT: s_cbranch_execz BB6_6
; SI-NEXT: ; %bb.4: ; %.demote1
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 BB6_7
; SI-NEXT: ; %bb.5: ; %.demote1
; SI-NEXT: s_mov_b64 exec, 0
-; SI-NEXT: ; %bb.6: ; %.continue1
+; SI-NEXT: BB6_6: ; %.continue1
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
+; GFX9-NEXT: s_cbranch_execz BB6_3
; GFX9-NEXT: ; %bb.1: ; %.demote0
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX9-NEXT: s_cbranch_scc0 BB6_7
; GFX9-NEXT: ; %bb.2: ; %.demote0
; GFX9-NEXT: s_wqm_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_and_b64 exec, exec, s[4:5]
-; GFX9-NEXT: ; %bb.3: ; %.continue0
+; GFX9-NEXT: BB6_3: ; %.continue0
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX9-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
; GFX9-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
+; GFX9-NEXT: s_cbranch_execz BB6_6
; GFX9-NEXT: ; %bb.4: ; %.demote1
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX9-NEXT: s_cbranch_scc0 BB6_7
; GFX9-NEXT: ; %bb.5: ; %.demote1
; GFX9-NEXT: s_mov_b64 exec, 0
-; GFX9-NEXT: ; %bb.6: ; %.continue1
+; GFX9-NEXT: BB6_6: ; %.continue1
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3c00
; GFX9-NEXT: v_bfrev_b32_e32 v1, 60
; GFX10-32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-32-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX10-32-NEXT: s_cbranch_execz BB6_3
; GFX10-32-NEXT: ; %bb.1: ; %.demote0
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB6_7
; GFX10-32-NEXT: ; %bb.2: ; %.demote0
; GFX10-32-NEXT: s_wqm_b32 s2, s0
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s2
-; GFX10-32-NEXT: ; %bb.3: ; %.continue0
+; GFX10-32-NEXT: BB6_3: ; %.continue0
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX10-32-NEXT: s_mov_b32 s1, s0
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s1
; GFX10-32-NEXT: s_xor_b32 s1, s1, -1
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s1
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s2
+; GFX10-32-NEXT: s_cbranch_execz BB6_6
; GFX10-32-NEXT: ; %bb.4: ; %.demote1
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB6_7
; GFX10-32-NEXT: ; %bb.5: ; %.demote1
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
-; GFX10-32-NEXT: ; %bb.6: ; %.continue1
+; GFX10-32-NEXT: BB6_6: ; %.continue1
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX10-32-NEXT: v_mov_b32_e32 v0, 0x3c00
; GFX10-32-NEXT: v_bfrev_b32_e32 v1, 60
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX10-64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
+; GFX10-64-NEXT: s_cbranch_execz BB6_3
; GFX10-64-NEXT: ; %bb.1: ; %.demote0
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB6_7
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
; GFX10-64-NEXT: s_wqm_b64 s[4:5], s[0:1]
; GFX10-64-NEXT: s_and_b64 exec, exec, s[4:5]
-; GFX10-64-NEXT: ; %bb.3: ; %.continue0
+; GFX10-64-NEXT: BB6_3: ; %.continue0
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX10-64-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
; GFX10-64-NEXT: s_xor_b64 s[2:3], s[2:3], -1
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
+; GFX10-64-NEXT: s_cbranch_execz BB6_6
; GFX10-64-NEXT: ; %bb.4: ; %.demote1
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB6_7
; GFX10-64-NEXT: ; %bb.5: ; %.demote1
; GFX10-64-NEXT: s_mov_b64 exec, 0
-; GFX10-64-NEXT: ; %bb.6: ; %.continue1
+; GFX10-64-NEXT: BB6_6: ; %.continue1
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX10-64-NEXT: v_mov_b32_e32 v0, 0x3c00
; GFX10-64-NEXT: v_bfrev_b32_e32 v1, 60
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; SI-NEXT: s_cbranch_execz BB7_3
; SI-NEXT: ; %bb.1: ; %.demote0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 BB7_9
; SI-NEXT: ; %bb.2: ; %.demote0
; SI-NEXT: s_wqm_b64 s[8:9], s[0:1]
; SI-NEXT: s_and_b64 exec, exec, s[8:9]
-; SI-NEXT: ; %bb.3: ; %.continue0.preheader
+; SI-NEXT: BB7_3: ; %.continue0.preheader
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_mov_b64 s[4:5], 0
; SI-NEXT: v_mov_b32_e32 v0, s6
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
+; GFX9-NEXT: s_cbranch_execz BB7_3
; GFX9-NEXT: ; %bb.1: ; %.demote0
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX9-NEXT: s_cbranch_scc0 BB7_9
; GFX9-NEXT: ; %bb.2: ; %.demote0
; GFX9-NEXT: s_wqm_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_and_b64 exec, exec, s[6:7]
-; GFX9-NEXT: ; %bb.3: ; %.continue0.preheader
+; GFX9-NEXT: BB7_3: ; %.continue0.preheader
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_mov_b64 s[2:3], 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX10-32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-32-NEXT: s_and_saveexec_b32 s2, vcc_lo
; GFX10-32-NEXT: s_xor_b32 s2, exec_lo, s2
+; GFX10-32-NEXT: s_cbranch_execz BB7_3
; GFX10-32-NEXT: ; %bb.1: ; %.demote0
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB7_9
; GFX10-32-NEXT: ; %bb.2: ; %.demote0
; GFX10-32-NEXT: s_wqm_b32 s3, s0
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s3
-; GFX10-32-NEXT: ; %bb.3: ; %.continue0.preheader
+; GFX10-32-NEXT: BB7_3: ; %.continue0.preheader
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX10-32-NEXT: v_mov_b32_e32 v0, s1
; GFX10-32-NEXT: s_branch BB7_5
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GFX10-64-NEXT: s_cbranch_execz BB7_3
; GFX10-64-NEXT: ; %bb.1: ; %.demote0
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB7_9
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
; GFX10-64-NEXT: s_wqm_b64 s[6:7], s[0:1]
; GFX10-64-NEXT: s_and_b64 exec, exec, s[6:7]
-; GFX10-64-NEXT: ; %bb.3: ; %.continue0.preheader
+; GFX10-64-NEXT: BB7_3: ; %.continue0.preheader
; GFX10-64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-64-NEXT: v_mov_b32_e32 v0, s2
; GFX10-64-NEXT: s_mov_b64 s[2:3], 0
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
---
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
# Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0
---
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-remove-short-exec-branches -amdgpu-skip-threshold=3 %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=3 %s -o - | FileCheck %s
---
; SI-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
; SI-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
+; SI-NEXT: s_cbranch_execz BB2_3
; SI-NEXT: ; %bb.1: ; %.demote
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; SI-NEXT: s_cbranch_scc0 BB2_4
; SI-NEXT: ; %bb.2: ; %.demote
; SI-NEXT: s_mov_b64 exec, 0
-; SI-NEXT: ; %bb.3: ; %.continue
+; SI-NEXT: BB2_3: ; %.continue
; SI-NEXT: s_or_b64 exec, exec, s[0:1]
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
; SI-NEXT: exp mrt1 v0, v0, v0, v0 done vm
; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
+; GFX9-NEXT: s_cbranch_execz BB2_3
; GFX9-NEXT: ; %bb.1: ; %.demote
; GFX9-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GFX9-NEXT: s_cbranch_scc0 BB2_4
; GFX9-NEXT: ; %bb.2: ; %.demote
; GFX9-NEXT: s_mov_b64 exec, 0
-; GFX9-NEXT: ; %bb.3: ; %.continue
+; GFX9-NEXT: BB2_3: ; %.continue
; GFX9-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
; GFX9-NEXT: exp mrt1 v0, v0, v0, v0 done vm
; GFX10-32-NEXT: v_cmp_eq_u32_e64 s0, 1, v0
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s0
; GFX10-32-NEXT: s_xor_b32 s0, exec_lo, s2
+; GFX10-32-NEXT: s_cbranch_execz BB2_3
; GFX10-32-NEXT: ; %bb.1: ; %.demote
; GFX10-32-NEXT: s_andn2_b32 s1, s1, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB2_4
; GFX10-32-NEXT: ; %bb.2: ; %.demote
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
-; GFX10-32-NEXT: ; %bb.3: ; %.continue
+; GFX10-32-NEXT: BB2_3: ; %.continue
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc_lo
; GFX10-32-NEXT: exp mrt1 v0, v0, v0, v0 done vm
; GFX10-64-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v0
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[0:1]
; GFX10-64-NEXT: s_xor_b64 s[0:1], exec, s[4:5]
+; GFX10-64-NEXT: s_cbranch_execz BB2_3
; GFX10-64-NEXT: ; %bb.1: ; %.demote
; GFX10-64-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB2_4
; GFX10-64-NEXT: ; %bb.2: ; %.demote
; GFX10-64-NEXT: s_mov_b64 exec, 0
-; GFX10-64-NEXT: ; %bb.3: ; %.continue
+; GFX10-64-NEXT: BB2_3: ; %.continue
; GFX10-64-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc
; GFX10-64-NEXT: exp mrt1 v0, v0, v0, v0 done vm
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; SI-NEXT: s_and_saveexec_b64 s[14:15], vcc
; SI-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
+; SI-NEXT: s_cbranch_execz BB3_3
; SI-NEXT: ; %bb.1: ; %.demote
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; SI-NEXT: s_cbranch_scc0 BB3_4
; SI-NEXT: ; %bb.2: ; %.demote
; SI-NEXT: s_wqm_b64 s[16:17], s[12:13]
; SI-NEXT: s_and_b64 exec, exec, s[16:17]
-; SI-NEXT: ; %bb.3: ; %.continue
+; SI-NEXT: BB3_3: ; %.continue
; SI-NEXT: s_or_b64 exec, exec, s[14:15]
; SI-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
; SI-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX9-NEXT: s_and_saveexec_b64 s[14:15], vcc
; GFX9-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
+; GFX9-NEXT: s_cbranch_execz BB3_3
; GFX9-NEXT: ; %bb.1: ; %.demote
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; GFX9-NEXT: s_cbranch_scc0 BB3_4
; GFX9-NEXT: ; %bb.2: ; %.demote
; GFX9-NEXT: s_wqm_b64 s[16:17], s[12:13]
; GFX9-NEXT: s_and_b64 exec, exec, s[16:17]
-; GFX9-NEXT: ; %bb.3: ; %.continue
+; GFX9-NEXT: BB3_3: ; %.continue
; GFX9-NEXT: s_or_b64 exec, exec, s[14:15]
; GFX9-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10-32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v1
; GFX10-32-NEXT: s_and_saveexec_b32 s13, vcc_lo
; GFX10-32-NEXT: s_xor_b32 s13, exec_lo, s13
+; GFX10-32-NEXT: s_cbranch_execz BB3_3
; GFX10-32-NEXT: ; %bb.1: ; %.demote
; GFX10-32-NEXT: s_andn2_b32 s12, s12, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB3_4
; GFX10-32-NEXT: ; %bb.2: ; %.demote
; GFX10-32-NEXT: s_wqm_b32 s28, s12
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s28
-; GFX10-32-NEXT: ; %bb.3: ; %.continue
+; GFX10-32-NEXT: BB3_3: ; %.continue
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s13
; GFX10-32-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10-32-NEXT: s_waitcnt vmcnt(0)
; GFX10-64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v1
; GFX10-64-NEXT: s_and_saveexec_b64 s[14:15], vcc
; GFX10-64-NEXT: s_xor_b64 s[28:29], exec, s[14:15]
+; GFX10-64-NEXT: s_cbranch_execz BB3_3
; GFX10-64-NEXT: ; %bb.1: ; %.demote
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB3_4
; GFX10-64-NEXT: ; %bb.2: ; %.demote
; GFX10-64-NEXT: s_wqm_b64 s[16:17], s[12:13]
; GFX10-64-NEXT: s_and_b64 exec, exec, s[16:17]
-; GFX10-64-NEXT: ; %bb.3: ; %.continue
+; GFX10-64-NEXT: BB3_3: ; %.continue
; GFX10-64-NEXT: s_or_b64 exec, exec, s[28:29]
; GFX10-64-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX10-64-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[14:15], vcc
; SI-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
+; SI-NEXT: s_cbranch_execz BB4_3
; SI-NEXT: ; %bb.1: ; %.demote
; SI-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; SI-NEXT: s_cbranch_scc0 BB4_4
; SI-NEXT: ; %bb.2: ; %.demote
; SI-NEXT: s_wqm_b64 s[16:17], s[12:13]
; SI-NEXT: s_and_b64 exec, exec, s[16:17]
-; SI-NEXT: ; %bb.3: ; %.continue
+; SI-NEXT: BB4_3: ; %.continue
; SI-NEXT: s_or_b64 exec, exec, s[14:15]
; SI-NEXT: v_add_f32_e32 v0, v0, v0
; SI-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[14:15], vcc
; GFX9-NEXT: s_xor_b64 s[14:15], exec, s[14:15]
+; GFX9-NEXT: s_cbranch_execz BB4_3
; GFX9-NEXT: ; %bb.1: ; %.demote
; GFX9-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; GFX9-NEXT: s_cbranch_scc0 BB4_4
; GFX9-NEXT: ; %bb.2: ; %.demote
; GFX9-NEXT: s_wqm_b64 s[16:17], s[12:13]
; GFX9-NEXT: s_and_b64 exec, exec, s[16:17]
-; GFX9-NEXT: ; %bb.3: ; %.continue
+; GFX9-NEXT: BB4_3: ; %.continue
; GFX9-NEXT: s_or_b64 exec, exec, s[14:15]
; GFX9-NEXT: v_add_f32_e32 v0, v0, v0
; GFX9-NEXT: s_and_b64 exec, exec, s[12:13]
; GFX10-32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-32-NEXT: s_and_saveexec_b32 s13, vcc_lo
; GFX10-32-NEXT: s_xor_b32 s13, exec_lo, s13
+; GFX10-32-NEXT: s_cbranch_execz BB4_3
; GFX10-32-NEXT: ; %bb.1: ; %.demote
; GFX10-32-NEXT: s_andn2_b32 s12, s12, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB4_4
; GFX10-32-NEXT: ; %bb.2: ; %.demote
; GFX10-32-NEXT: s_wqm_b32 s28, s12
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s28
-; GFX10-32-NEXT: ; %bb.3: ; %.continue
+; GFX10-32-NEXT: BB4_3: ; %.continue
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s13
; GFX10-32-NEXT: v_add_f32_e32 v0, v0, v0
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s12
; GFX10-64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-64-NEXT: s_and_saveexec_b64 s[14:15], vcc
; GFX10-64-NEXT: s_xor_b64 s[28:29], exec, s[14:15]
+; GFX10-64-NEXT: s_cbranch_execz BB4_3
; GFX10-64-NEXT: ; %bb.1: ; %.demote
; GFX10-64-NEXT: s_andn2_b64 s[12:13], s[12:13], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB4_4
; GFX10-64-NEXT: ; %bb.2: ; %.demote
; GFX10-64-NEXT: s_wqm_b64 s[16:17], s[12:13]
; GFX10-64-NEXT: s_and_b64 exec, exec, s[16:17]
-; GFX10-64-NEXT: ; %bb.3: ; %.continue
+; GFX10-64-NEXT: BB4_3: ; %.continue
; GFX10-64-NEXT: s_or_b64 exec, exec, s[28:29]
; GFX10-64-NEXT: v_add_f32_e32 v0, v0, v0
; GFX10-64-NEXT: s_and_b64 exec, exec, s[12:13]
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
+; SI-NEXT: s_cbranch_execz BB6_3
; SI-NEXT: ; %bb.1: ; %.demote0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 BB6_7
; SI-NEXT: ; %bb.2: ; %.demote0
; SI-NEXT: s_wqm_b64 s[4:5], s[0:1]
; SI-NEXT: s_and_b64 exec, exec, s[4:5]
-; SI-NEXT: ; %bb.3: ; %.continue0
+; SI-NEXT: BB6_3: ; %.continue0
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_mov_b64 s[2:3], s[0:1]
; SI-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
; SI-NEXT: s_or_b64 s[2:3], s[2:3], vcc
; SI-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
; SI-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
+; SI-NEXT: s_cbranch_execz BB6_6
; SI-NEXT: ; %bb.4: ; %.demote1
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 BB6_7
; SI-NEXT: ; %bb.5: ; %.demote1
; SI-NEXT: s_mov_b64 exec, 0
-; SI-NEXT: ; %bb.6: ; %.continue1
+; SI-NEXT: BB6_6: ; %.continue1
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: v_bfrev_b32_e32 v0, 60
; SI-NEXT: v_mov_b32_e32 v1, 0x3c00
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
+; GFX9-NEXT: s_cbranch_execz BB6_3
; GFX9-NEXT: ; %bb.1: ; %.demote0
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX9-NEXT: s_cbranch_scc0 BB6_7
; GFX9-NEXT: ; %bb.2: ; %.demote0
; GFX9-NEXT: s_wqm_b64 s[4:5], s[0:1]
; GFX9-NEXT: s_and_b64 exec, exec, s[4:5]
-; GFX9-NEXT: ; %bb.3: ; %.continue0
+; GFX9-NEXT: BB6_3: ; %.continue0
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX9-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], vcc
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
; GFX9-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
+; GFX9-NEXT: s_cbranch_execz BB6_6
; GFX9-NEXT: ; %bb.4: ; %.demote1
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX9-NEXT: s_cbranch_scc0 BB6_7
; GFX9-NEXT: ; %bb.5: ; %.demote1
; GFX9-NEXT: s_mov_b64 exec, 0
-; GFX9-NEXT: ; %bb.6: ; %.continue1
+; GFX9-NEXT: BB6_6: ; %.continue1
; GFX9-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX9-NEXT: v_mov_b32_e32 v0, 0x3c00
; GFX9-NEXT: v_bfrev_b32_e32 v1, 60
; GFX10-32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-32-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX10-32-NEXT: s_cbranch_execz BB6_3
; GFX10-32-NEXT: ; %bb.1: ; %.demote0
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB6_7
; GFX10-32-NEXT: ; %bb.2: ; %.demote0
; GFX10-32-NEXT: s_wqm_b32 s2, s0
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s2
-; GFX10-32-NEXT: ; %bb.3: ; %.continue0
+; GFX10-32-NEXT: BB6_3: ; %.continue0
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX10-32-NEXT: s_mov_b32 s1, s0
; GFX10-32-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s1
; GFX10-32-NEXT: s_or_b32 s1, s1, vcc_lo
; GFX10-32-NEXT: s_and_saveexec_b32 s2, s1
; GFX10-32-NEXT: s_xor_b32 s1, exec_lo, s2
+; GFX10-32-NEXT: s_cbranch_execz BB6_6
; GFX10-32-NEXT: ; %bb.4: ; %.demote1
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB6_7
; GFX10-32-NEXT: ; %bb.5: ; %.demote1
; GFX10-32-NEXT: s_mov_b32 exec_lo, 0
-; GFX10-32-NEXT: ; %bb.6: ; %.continue1
+; GFX10-32-NEXT: BB6_6: ; %.continue1
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX10-32-NEXT: v_mov_b32_e32 v0, 0x3c00
; GFX10-32-NEXT: v_bfrev_b32_e32 v1, 60
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX10-64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
+; GFX10-64-NEXT: s_cbranch_execz BB6_3
; GFX10-64-NEXT: ; %bb.1: ; %.demote0
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB6_7
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
; GFX10-64-NEXT: s_wqm_b64 s[4:5], s[0:1]
; GFX10-64-NEXT: s_and_b64 exec, exec, s[4:5]
-; GFX10-64-NEXT: ; %bb.3: ; %.continue0
+; GFX10-64-NEXT: BB6_3: ; %.continue0
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX10-64-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX10-64-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[2:3]
; GFX10-64-NEXT: s_or_b64 s[2:3], s[2:3], vcc
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
; GFX10-64-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
+; GFX10-64-NEXT: s_cbranch_execz BB6_6
; GFX10-64-NEXT: ; %bb.4: ; %.demote1
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB6_7
; GFX10-64-NEXT: ; %bb.5: ; %.demote1
; GFX10-64-NEXT: s_mov_b64 exec, 0
-; GFX10-64-NEXT: ; %bb.6: ; %.continue1
+; GFX10-64-NEXT: BB6_6: ; %.continue1
; GFX10-64-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX10-64-NEXT: v_mov_b32_e32 v0, 0x3c00
; GFX10-64-NEXT: v_bfrev_b32_e32 v1, 60
; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; SI-NEXT: s_cbranch_execz BB7_3
; SI-NEXT: ; %bb.1: ; %.demote0
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 BB7_9
; SI-NEXT: ; %bb.2: ; %.demote0
; SI-NEXT: s_wqm_b64 s[6:7], s[0:1]
; SI-NEXT: s_and_b64 exec, exec, s[6:7]
-; SI-NEXT: ; %bb.3: ; %.continue0.preheader
+; SI-NEXT: BB7_3: ; %.continue0.preheader
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_mov_b64 s[4:5], 0
; SI-NEXT: s_branch BB7_5
; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GFX9-NEXT: s_cbranch_execz BB7_3
; GFX9-NEXT: ; %bb.1: ; %.demote0
; GFX9-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX9-NEXT: s_cbranch_scc0 BB7_9
; GFX9-NEXT: ; %bb.2: ; %.demote0
; GFX9-NEXT: s_wqm_b64 s[6:7], s[0:1]
; GFX9-NEXT: s_and_b64 exec, exec, s[6:7]
-; GFX9-NEXT: ; %bb.3: ; %.continue0.preheader
+; GFX9-NEXT: BB7_3: ; %.continue0.preheader
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_mov_b64 s[4:5], 0
; GFX9-NEXT: s_branch BB7_5
; GFX10-32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GFX10-32-NEXT: s_and_saveexec_b32 s2, vcc_lo
; GFX10-32-NEXT: s_xor_b32 s2, exec_lo, s2
+; GFX10-32-NEXT: s_cbranch_execz BB7_3
; GFX10-32-NEXT: ; %bb.1: ; %.demote0
; GFX10-32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-32-NEXT: s_cbranch_scc0 BB7_9
; GFX10-32-NEXT: ; %bb.2: ; %.demote0
; GFX10-32-NEXT: s_wqm_b32 s3, s0
; GFX10-32-NEXT: s_and_b32 exec_lo, exec_lo, s3
-; GFX10-32-NEXT: ; %bb.3: ; %.continue0.preheader
+; GFX10-32-NEXT: BB7_3: ; %.continue0.preheader
; GFX10-32-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX10-32-NEXT: s_mov_b32 s2, 0
; GFX10-32-NEXT: s_branch BB7_5
; GFX10-64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GFX10-64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GFX10-64-NEXT: s_cbranch_execz BB7_3
; GFX10-64-NEXT: ; %bb.1: ; %.demote0
; GFX10-64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-64-NEXT: s_cbranch_scc0 BB7_9
; GFX10-64-NEXT: ; %bb.2: ; %.demote0
; GFX10-64-NEXT: s_wqm_b64 s[6:7], s[0:1]
; GFX10-64-NEXT: s_and_b64 exec, exec, s[6:7]
-; GFX10-64-NEXT: ; %bb.3: ; %.continue0.preheader
+; GFX10-64-NEXT: BB7_3: ; %.continue0.preheader
; GFX10-64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-64-NEXT: s_mov_b64 s[4:5], 0
; GFX10-64-NEXT: s_branch BB7_5
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
# Make sure mandatory skips are not removed around mode defs.
# FIXME: -amdgpu-skip-threshold seems to be backwards.
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
# Make sure mandatory skips are not removed around mode defs.
# FIXME: -amdgpu-skip-threshold seems to be backwards.
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-remove-short-exec-branches -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-pre-emit-peephole -amdgpu-skip-threshold=1000000 -o - %s | FileCheck %s
---
name: skip_branch_taildup_endpgm
; SI-NEXT: v_cmp_nle_f32_e32 vcc, 0, v1
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; SI-NEXT: s_cbranch_execz BB13_3
; SI-NEXT: ; %bb.1: ; %bb3
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; SI-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; SI-NEXT: s_cbranch_scc0 BB13_6
; SI-NEXT: ; %bb.2: ; %bb3
; SI-NEXT: s_andn2_b64 exec, exec, vcc
-; SI-NEXT: ; %bb.3: ; %bb4
+; SI-NEXT: BB13_3: ; %bb4
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_mov_b32 s1, s0
; SI-NEXT: s_mov_b32 s2, s0
; GFX10-WAVE64-NEXT: s_mov_b32 s0, 0
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GFX10-WAVE64-NEXT: s_cbranch_execz BB13_3
; GFX10-WAVE64-NEXT: ; %bb.1: ; %bb3
; GFX10-WAVE64-NEXT: v_cmp_ngt_f32_e32 vcc, 0, v0
; GFX10-WAVE64-NEXT: s_andn2_b64 s[2:3], s[2:3], vcc
; GFX10-WAVE64-NEXT: s_cbranch_scc0 BB13_6
; GFX10-WAVE64-NEXT: ; %bb.2: ; %bb3
; GFX10-WAVE64-NEXT: s_andn2_b64 exec, exec, vcc
-; GFX10-WAVE64-NEXT: ; %bb.3: ; %bb4
+; GFX10-WAVE64-NEXT: BB13_3: ; %bb4
; GFX10-WAVE64-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX10-WAVE64-NEXT: s_mov_b32 s1, s0
; GFX10-WAVE64-NEXT: s_mov_b32 s2, s0
; GFX10-WAVE32-NEXT: s_mov_b32 s0, 0
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s2, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s2, exec_lo, s2
+; GFX10-WAVE32-NEXT: s_cbranch_execz BB13_3
; GFX10-WAVE32-NEXT: ; %bb.1: ; %bb3
; GFX10-WAVE32-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0, v0
; GFX10-WAVE32-NEXT: s_andn2_b32 s1, s1, vcc_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 BB13_6
; GFX10-WAVE32-NEXT: ; %bb.2: ; %bb3
; GFX10-WAVE32-NEXT: s_andn2_b32 exec_lo, exec_lo, vcc_lo
-; GFX10-WAVE32-NEXT: ; %bb.3: ; %bb4
+; GFX10-WAVE32-NEXT: BB13_3: ; %bb4
; GFX10-WAVE32-NEXT: s_or_b32 exec_lo, exec_lo, s2
; GFX10-WAVE32-NEXT: s_mov_b32 s1, s0
; GFX10-WAVE32-NEXT: s_mov_b32 s2, s0
; SI-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
+; SI-NEXT: s_cbranch_execz BB14_3
; SI-NEXT: ; %bb.1: ; %kill
; SI-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; SI-NEXT: s_cbranch_scc0 BB14_6
; SI-NEXT: ; %bb.2: ; %kill
; SI-NEXT: s_mov_b64 exec, 0
-; SI-NEXT: ; %bb.3: ; %Flow
+; SI-NEXT: BB14_3: ; %Flow
; SI-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; SI-NEXT: ; implicit-def: $vgpr2
; SI-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX10-WAVE64-NEXT: v_cmp_ge_f32_e32 vcc, 0, v1
; GFX10-WAVE64-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX10-WAVE64-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
+; GFX10-WAVE64-NEXT: s_cbranch_execz BB14_3
; GFX10-WAVE64-NEXT: ; %bb.1: ; %kill
; GFX10-WAVE64-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX10-WAVE64-NEXT: s_cbranch_scc0 BB14_6
; GFX10-WAVE64-NEXT: ; %bb.2: ; %kill
; GFX10-WAVE64-NEXT: s_mov_b64 exec, 0
-; GFX10-WAVE64-NEXT: ; %bb.3: ; %Flow
+; GFX10-WAVE64-NEXT: BB14_3: ; %Flow
; GFX10-WAVE64-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; GFX10-WAVE64-NEXT: ; implicit-def: $vgpr2
; GFX10-WAVE64-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX10-WAVE32-NEXT: v_cmp_ge_f32_e32 vcc_lo, 0, v1
; GFX10-WAVE32-NEXT: s_and_saveexec_b32 s1, vcc_lo
; GFX10-WAVE32-NEXT: s_xor_b32 s1, exec_lo, s1
+; GFX10-WAVE32-NEXT: s_cbranch_execz BB14_3
; GFX10-WAVE32-NEXT: ; %bb.1: ; %kill
; GFX10-WAVE32-NEXT: s_andn2_b32 s0, s0, exec_lo
; GFX10-WAVE32-NEXT: s_cbranch_scc0 BB14_6
; GFX10-WAVE32-NEXT: ; %bb.2: ; %kill
; GFX10-WAVE32-NEXT: s_mov_b32 exec_lo, 0
-; GFX10-WAVE32-NEXT: ; %bb.3: ; %Flow
+; GFX10-WAVE32-NEXT: BB14_3: ; %Flow
; GFX10-WAVE32-NEXT: s_or_saveexec_b32 s0, s1
; GFX10-WAVE32-NEXT: ; implicit-def: $vgpr2
; GFX10-WAVE32-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
; GCN: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GCN: bb.4.Flow1:
- ; GCN: successors: %bb.5(0x40000000)
+ ; GCN: successors: %bb.5(0x40000000), %bb.7(0x40000000)
; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
; GCN: renamable $sgpr2_sgpr3 = S_OR_SAVEEXEC_B64 killed renamable $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
; GCN: $exec = S_XOR_B64 $exec, renamable $sgpr2_sgpr3, implicit-def $scc
+ ; GCN: S_CBRANCH_EXECZ %bb.7, implicit $exec
; GCN: bb.5.kill0:
- ; GCN: successors: %bb.8(0x40000000), %bb.7(0x40000000)
+ ; GCN: successors: %bb.6(0x40000000), %bb.8(0x40000000)
; GCN: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
; GCN: dead renamable $sgpr0_sgpr1 = S_ANDN2_B64 killed renamable $sgpr0_sgpr1, $exec, implicit-def $scc
- ; GCN: S_CBRANCH_SCC0 %bb.7, implicit $scc
- ; GCN: bb.8.kill0:
- ; GCN: successors: %bb.6(0x80000000)
+ ; GCN: S_CBRANCH_SCC0 %bb.8, implicit $scc
+ ; GCN: bb.6.kill0:
+ ; GCN: successors: %bb.7(0x80000000)
; GCN: liveins: $sgpr2_sgpr3, $scc
; GCN: $exec = S_MOV_B64 0
- ; GCN: bb.6.end:
+ ; GCN: bb.7.end:
; GCN: successors: %bb.9(0x80000000)
; GCN: liveins: $sgpr2_sgpr3
; GCN: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
; GCN: S_BRANCH %bb.9
- ; GCN: bb.7:
+ ; GCN: bb.8:
; GCN: $exec = S_MOV_B64 0
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GCN: S_ENDPGM 0
"SIPreEmitPeephole.cpp",
"SIProgramInfo.cpp",
"SIRegisterInfo.cpp",
- "SIRemoveShortExecBranches.cpp",
"SIShrinkInstructions.cpp",
"SIWholeQuadMode.cpp",
]