bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
+ bool optimizeVccBranch(MachineInstr &MI) const;
+
public:
static char ID;
return true;
}
+/// Rewrite a VCC-conditional branch into an EXEC-conditional branch when the
+/// VCC value it tests was produced by AND-ing EXEC with an all-ones mask.
+///
+/// Match:
+///   sreg = -1
+///   vcc = S_AND_B64 exec, sreg
+///   S_CBRANCH_VCC[N]Z
+/// =>
+///   S_CBRANCH_EXEC[N]Z
+///
+/// The -1 may also be an immediate operand of the AND, and the AND operands
+/// may appear in either order (the AND is commuted so that EXEC comes first).
+/// If sreg is EXEC itself, defined by a move of -1, the branch outcome is
+/// known: S_CBRANCH_VCCZ is erased and any other opcode is turned into
+/// S_BRANCH.
+///
+/// \param MI the branch to optimize; the caller passes S_CBRANCH_VCCZ or
+///        S_CBRANCH_VCCNZ. MI may be erased by this call.
+/// \returns true if any instruction in MI's block was modified or erased.
+bool SIInsertSkips::optimizeVccBranch(MachineInstr &MI) const {
+  bool Changed = false;
+  MachineBasicBlock &MBB = *MI.getParent();
+  const unsigned CondReg = AMDGPU::VCC;
+  const unsigned ExecReg = AMDGPU::EXEC;
+  const unsigned And = AMDGPU::S_AND_B64;
+
+  // Walk backwards from the branch looking for the instruction that defines
+  // VCC. Only the four instructions preceding the branch are inspected.
+  MachineBasicBlock::reverse_iterator A = MI.getReverseIterator(),
+                                      E = MBB.rend();
+  bool ReadsCond = false;
+  unsigned Threshold = 5;
+  for (++A; A != E; ++A) {
+    if (!--Threshold)
+      return false;
+    // EXEC must hold the same value at the AND and at the branch.
+    if (A->modifiesRegister(ExecReg, TRI))
+      return false;
+    if (A->modifiesRegister(CondReg, TRI)) {
+      // The closest writer of VCC must be an S_AND_B64 defining VCC.
+      if (!A->definesRegister(CondReg, TRI) || A->getOpcode() != And)
+        return false;
+      break;
+    }
+    // Remember whether anything between the AND and the branch reads VCC;
+    // if so the AND has to stay.
+    ReadsCond |= A->readsRegister(CondReg, TRI);
+  }
+  if (A == E)
+    return false;
+
+  MachineOperand &Op1 = A->getOperand(1);
+  MachineOperand &Op2 = A->getOperand(2);
+  // Canonicalize to "S_AND_B64 exec, x". Op1 may be an immediate, so it must
+  // be checked with isReg() before getReg() is called on it.
+  if (!(Op1.isReg() && Op1.getReg() == ExecReg) && Op2.isReg() &&
+      Op2.getReg() == ExecReg) {
+    TII->commuteInstruction(*A);
+    Changed = true;
+  }
+  if (!Op1.isReg() || Op1.getReg() != ExecReg)
+    return Changed;
+  if (Op2.isImm() && Op2.getImm() != -1)
+    return Changed;
+
+  unsigned SReg = AMDGPU::NoRegister;
+  if (Op2.isReg()) {
+    // The mask is a register: find its definition earlier in the block and
+    // require it to be a move of the immediate -1.
+    SReg = Op2.getReg();
+    auto M = std::next(A);
+    bool ReadsSreg = false;
+    for (; M != E; ++M) {
+      if (M->definesRegister(SReg, TRI))
+        break;
+      // A write that modifies SReg without defining it is not handled.
+      if (M->modifiesRegister(SReg, TRI))
+        return Changed;
+      ReadsSreg |= M->readsRegister(SReg, TRI);
+    }
+    if (M == E ||
+        !M->isMoveImmediate() ||
+        !M->getOperand(1).isImm() ||
+        M->getOperand(1).getImm() != -1)
+      return Changed;
+    // First if sreg is only used in and instruction fold the immediate
+    // into that and.
+    if (!ReadsSreg && Op2.isKill()) {
+      A->getOperand(2).ChangeToImmediate(-1);
+      M->eraseFromParent();
+    }
+  }
+
+  // The AND itself can go when nothing else reads VCC before the branch, its
+  // SCC result is dead, and the branch is the last use of VCC.
+  if (!ReadsCond && A->registerDefIsDead(AMDGPU::SCC) &&
+      MI.killsRegister(CondReg, TRI))
+    A->eraseFromParent();
+
+  bool IsVCCZ = MI.getOpcode() == AMDGPU::S_CBRANCH_VCCZ;
+  if (SReg == ExecReg) {
+    // EXEC was set to -1, so VCC == -1: a branch-if-zero is never taken and
+    // a branch-if-nonzero is always taken.
+    if (IsVCCZ) {
+      MI.eraseFromParent();
+      return true;
+    }
+    MI.setDesc(TII->get(AMDGPU::S_BRANCH));
+  } else {
+    MI.setDesc(TII->get(IsVCCZ ? AMDGPU::S_CBRANCH_EXECZ
+                               : AMDGPU::S_CBRANCH_EXECNZ));
+  }
+
+  // Drop the now stale VCC use and add the implicit operands required by the
+  // new opcode.
+  MI.RemoveOperand(MI.findRegisterUseOperandIdx(CondReg, false /*Kill*/, TRI));
+  MI.addImplicitDefUseOperands(*MBB.getParent());
+
+  return true;
+}
+
bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
}
break;
+ case AMDGPU::S_CBRANCH_VCCZ:
+ case AMDGPU::S_CBRANCH_VCCNZ:
+ MadeChange |= optimizeVccBranch(MI);
+ break;
+
default:
break;
}
--- /dev/null
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-skips -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+# GCN-LABEL: name: and_execz_mov_vccz
+# GCN-NOT: S_MOV_
+# GCN-NOT: S_AND_
+# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_mov_vccz
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_imm_vccz
+# GCN-NOT: S_AND_
+# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_imm_vccz
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execnz_imm_vccnz
+# GCN-NOT: S_AND_
+# GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+name: and_execnz_imm_vccnz
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_imm_vccz_live_scc
+# GCN: $vcc = S_AND_B64 $exec, -1, implicit-def $scc
+# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_imm_vccz_live_scc
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $vcc = S_AND_B64 $exec, -1, implicit-def $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_vccz_live_scc
+# GCN-NOT: S_MOV_
+# GCN: $vcc = S_AND_B64 $exec, -1, implicit-def $scc
+# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_mov_vccz_live_scc
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_vccz_live_sreg
+# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
+# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_mov_vccz_live_sreg
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_AND_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_vccz_live_sreg_commute
+# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
+# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_mov_vccz_live_sreg_commute
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_AND_B64 $sgpr0_sgpr1, $exec, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_vccz_live_scc_commute
+# GCN-NOT: S_MOV_
+# GCN: $vcc = S_AND_B64 $exec, -1, implicit-def $scc
+# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_mov_vccz_live_scc_commute
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_AND_B64 killed $sgpr0_sgpr1, $exec, implicit-def $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_vccz_commute
+# GCN-NOT: S_MOV_
+# GCN-NOT: S_AND_
+# GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_mov_vccz_commute
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_AND_B64 killed $sgpr0_sgpr1, $exec, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_exec_vccz
+# GCN: $exec = S_MOV_B64 -1
+# GCN-NEXT: S_ENDPGM
+name: and_execz_mov_exec_vccz
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $exec = S_MOV_B64 -1
+ $vcc = S_AND_B64 $exec, $exec, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_exec_vccnz
+# GCN: $exec = S_MOV_B64 -1
+# GCN-NEXT: S_BRANCH %bb.1{{$}}
+name: and_execz_mov_exec_vccnz
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $exec = S_MOV_B64 -1
+ $vcc = S_AND_B64 $exec, $exec, implicit-def dead $scc
+ S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_vccz_reads_sreg_early
+# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
+# GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr1
+# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_mov_vccz_reads_sreg_early
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $sgpr2 = S_MOV_B32 $sgpr1
+ $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_vccz_reads_sreg_late
+# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
+# GCN-NEXT: $sgpr2 = S_MOV_B32 $sgpr1
+# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_mov_vccz_reads_sreg_late
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_AND_B64 $exec, $sgpr0_sgpr1, implicit-def dead $scc
+ $sgpr2 = S_MOV_B32 $sgpr1
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+# GCN-LABEL: name: and_execz_mov_vccz_reads_writes_sreg_early
+# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
+# GCN-NEXT: $sgpr1 = S_MOV_B32 $sgpr0
+# GCN-NEXT: $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+# GCN-NEXT: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+name: and_execz_mov_vccz_reads_writes_sreg_early
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $sgpr1 = S_MOV_B32 $sgpr0
+ $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_vccz_reads_cond
+# GCN: $vcc = S_AND_B64 $exec, -1, implicit-def dead $scc
+# GCN-NEXT: $sgpr2 = S_MOV_B32 $vcc_lo
+# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+name: and_execz_mov_vccz_reads_cond
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+ $sgpr2 = S_MOV_B32 $vcc_lo
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_mov_vccz_modifies_sreg
+# GCN: $sgpr0_sgpr1 = S_MOV_B64 -1
+# GCN-NEXT: $sgpr0 = S_MOV_B32 0
+# GCN-NEXT: $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+# GCN-NEXT: S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+name: and_execz_mov_vccz_modifies_sreg
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $sgpr0_sgpr1 = S_MOV_B64 -1
+ $sgpr0 = S_MOV_B32 0
+ $vcc = S_AND_B64 $exec, killed $sgpr0_sgpr1, implicit-def dead $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM
+...
+---
+# GCN-LABEL: name: and_execz_imm_vccz_liveout_scc
+# GCN: $vcc = S_AND_B64 $exec, -1, implicit-def $scc
+# GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
+# GCN-NEXT: S_ENDPGM implicit $scc
+name: and_execz_imm_vccz_liveout_scc
+body: |
+ bb.0:
+ S_NOP 0
+
+ bb.1:
+ S_NOP 0
+
+ bb.2:
+ $vcc = S_AND_B64 $exec, -1, implicit-def $scc
+ S_CBRANCH_VCCZ %bb.1, implicit killed $vcc
+ S_ENDPGM implicit $scc
+...