From 6c7a8d0b5f971fde03d98de53adc1db48120523e Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Fri, 4 Aug 2017 06:58:42 +0000 Subject: [PATCH] [AMDGPU] Preserve inverted bit in SI_IF in presence of SI_KILL In case SI_KILL is in between the SI_IF and SI_END_CF we need to preserve the bits actually flipped by the if rather than restoring the original mask. Differential Revision: https://reviews.llvm.org/D36299 llvm-svn: 310031 --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 38 ++++++++++-- .../CodeGen/AMDGPU/si-lower-control-flow-kill.ll | 71 ++++++++++++++++++++++ llvm/test/CodeGen/AMDGPU/skip-if-dead.ll | 2 + 3 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index de86c19..8e19e15 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -134,6 +134,38 @@ static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) { char &llvm::SILowerControlFlowID = SILowerControlFlow::ID; +static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI) { + unsigned SaveExecReg = MI.getOperand(0).getReg(); + auto U = MRI->use_instr_nodbg_begin(SaveExecReg); + + if (U == MRI->use_instr_nodbg_end() || + std::next(U) != MRI->use_instr_nodbg_end() || + U->getOpcode() != AMDGPU::SI_END_CF) + return false; + + // Check for SI_KILL_TERMINATOR on path from if to endif. + // If there is any such terminator simplifications are not safe. 
+ auto SMBB = MI.getParent(); + auto EMBB = U->getParent(); + DenseSet<MachineBasicBlock *> Visited; + SmallVector<MachineBasicBlock *, 4> Worklist(SMBB->succ_begin(), + SMBB->succ_end()); + + while (!Worklist.empty()) { + MachineBasicBlock *MBB = Worklist.pop_back_val(); + + if (MBB == EMBB || !Visited.insert(MBB).second) + continue; + for(auto &Term : MBB->terminators()) + if (Term.getOpcode() == AMDGPU::SI_KILL_TERMINATOR) + return false; + + Worklist.append(MBB->succ_begin(), MBB->succ_end()); + } + + return true; +} + void SILowerControlFlow::emitIf(MachineInstr &MI) { MachineBasicBlock &MBB = *MI.getParent(); const DebugLoc &DL = MI.getDebugLoc(); @@ -152,11 +184,7 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) { // If there is only one use of save exec register and that use is SI_END_CF, // we can optimize SI_IF by returning the full saved exec mask instead of // just cleared bits. - bool SimpleIf = false; - auto U = MRI->use_instr_nodbg_begin(SaveExecReg); - SimpleIf = U != MRI->use_instr_nodbg_end() && - std::next(U) == MRI->use_instr_nodbg_end() && - U->getOpcode() == AMDGPU::SI_END_CF; + bool SimpleIf = isSimpleIf(MI, MRI); // Add an implicit def of exec to discourage scheduling VALU after this which // will interfere with trying to form s_and_saveexec_b64 later. 
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll new file mode 100644 index 0000000..d422510 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll @@ -0,0 +1,71 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}if_with_kill: +; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]], +; GCN-NEXT: s_xor_b64 s[{{[0-9:]+}}], exec, [[SAVEEXEC]] +define amdgpu_ps void @if_with_kill(i32 %arg) { +.entry: + %cmp = icmp eq i32 %arg, 32 + br i1 %cmp, label %then, label %endif + +then: + tail call void @llvm.AMDGPU.kilp() + br label %endif + +endif: + ret void +} + +; GCN-LABEL: {{^}}if_with_loop_kill_after: +; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]], +; GCN-NEXT: s_xor_b64 s[{{[0-9:]+}}], exec, [[SAVEEXEC]] +define amdgpu_ps void @if_with_loop_kill_after(i32 %arg) { +.entry: + %cmp = icmp eq i32 %arg, 32 + br i1 %cmp, label %then, label %endif + +then: + %sub = sub i32 %arg, 1 + br label %loop + +loop: + %ind = phi i32 [%sub, %then], [%dec, %loop] + %dec = sub i32 %ind, 1 + %cc = icmp ne i32 %ind, 0 + br i1 %cc, label %loop, label %break + +break: + tail call void @llvm.AMDGPU.kilp() + br label %endif + +endif: + ret void +} + +; GCN-LABEL: {{^}}if_with_kill_inside_loop: +; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]], +; GCN-NEXT: s_xor_b64 s[{{[0-9:]+}}], exec, [[SAVEEXEC]] +define amdgpu_ps void @if_with_kill_inside_loop(i32 %arg) { +.entry: + %cmp = icmp eq i32 %arg, 32 + br i1 %cmp, label %then, label %endif + +then: + %sub = sub i32 %arg, 1 + br label %loop + +loop: + %ind = phi i32 [%sub, %then], [%dec, %loop] + %dec = sub i32 %ind, 1 + %cc = icmp ne i32 %ind, 0 + tail call void @llvm.AMDGPU.kilp() + br i1 %cc, label %loop, label %break + +break: + br label %endif + +endif: + ret void +} + +declare void @llvm.AMDGPU.kilp() diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll 
b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll index 0c052ae..099fb59 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -202,6 +202,7 @@ exit: ; CHECK-LABEL: {{^}}test_kill_divergent_loop: ; CHECK: v_cmp_eq_u32_e32 vcc, 0, v0 ; CHECK-NEXT: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], vcc +; CHECK-NEXT: s_xor_b64 [[SAVEEXEC]], exec, [[SAVEEXEC]] ; CHECK-NEXT: ; mask branch [[EXIT:BB[0-9]+_[0-9]+]] ; CHECK-NEXT: s_cbranch_execz [[EXIT]] @@ -336,6 +337,7 @@ bb7: ; preds = %bb4 ; CHECK-LABEL: {{^}}if_after_kill_block: ; CHECK: ; BB#0: ; CHECK: s_and_saveexec_b64 +; CHECK: s_xor_b64 ; CHECK-NEXT: mask branch [[BB4:BB[0-9]+_[0-9]+]] ; CHECK: v_cmpx_le_f32_e32 vcc, 0, -- 2.7.4