From: Matt Arsenault Date: Mon, 6 Jan 2020 03:09:24 +0000 (-0500) Subject: AMDGPU/GlobalISel: Fix branch targets when emitting SI_IF X-Git-Tag: llvmorg-11-init~233 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ca19d7a3993c69633826ae388155c9ad176b11df;p=platform%2Fupstream%2Fllvm.git AMDGPU/GlobalISel: Fix branch targets when emitting SI_IF The branch target needs to be changed depending on whether there is an unconditional branch or not. Loops also need to be similarly fixed, but compiling a simple testcase end to end requires another set of patches that aren't upstream yet. --- diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4d14a85..3f99d5c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1805,14 +1805,26 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg( // Return the use branch instruction, otherwise null if the usage is invalid. static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, - MachineRegisterInfo &MRI) { + MachineRegisterInfo &MRI, + MachineInstr *&Br) { Register CondDef = MI.getOperand(0).getReg(); if (!MRI.hasOneNonDBGUse(CondDef)) return nullptr; MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef); - return UseMI.getParent() == MI.getParent() && - UseMI.getOpcode() == AMDGPU::G_BRCOND ? 
&UseMI : nullptr; + if (UseMI.getParent() != MI.getParent() || + UseMI.getOpcode() != AMDGPU::G_BRCOND) + return nullptr; + + // Make sure the cond br is followed by a G_BR + MachineBasicBlock::iterator Next = std::next(UseMI.getIterator()); + if (Next != MI.getParent()->end()) { + if (Next->getOpcode() != AMDGPU::G_BR) + return nullptr; + Br = &*Next; + } + + return &UseMI; } Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI, @@ -2341,7 +2353,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, switch (IntrID) { case Intrinsic::amdgcn_if: case Intrinsic::amdgcn_else: { - if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) { + MachineInstr *Br = nullptr; + if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) { const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo()); @@ -2349,19 +2362,26 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, Register Def = MI.getOperand(1).getReg(); Register Use = MI.getOperand(3).getReg(); + MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB(); + if (Br) + BrTarget = Br->getOperand(0).getMBB(); + if (IntrID == Intrinsic::amdgcn_if) { B.buildInstr(AMDGPU::SI_IF) .addDef(Def) .addUse(Use) - .addMBB(BrCond->getOperand(1).getMBB()); + .addMBB(BrTarget); } else { B.buildInstr(AMDGPU::SI_ELSE) .addDef(Def) .addUse(Use) - .addMBB(BrCond->getOperand(1).getMBB()) + .addMBB(BrTarget) .addImm(0); } + if (Br) + Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB()); + MRI.setRegClass(Def, TRI->getWaveMaskRegClass()); MRI.setRegClass(Use, TRI->getWaveMaskRegClass()); MI.eraseFromParent(); @@ -2372,11 +2392,14 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, return false; } case Intrinsic::amdgcn_loop: { - if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) { + MachineInstr *Br = nullptr; + if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) { const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo()); B.setInstr(*BrCond); + + // 
FIXME: Need to adjust branch targets based on unconditional branch. Register Reg = MI.getOperand(2).getReg(); B.buildInstr(AMDGPU::SI_LOOP) .addUse(Reg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll new file mode 100644 index 0000000..40e1820 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s + +; Make sure the branch targets are correct after lowering llvm.amdgcn.if + +define i32 @divergent_if_swap_brtarget_order0(i32 %value) { +; CHECK-LABEL: divergent_if_swap_brtarget_order0: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: ; implicit-def: $vgpr0 +; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc +; CHECK-NEXT: ; mask branch BB0_2 +; CHECK-NEXT: s_cbranch_execz BB0_2 +; CHECK-NEXT: BB0_1: ; %if.true +; CHECK-NEXT: global_load_dword v0, v[0:1], off +; CHECK-NEXT: BB0_2: ; %endif +; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +entry: + %c = icmp ne i32 %value, 0 + br i1 %c, label %if.true, label %endif + +if.true: + %val = load volatile i32, i32 addrspace(1)* undef + br label %endif + +endif: + %v = phi i32 [ %val, %if.true ], [ undef, %entry ] + ret i32 %v +} + +define i32 @divergent_if_swap_brtarget_order1(i32 %value) { +; CHECK-LABEL: divergent_if_swap_brtarget_order1: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: ; implicit-def: $vgpr0 +; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc +; CHECK-NEXT: ; mask branch BB1_2 +; CHECK-NEXT: BB1_1: ; %endif +; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] +; CHECK-NEXT: s_setpc_b64 
s[30:31] +; CHECK-NEXT: BB1_2: ; %if.true +; CHECK-NEXT: global_load_dword v0, v[0:1], off +; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +entry: + %c = icmp ne i32 %value, 0 + br i1 %c, label %if.true, label %endif + +endif: + %v = phi i32 [ %val, %if.true ], [ undef, %entry ] + ret i32 %v + +if.true: + %val = load volatile i32, i32 addrspace(1)* undef + br label %endif +}