From 5073a622a785e8fd542fd15484970a435ef2e3e5 Mon Sep 17 00:00:00 2001 From: Anshil Gandhi Date: Tue, 17 Jan 2023 14:34:27 -0700 Subject: [PATCH] [MachineBasicBlock] Explicit FT branching param Introduce a parameter in getFallThrough() to optionally allow returning the fall through basic block in spite of an explicit branch instruction to it. This parameter is set to false by default. Introduce getLogicalFallThrough() which calls getFallThrough(false) to obtain the block while avoiding insertion of a jump instruction to its immediate successor. This patch also reverts the changes made by D134557 and solves the case where a jump is inserted after another jump (branch-relax-no-terminators.mir). Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D140790 --- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 13 +- llvm/lib/CodeGen/BranchRelaxation.cpp | 26 +-- llvm/lib/CodeGen/MachineBasicBlock.cpp | 6 +- .../AMDGPU/branch-relax-indirect-branch.mir | 180 +++++++++++++++++++++ 4 files changed, 196 insertions(+), 29 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index f2fc266..1ab24b5 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -793,10 +793,15 @@ public: /// Return the fallthrough block if the block can implicitly /// transfer control to the block after it by falling off the end of - /// it. This should return null if it can reach the block after - /// it, but it uses an explicit branch to do so (e.g., a table - /// jump). Non-null return is a conservative answer. - MachineBasicBlock *getFallThrough(); + /// it. If an explicit branch to the fallthrough block is not allowed, + /// set JumpToFallThrough to be false. Non-null return is a conservative + /// answer. + MachineBasicBlock *getFallThrough(bool JumpToFallThrough = false); + + /// Return the fallthrough block if the block can implicitly + /// transfer control to it's successor, whether by a branch or + /// a fallthrough. Non-null return is a conservative answer. + MachineBasicBlock *getLogicalFallThrough() { return getFallThrough(true); } /// Return true if the block can implicitly transfer control to the /// block after it by falling off the end of it. This should return diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp index b243188..016c81d 100644 --- a/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -442,7 +442,6 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { MachineBasicBlock *MBB = MI.getParent(); - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; unsigned OldBrSize = TII->getInstSizeInBytes(MI); MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI); @@ -456,20 +455,6 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { MachineBasicBlock *BranchBB = MBB; - auto RemoveBranch = [&](MachineBasicBlock *MBB) { - unsigned &BBSize = BlockInfo[MBB->getNumber()].Size; - int RemovedSize = 0; - TII->removeBranch(*MBB, &RemovedSize); - BBSize -= RemovedSize; - }; - - auto InsertUncondBranch = [&](MachineBasicBlock *MBB, - MachineBasicBlock *Dst) { - TII->insertUnconditionalBranch(*MBB, Dst, DebugLoc()); - // Recalculate the block size. - BlockInfo[MBB->getNumber()].Size = computeBlockSize(*MBB); - }; - // If this was an expanded conditional branch, there is already a single // unconditional branch in a block. if (!MBB->empty()) { @@ -511,13 +496,10 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) { MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator()); // Fall through only if PrevBB has no unconditional branch as one of its // terminators. - if (TII->analyzeBranch(*PrevBB, TBB, FBB, Cond)) - report_fatal_error("Could not analyze terminators."); - if (!FBB) { - if (!Cond.empty() && TBB && TBB == DestBB) - RemoveBranch(PrevBB); - if (!TBB || (TBB && !Cond.empty())) - InsertUncondBranch(PrevBB, DestBB); + if (auto *FT = PrevBB->getLogicalFallThrough()) { + assert(FT == DestBB); + TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc()); + BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB); } // Now, RestoreBB could be placed directly before DestBB. MF->splice(DestBB->getIterator(), RestoreBB->getIterator()); diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp index 499ff24..5ef377f 100644 --- a/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -944,7 +944,7 @@ const MachineBasicBlock *MachineBasicBlock::getSingleSuccessor() const { return Successors.size() == 1 ? Successors[0] : nullptr; } -MachineBasicBlock *MachineBasicBlock::getFallThrough() { +MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) { MachineFunction::iterator Fallthrough = getIterator(); ++Fallthrough; // If FallthroughBlock is off the end of the function, it can't fall through. @@ -975,8 +975,8 @@ MachineBasicBlock *MachineBasicBlock::getFallThrough() { // If there is some explicit branch to the fallthrough block, it can obviously // reach, even though the branch should get folded to fall through implicitly. - if (MachineFunction::iterator(TBB) == Fallthrough || - MachineFunction::iterator(FBB) == Fallthrough) + if (!JumpToFallThrough && (MachineFunction::iterator(TBB) == Fallthrough || + MachineFunction::iterator(FBB) == Fallthrough)) return &*Fallthrough; // If it's an unconditional branch to some block not the fall through, it diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir new file mode 100644 index 0000000..6f35fd0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir @@ -0,0 +1,180 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa --amdgpu-s-branch-bits=5 -run-pass branch-relaxation %s -o - | FileCheck %s + +--- +name: branch_no_terminators +alignment: 1 +tracksRegLiveness: true +liveins: + - { reg: '$sgpr12' } +machineFunctionInfo: + stackPtrOffsetReg: '$sgpr32' + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' +body: | + ; CHECK-LABEL: name: branch_no_terminators + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.5(0x30000000) + ; CHECK-NEXT: liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: S_WAITCNT 0 + ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0 + ; CHECK-NEXT: $sgpr81 = S_MOV_B32 killed $sgpr12 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0 + ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1 + ; CHECK-NEXT: S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.1, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol + ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags() , implicit-def $scc + ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags() , implicit-def $scc, implicit $scc + ; CHECK-NEXT: S_SETPC_B64 $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1 + ; CHECK-NEXT: $sgpr81 = S_MOV_B32 killed $sgpr82 + ; CHECK-NEXT: $sgpr82 = S_MOV_B32 killed $sgpr83 + ; CHECK-NEXT: $sgpr83 = S_MOV_B32 killed $sgpr84 + ; CHECK-NEXT: $sgpr84 = S_MOV_B32 killed $sgpr85 + ; CHECK-NEXT: $sgpr101 = S_MOV_B32 killed $vcc_lo + ; CHECK-NEXT: S_BRANCH %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1 + ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr2, 1 + ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.7(0x7c000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32_XEXEC */, def renamable $sgpr4 + ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc + ; CHECK-NEXT: S_CBRANCH_SCC0 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1 + ; CHECK-NEXT: $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol + ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, target-flags() , implicit-def $scc + ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, target-flags() , implicit-def $scc, implicit $scc + ; CHECK-NEXT: S_SETPC_B64 $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr4 = S_MOV_B32 killed $sgpr5 + ; CHECK-NEXT: $sgpr5 = S_MOV_B32 killed $sgpr6 + ; CHECK-NEXT: $sgpr6 = S_MOV_B32 killed $sgpr7 + ; CHECK-NEXT: $sgpr7 = S_MOV_B32 killed $sgpr8 + ; CHECK-NEXT: $sgpr8 = S_MOV_B32 killed $sgpr9 + ; CHECK-NEXT: $sgpr9 = S_MOV_B32 killed $sgpr10 + ; CHECK-NEXT: $sgpr10 = S_MOV_B32 killed $sgpr11 + ; CHECK-NEXT: S_SETPC_B64 $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $vgpr2, $sgpr33 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1 + ; CHECK-NEXT: $sgpr1 = V_READLANE_B32 killed $vgpr2, 1 + ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; CHECK-NEXT: $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $sgpr101 = V_READLANE_B32 $vgpr1, 6 + ; CHECK-NEXT: $sgpr100 = V_READLANE_B32 $vgpr1, 5 + ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec + ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; CHECK-NEXT: S_WAITCNT 3952 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31 + bb.0.entry: + successors: %bb.1(0x50000000), %bb.4(0x30000000) + liveins: $sgpr12, $sgpr30, $sgpr31, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32 + + S_WAITCNT 0 + $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec + BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec + $exec = S_MOV_B64 killed $sgpr4_sgpr5 + $vgpr0 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr0 + $vgpr0 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr0 + $vgpr0 = V_WRITELANE_B32 $sgpr33, 2, $vgpr0 + $sgpr81 = S_MOV_B32 killed $sgpr12 + $vgpr0 = V_WRITELANE_B32 killed $sgpr94, 63, $vgpr0 + $vgpr1 = V_WRITELANE_B32 killed $sgpr101, 6, $vgpr1 + S_CMP_EQ_U32 killed renamable $sgpr81, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.1: + liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + + $vgpr1 = V_WRITELANE_B32 killed $sgpr81, 7, $vgpr1 + $sgpr81 = S_MOV_B32 killed $sgpr82 + $sgpr82 = S_MOV_B32 killed $sgpr83 + $sgpr83 = S_MOV_B32 killed $sgpr84 + $sgpr84 = S_MOV_B32 killed $sgpr85 + $sgpr101 = S_MOV_B32 killed $vcc_lo + + bb.2: + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + + INLINEASM &"v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64\0A v_nop_e64", 1 /* sideeffect attdialect */, 2097162 /* regdef:SReg_32 */, def renamable $sgpr4 + S_CMP_LG_U32 killed renamable $sgpr4, 0, implicit-def $scc + S_CBRANCH_SCC1 %bb.2, implicit killed $scc + + bb.3: + liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1 + + $sgpr4 = S_MOV_B32 killed $sgpr5 + $sgpr5 = S_MOV_B32 killed $sgpr6 + $sgpr6 = S_MOV_B32 killed $sgpr7 + $sgpr7 = S_MOV_B32 killed $sgpr8 + $sgpr8 = S_MOV_B32 killed $sgpr9 + $sgpr9 = S_MOV_B32 killed $sgpr10 + $sgpr10 = S_MOV_B32 killed $sgpr11 + S_SETPC_B64 $sgpr4_sgpr5 + + bb.4: + liveins: $vcc_hi, $vcc_lo, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr32, $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3 + + $sgpr101 = V_READLANE_B32 $vgpr1, 6 + $sgpr100 = V_READLANE_B32 $vgpr1, 5 + $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec + $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec + $exec = S_MOV_B64 killed $sgpr4_sgpr5 + S_WAITCNT 3952 + S_SETPC_B64_return undef $sgpr30_sgpr31 + +... -- 2.7.4