From: Carl Ritson Date: Wed, 16 Feb 2022 02:56:54 +0000 (+0900) Subject: [MachineSink] Use SkipPHIsAndLabels for sink insertion points X-Git-Tag: upstream/15.0.7~16306 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ef949ecba57410e8b856f3246128312c79207933;p=platform%2Fupstream%2Fllvm.git [MachineSink] Use SkipPHIsAndLabels for sink insertion points For AMDGPU the insertion point for a block may not be the first non-PHI instruction. This happens when a block contains EXEC mask manipulation related to control flow (converging lanes). Use SkipPHIsAndLabels to determine the block insertion point so that the target can skip any block prologue instructions. Reviewed By: rampitec, ruiling Differential Revision: https://reviews.llvm.org/D119399 --- diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index e584ebe..7ed33f9 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1272,7 +1272,8 @@ bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { } LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); - SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); + SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, + I); // The instruction is moved from its basic block, so do not retain the // debug information. @@ -1392,9 +1393,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, } // Determine where to insert into. Skip phi nodes. - MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); - while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) - ++InsertPos; + MachineBasicBlock::iterator InsertPos = + SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin()); // Collect debug users of any vreg that this inst defines. SmallVector DbgUsersToSink; @@ -1796,7 +1796,8 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, // Clear the kill flag if SrcReg is killed between MI and the end of the // block. clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI); - MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI(); + MachineBasicBlock::iterator InsertPos = + SuccBB->SkipPHIsAndLabels(SuccBB->begin()); performSink(MI, *SuccBB, InsertPos, DbgValsToSink); updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy); diff --git a/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir new file mode 100644 index 0000000..4feef21 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sink-after-control-flow.mir @@ -0,0 +1,122 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass=machine-sink -o - %s | FileCheck -check-prefixes=GFX10 %s + +# Test that MachineSink pass respects block prologues when sinking instructions. +# Specifically an instruction must not be sunk before exec mask manipulation. + +--- +name: _amdgpu_hs_main +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX10-LABEL: name: _amdgpu_hs_main + ; GFX10: bb.0: + ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 8 + ; GFX10-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[DEF]], 8, 5, implicit $exec + ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 5 + ; GFX10-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_BFE_U32_e64_]], killed [[S_MOV_B32_1]], implicit $exec + ; GFX10-NEXT: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_]], -1, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_]], $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_XOR_B32_1:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_]], implicit-def $scc + ; GFX10-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_]] + ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.1: + ; GFX10-NEXT: successors: %bb.2(0x80000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GFX10-NEXT: S_BRANCH %bb.2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.2: + ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_XOR_B32_1]], implicit-def $scc + ; GFX10-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[S_MOV_B32_]], [[DEF]], implicit $exec + ; GFX10-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 31 + ; GFX10-NEXT: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[V_BFE_U32_e64_]], killed [[S_MOV_B32_2]], implicit $exec + ; GFX10-NEXT: [[S_XOR_B32_2:%[0-9]+]]:sreg_32 = S_XOR_B32 [[V_CMP_NE_U32_e64_1]], -1, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_XOR_B32_2]], $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_XOR_B32_3:%[0-9]+]]:sreg_32 = S_XOR_B32 $exec_lo, [[S_AND_B32_1]], implicit-def $scc + ; GFX10-NEXT: $exec_lo = S_MOV_B32_term [[S_AND_B32_1]] + ; GFX10-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.3: + ; GFX10-NEXT: successors: %bb.4(0x80000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: S_BRANCH %bb.4 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.4: + ; GFX10-NEXT: successors: %bb.5(0x80000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[S_XOR_B32_3]], implicit-def $scc + ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX10-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX10-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; GFX10-NEXT: [[V_LSHL_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw nsw V_LSHL_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[S_MOV_B32_4]], killed [[S_MOV_B32_3]], implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.5: + ; GFX10-NEXT: $exec_lo = S_OR_B32 $exec_lo, [[DEF2]], implicit-def $scc + ; GFX10-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.4(0x40000000), %bb.5(0x40000000) + + %0:sgpr_32 = IMPLICIT_DEF + %14:sreg_32 = IMPLICIT_DEF + %15:vgpr_32 = IMPLICIT_DEF + %16:sreg_32 = S_MOV_B32 8 + %17:vgpr_32 = V_LSHRREV_B32_e64 %16, %15, implicit $exec + %18:vgpr_32 = V_BFE_U32_e64 %15, 8, 5, implicit $exec + %19:sreg_32 = S_MOV_B32 5 + %20:sreg_32 = V_CMP_NE_U32_e64 %18, killed %19, implicit $exec + %21:sreg_32 = S_XOR_B32 %20, -1, implicit-def $scc + %22:sreg_32 = S_AND_B32 %21, $exec_lo, implicit-def $scc + %23:sreg_32 = S_XOR_B32 $exec_lo, %22, implicit-def $scc + $exec_lo = S_MOV_B32_term %22 + S_CBRANCH_EXECZ %bb.5, implicit $exec + S_BRANCH %bb.4 + + bb.4: + successors: %bb.5(0x80000000) + + S_BRANCH %bb.5 + + bb.5: + successors: %bb.6(0x40000000), %bb.7(0x40000000) + + $exec_lo = S_OR_B32 $exec_lo, %23, implicit-def $scc + %24:sreg_32 = S_MOV_B32 31 + %25:sreg_32 = V_CMP_NE_U32_e64 %18, killed %24, implicit $exec + %26:sreg_32 = S_XOR_B32 %25, -1, implicit-def $scc + %27:sreg_32 = S_AND_B32 %26, $exec_lo, implicit-def $scc + %28:sreg_32 = S_XOR_B32 $exec_lo, %27, implicit-def $scc + $exec_lo = S_MOV_B32_term %27 + S_CBRANCH_EXECZ %bb.7, implicit $exec + S_BRANCH %bb.6 + + bb.6: + successors: %bb.7(0x80000000) + + S_BRANCH %bb.7 + + bb.7: + successors: %bb.8(0x80000000) + + $exec_lo = S_OR_B32 $exec_lo, %28, implicit-def $scc + %29:sreg_32 = S_MOV_B32 16 + %30:sreg_32 = S_MOV_B32 4 + %31:vgpr_32 = nuw nsw V_LSHL_ADD_U32_e64 %17, %30, killed %29, implicit $exec + S_BRANCH %bb.8 + + bb.8: + $exec_lo = S_OR_B32 $exec_lo, %14, implicit-def $scc + S_ENDPGM 0 + +...