From: Sebastian Neubauer Date: Tue, 6 Jul 2021 09:23:06 +0000 (+0200) Subject: [AMDGPU] Mark waterfall loops as SI_WATERFALL_LOOP X-Git-Tag: llvmorg-14-init~1654 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9d72c0ad43e720ef2394a23a2f4c58f79d753f03;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Mark waterfall loops as SI_WATERFALL_LOOP This way, they can be detected later, e.g. by the SIOptimizeVGPRLiveRange pass. Differential Revision: https://reviews.llvm.org/D105467 --- diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 8dc7489..d0f5b2d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5278,7 +5278,7 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI, .addReg(Exec) .addReg(SaveExec); - BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB); + BuildMI(LoopBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB); } // Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 0475963..fbf4634 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -318,6 +318,14 @@ def SI_ELSE : CFPseudoInstSI < let hasSideEffects = 1; } +def SI_WATERFALL_LOOP : CFPseudoInstSI < + (outs), + (ins brtarget:$target), [], 1> { + let Size = 8; + let isBranch = 1; + let Defs = []; +} + def SI_LOOP : CFPseudoInstSI < (outs), (ins SReg_1:$saved, brtarget:$target), [(AMDGPUloop i1:$saved, bb:$target)], 1, 1> { diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index edfafd6..0f2836e 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -600,6 +600,10 @@ MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) { emitLoop(MI); break; + case AMDGPU::SI_WATERFALL_LOOP: + MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ)); + break; + case AMDGPU::SI_END_CF: SplitBB = emitEndCf(MI); break; @@ -840,6 +844,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::SI_IF: case AMDGPU::SI_ELSE: case AMDGPU::SI_IF_BREAK: + case AMDGPU::SI_WATERFALL_LOOP: case AMDGPU::SI_LOOP: case AMDGPU::SI_END_CF: SplitMBB = process(MI); diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir index 256c150..8e33900 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir @@ -30,7 +30,7 @@ # W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc -# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec # W64-LABEL: bb.2: # W64: $exec = S_MOV_B64 [[SAVEEXEC]] @@ -55,7 +55,7 @@ # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # TODO: S_XOR_B32_term should be `implicit-def $scc` # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]] -# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec # W32-LABEL: bb.2: # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]] --- @@ -103,7 +103,7 @@ body: | # W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc -# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec # W64-LABEL: bb.2: # W64: $exec = S_MOV_B64 [[SAVEEXEC]] @@ -128,7 +128,7 @@ body: | # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # TODO: S_XOR_B32_term should be `implicit-def $scc` # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]] -# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec # W32-LABEL: bb.2: # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]] --- @@ -176,7 +176,7 @@ body: | # W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec # W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc -# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec # W64-LABEL: bb.2: # W64: $exec = S_MOV_B64 [[SAVEEXEC]] @@ -201,7 +201,7 @@ body: | # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # TODO: S_XOR_B32_term should be `implicit-def $scc` # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]] -# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec # W32-LABEL: bb.2: # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]] --- @@ -286,7 +286,7 @@ body: | # W64-NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec # W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # W64-NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc -# W64-NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W64-NO-ADDR64: SI_WATERFALL_LOOP %bb.1, implicit $exec # W64-NO-ADDR64-LABEL: bb.2: # W64-NO-ADDR64: $exec = S_MOV_B64 [[SAVEEXEC]] @@ -309,7 +309,7 @@ body: | # W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec # TODO: S_XOR_B32_term should be `implicit-def $scc` # W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]] -# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec +# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec # W32-LABEL: bb.2: # W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]