From: Piotr Sobczak Date: Thu, 17 Sep 2020 12:21:23 +0000 (+0200) Subject: [AMDGPU] Fix merging m0 inits X-Git-Tag: llvmorg-13-init~11206 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=8d7fd73c3a8ce069cfe48dfcf949b4a59c05c673;p=platform%2Fupstream%2Fllvm.git [AMDGPU] Fix merging m0 inits Fix incorrect merges of m0 inits in loops. It was assumed that if a clobbering instruction appears in the same block as an init and the clobbering instruction does not dominate the init, then it does not interfere with the init. This does not work in the presence of loops, where, in this scenario, the clobbering instruction does interfere with the init in another iteration. To fix this, do not check for block equality and defer the decision to the predecessor check. Differential Revision: https://reviews.llvm.org/D87882 --- diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 775cec6..a6df41f 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -386,17 +386,13 @@ static bool isReachable(const MachineInstr *From, const MachineInstr *To, const MachineBasicBlock *CutOff, MachineDominatorTree &MDT) { - // If either From block dominates To block or instructions are in the same - // block and From is higher. if (MDT.dominates(From, To)) return true; const MachineBasicBlock *MBBFrom = From->getParent(); const MachineBasicBlock *MBBTo = To->getParent(); - if (MBBFrom == MBBTo) - return false; - // Instructions are in different blocks, do predecessor search. + // Do predecessor search. // We should almost never get here since we do not usually produce M0 stores // other than -1. 
return searchPredecessors(MBBTo, CutOff, [MBBFrom] diff --git a/llvm/test/CodeGen/AMDGPU/merge-m0.mir b/llvm/test/CodeGen/AMDGPU/merge-m0.mir index 0afc5d1..81d9491 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-m0.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-m0.mir @@ -295,3 +295,386 @@ body: | SI_INIT_M0 -1, implicit-def $m0 DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec ... + + +# GCN-LABEL: name: m0-in-loop-0 +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.1: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: $m0 = COPY %2 + +--- +name: m0-in-loop-0 +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: sgpr_32 } +body: | + bb.0.entry: + successors: %bb.1 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = IMPLICIT_DEF + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + $m0 = COPY %2:sgpr_32 + S_SENDMSG 34, implicit $exec, implicit $m0 + S_CBRANCH_VCCZ %bb.1, implicit undef $vcc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... 
+ +# GCN-LABEL: name: m0-in-loop-1 +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.1: +# GCN-NOT: SI_INIT_M0 -1 +# GCN: DS_WRITE_B32 + +--- +name: m0-in-loop-1 +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: sgpr_32 } +body: | + bb.0.entry: + successors: %bb.1 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = IMPLICIT_DEF + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_CBRANCH_VCCZ %bb.1, implicit undef $vcc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... + +# GCN-LABEL: name: m0-in-loop-2 +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.1: +# GCN: $m0 = COPY %2 +# GCN-NEXT: SENDMSG +# GCN-NEXT: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 + +--- +name: m0-in-loop-2 +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: sgpr_32 } +body: | + bb.0.entry: + successors: %bb.1 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = IMPLICIT_DEF + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + + $m0 = COPY %2:sgpr_32 + S_SENDMSG 34, implicit $exec, implicit $m0 + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_CBRANCH_VCCZ %bb.1, implicit undef $vcc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... 
+ +# GCN-LABEL: name: m0-in-loop-3 +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.1: +# GCN: $m0 = COPY %2 +# GCN-NEXT: SENDMSG +# GCN-NEXT: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: DS_WRITE_B32 + +--- +name: m0-in-loop-3 +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: sgpr_32 } +body: | + bb.0.entry: + successors: %bb.1 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = IMPLICIT_DEF + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + + $m0 = COPY %2:sgpr_32 + S_SENDMSG 34, implicit $exec, implicit $m0 + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_CBRANCH_VCCZ %bb.1, implicit undef $vcc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... 
+ +# GCN-LABEL: name: m0-in-loop-4 +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.1: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: $m0 = COPY %2 +# GCN-NEXT: SENDMSG + +--- +name: m0-in-loop-4 +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: sgpr_32 } +body: | + bb.0.entry: + successors: %bb.1 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = IMPLICIT_DEF + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + $m0 = COPY %2:sgpr_32 + S_SENDMSG 34, implicit $exec, implicit $m0 + S_CBRANCH_VCCZ %bb.1, implicit undef $vcc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... 
+ +# GCN-LABEL: name: m0-in-loop-5 +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.1: +# GCN: SI_INIT_M0 65536 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: $m0 = COPY %2 +# GCN-NEXT: SENDMSG + +--- +name: m0-in-loop-5 +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: sgpr_32 } +body: | + bb.0.entry: + successors: %bb.1 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = IMPLICIT_DEF + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + + SI_INIT_M0 65536, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + $m0 = COPY %2:sgpr_32 + S_SENDMSG 34, implicit $exec, implicit $m0 + S_CBRANCH_VCCZ %bb.1, implicit undef $vcc + S_BRANCH %bb.2 + + bb.2: + S_ENDPGM 0 +... 
+ +# GCN-LABEL: name: m0-in-loop-6 +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.1: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: $m0 = COPY %2 +# GCN-NEXT: SENDMSG + +--- +name: m0-in-loop-6 +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: sgpr_32 } +body: | + bb.0.entry: + successors: %bb.1 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = IMPLICIT_DEF + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + $m0 = COPY %2:sgpr_32 + S_SENDMSG 34, implicit $exec, implicit $m0 + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3, %bb.1 + S_CBRANCH_VCCZ %bb.1, implicit undef $vcc + S_BRANCH %bb.3 + + + bb.3: + S_ENDPGM 0 +... 
+ +# GCN-LABEL: name: m0-in-loop-7 +# GCN: bb.0.entry: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: IMPLICIT_DEF +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.1: +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: DS_WRITE_B32 +# GCN-NEXT: DS_WRITE_B32 + +# GCN: bb.2: +# GCN: $m0 = COPY %2 +# GCN-NEXT: SENDMSG + +--- +name: m0-in-loop-7 +registers: + - { id: 0, class: vgpr_32 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: sgpr_32 } +body: | + bb.0.entry: + successors: %bb.1 + + %0 = IMPLICIT_DEF + %1 = IMPLICIT_DEF + %2 = IMPLICIT_DEF + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + SI_INIT_M0 -1, implicit-def $m0 + DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3, %bb.1 + $m0 = COPY %2:sgpr_32 + S_SENDMSG 34, implicit $exec, implicit $m0 + S_CBRANCH_VCCZ %bb.1, implicit undef $vcc + S_BRANCH %bb.3 + + bb.3: + S_ENDPGM 0 +...