From: Matt Arsenault Date: Fri, 28 Aug 2020 18:58:33 +0000 (-0400) Subject: AMDGPU: Fix incorrectly deleting copies after spilling SGPR tuples X-Git-Tag: llvmorg-13-init~13380 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9145d75226a071c530d14e051008e4c32e87cf5e;p=platform%2Fupstream%2Fllvm.git AMDGPU: Fix incorrectly deleting copies after spilling SGPR tuples The implicit def of the super register would appear to kill any live uses of components before the spill, and would be deleted by MachineCopyPropagation. We need to add implicit uses of the super register, similarly to what copyPhysReg does. VGPR tuples appear to be correctly handled already. I need to double check the SGPR->memory path. --- diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index fd83364..8a989998 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -999,7 +999,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB = MI->getParent(); MachineFunction *MF = MBB->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); - DenseSet SGPRSpillVGPRDefinedSet; + DenseSet SGPRSpillVGPRDefinedSet; // FIXME: This should be removed ArrayRef VGPRSpills = MFI->getSGPRToVGPRSpills(Index); @@ -1032,6 +1032,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]); SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; + bool UseKill = IsKill && i == NumSubRegs - 1; + // During SGPR spilling to VGPR, determine if the VGPR is defined. The // only circumstance in which we say it is undefined is when it is the // first spill to this VGPR in the first basic block. @@ -1044,7 +1046,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, auto MIB = BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32), Spill.VGPR) - .addReg(SubReg, getKillRegState(IsKill)) + .addReg(SubReg, getKillRegState(UseKill)) .addImm(Spill.Lane) .addReg(Spill.VGPR, VGPRDefined ? 0 : RegState::Undef); @@ -1054,6 +1056,9 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, MIB.addReg(SuperReg, RegState::ImplicitDefine); } + if (NumSubRegs > 1) + MIB.addReg(SuperReg, getKillRegState(UseKill) | RegState::Implicit); + // FIXME: Since this spills to another register instead of an actual // frame index, we should delete the frame index when all references to // it are fixed. diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir new file mode 100644 index 0000000..b0bcdb4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -0,0 +1,119 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-lower-sgpr-spills,prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# Make sure the initial first $sgpr1 = COPY $sgpr2 copy is not deleted +# by the copy propagation after lowering the spill. + +--- +name: spill_sgpr128_use_subreg +tracksRegLiveness: true +machineFunctionInfo: + hasSpilledSGPRs: true + scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103 + stackPtrOffsetReg: $sgpr32 + +stack: + - { id: 0, stack-id: sgpr-spill, type: spill-slot, size: 16, alignment: 4 } + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; GCN-LABEL: name: spill_sgpr128_use_subreg + ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GCN: renamable $sgpr1 = COPY $sgpr2 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: renamable $sgpr8 = COPY killed renamable $sgpr1 + ; GCN: S_ENDPGM 0, implicit $sgpr8 + renamable $sgpr1 = COPY $sgpr2 + SI_SPILL_S128_SAVE renamable $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr100_sgpr101_sgpr102_sgpr103, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5) + renamable $sgpr8 = COPY killed renamable $sgpr1 + S_ENDPGM 0, implicit $sgpr8 +... + +--- +name: spill_sgpr128_use_kill +tracksRegLiveness: true +machineFunctionInfo: + hasSpilledSGPRs: true + scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103 + stackPtrOffsetReg: $sgpr32 + +stack: + - { id: 0, stack-id: sgpr-spill, type: spill-slot, size: 16, alignment: 4 } + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; GCN-LABEL: name: spill_sgpr128_use_kill + ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GCN: renamable $sgpr1 = COPY $sgpr2 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr0, 0, undef $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: $vgpr0 = V_WRITELANE_B32_vi killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: S_ENDPGM 0 + renamable $sgpr1 = COPY $sgpr2 + SI_SPILL_S128_SAVE renamable killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.0, implicit $exec, implicit $sgpr100_sgpr101_sgpr102_sgpr103, implicit $sgpr32 :: (store 16 into %stack.0, align 4, addrspace 5) + S_ENDPGM 0 +... + +--- +name: spill_vgpr128_use_subreg +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103 + stackPtrOffsetReg: $sgpr32 + +stack: + - { id: 0, type: spill-slot, size: 16, alignment: 4 } + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + + ; GCN-LABEL: name: spill_vgpr128_use_subreg + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; GCN: renamable $vgpr1 = COPY $vgpr2 + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) + ; GCN: renamable $vgpr8 = COPY $vgpr2 + ; GCN: S_ENDPGM 0, implicit $vgpr8 + renamable $vgpr1 = COPY $vgpr2 + SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + renamable $vgpr8 = COPY killed renamable $vgpr1 + S_ENDPGM 0, implicit $vgpr8 +... + +--- +name: spill_vgpr128_use_kill +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr100_sgpr101_sgpr102_sgpr103 + stackPtrOffsetReg: $sgpr32 + +stack: + - { id: 0, type: spill-slot, size: 16, alignment: 4 } + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + + ; GCN-LABEL: name: spill_vgpr128_use_kill + ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; GCN: renamable $vgpr1 = COPY $vgpr2 + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) + ; GCN: S_ENDPGM 0 + renamable $vgpr1 = COPY $vgpr2 + SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/spill192.mir b/llvm/test/CodeGen/AMDGPU/spill192.mir index 26f727d..25a2d6e 100644 --- a/llvm/test/CodeGen/AMDGPU/spill192.mir +++ b/llvm/test/CodeGen/AMDGPU/spill192.mir @@ -30,12 +30,12 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0 ; EXPANDED: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr4, 0, undef $vgpr0 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr5, 1, $vgpr0 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr6, 2, $vgpr0 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr7, 3, $vgpr0 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr8, 4, $vgpr0 - ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr4, 0, undef $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: $vgpr0 = V_WRITELANE_B32_gfx6_gfx7 killed $sgpr9, 5, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED: bb.1: ; EXPANDED: successors: %bb.2(0x80000000)