From 5ebe91fcb2a19ca58bb7a2ef97e8a33a85f3ce77 Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan Date: Sun, 17 Apr 2022 17:44:39 +0530 Subject: [PATCH] [AMDGPU] Correctly set IsKill flag for VGPR spills in the prolog We always assume the vector register is dead or killed while inserting the VGPR spills in the prolog, which is not always true. Use the entry block's live-in data to set the flag instead. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D124194 --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 6 +- .../CodeGen/AMDGPU/av_spill_cross_bb_usage.mir | 2 +- .../CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir | 2 +- .../CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir | 2 +- .../test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir | 4 +- llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir | 2 +- .../CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir | 6 +- .../AMDGPU/spill-reg-tuple-super-reg-use.mir | 4 +- .../CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir | 104 +++++++++++++++++++++ llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir | 4 +- 10 files changed, 121 insertions(+), 15 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index b846bfc..7c22239 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -138,10 +138,12 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI), FrameInfo.getObjectAlign(FI)); LiveRegs.addReg(SpillReg); - TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true, + bool IsKill = !MBB.isLiveIn(SpillReg); + TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr, &LiveRegs); - LiveRegs.removeReg(SpillReg); + if (IsKill) + LiveRegs.removeReg(SpillReg); } static void buildEpilogRestore(const GCNSubtarget 
&ST, diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir index 530fa49..7209d16 100644 --- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir +++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir @@ -26,7 +26,7 @@ body: | ; GCN-NEXT: liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir index 691df23..8d609be 100644 --- a/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir +++ b/llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir @@ -17,7 +17,7 @@ body: | ; CHECK-NEXT: liveins: $sgpr42, $sgpr43, $sgpr46, $sgpr47, $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 
0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr42, 0, $vgpr0 ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr43, 1, $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir index 236f9b5..e2d3dc5 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir @@ -31,7 +31,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir index b8151e9..76f533f 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir @@ -27,7 +27,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, 
$sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc @@ -51,7 +51,7 @@ body: | ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc - ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir index d6534b1..767cc1e 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir @@ -26,7 +26,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 262400, implicit-def dead $scc - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, 
addrspace 5) ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir index c27e50a..ae337a6 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -28,7 +28,7 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX8-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc @@ -53,7 +53,7 @@ body: | ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; GFX9-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc @@ -76,7 +76,7 @@ body: | ; 
GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir index 433bf28..b161907 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -23,7 +23,7 @@ body: | ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr8_sgpr9 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2 ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -60,7 +60,7 @@ body: | ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3 
; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr8_sgpr9 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9 ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2 ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir b/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir new file mode 100644 index 0000000..93bbbe6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/track-spilled-vgpr-liveness.mir @@ -0,0 +1,104 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -run-pass=prologepilog -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# VGPR spill code in the prolog sets the `IsKill` flag for the spilled register +# based on the entry block's livein info. 
+ +--- +name: vgpr_use_after_prolog_spill +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + + ; GCN-LABEL: name: vgpr_use_after_prolog_spill + ; GCN: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec + $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + S_NOP 0, implicit-def $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31 +... 
+ +--- +name: livein_vgpr_def_after_prolog_spill +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $sgpr42, $vgpr0, $vgpr1, $sgpr30_sgpr31 + + ; GCN-LABEL: name: livein_vgpr_def_after_prolog_spill + ; GCN: liveins: $sgpr42, $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr1, implicit $exec + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr1, implicit $exec + $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + S_NOP 0, implicit-def $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31 +... 
+ +--- +name: livein_vgpr_not_referred_in_entry_bb +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: livein_vgpr_not_referred_in_entry_bb + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: S_NOP 0 + ; GCN-NEXT: S_BRANCH %bb.1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0 + ; GCN-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31 + bb.0: + liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + S_NOP 0 + S_BRANCH %bb.1 + bb.1: + liveins: $sgpr42, $vgpr0, $sgpr30_sgpr31 + $vgpr0 = V_WRITELANE_B32 killed $sgpr42, 0, $vgpr0 + S_NOP 0, implicit-def $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir index 1a08820..04d335e 100644 --- a/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir +++ b/llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir @@ -41,7 +41,7 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 9961728, implicit-def dead $scc - ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5) + ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5) ; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc @@ -80,7 +80,7 @@ body: | ; FLATSCR-NEXT: {{ $}} ; FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 155652, implicit-def dead $scc - ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.20, addrspace 5) + ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.20, addrspace 5) ; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 ; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc -- 2.7.4