From 663f16684d1a5f129874b7be9e1f486682977bfa Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 14 Oct 2020 18:10:54 -0400 Subject: [PATCH] AMDGPU: Fix verifier error on killed spill of partially undef register This does unfortunately end up with extra waitcnts getting inserted that were avoided before. Ideally we would avoid the spills of these undef components in the first place. --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 24 +++- .../GlobalISel/extractelement-stack-lower.ll | 6 + .../CodeGen/AMDGPU/spill-agpr-partially-undef.mir | 70 ++++++++++++ llvm/test/CodeGen/AMDGPU/spill-agpr.mir | 16 +-- .../AMDGPU/spill-reg-tuple-super-reg-use.mir | 6 +- llvm/test/CodeGen/AMDGPU/vgpr-spill.mir | 126 +++++++++++++++++++++ 6 files changed, 233 insertions(+), 15 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir create mode 100644 llvm/test/CodeGen/AMDGPU/vgpr-spill.mir diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 7b1970f..9782e11 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -799,20 +799,29 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, SrcDstRegState |= getKillRegState(IsKill); } + // Make sure the whole register is defined if there are undef components by + // adding an implicit def of the super-reg on the first instruction. + const bool NeedSuperRegDef = NumSubRegs > 1 && IsStore && i == 0; + auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill); if (!MIB.getInstr()) { unsigned FinalReg = SubReg; - if (hasAGPRs(RC)) { + + const bool IsAGPR = hasAGPRs(RC); + if (IsAGPR) { if (!TmpReg) { assert(RS && "Needs to have RegScavenger to spill an AGPR!"); // FIXME: change to scavengeRegisterBackwards() TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); RS->setRegUsed(TmpReg); } - if (IsStore) - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg) + if (IsStore) { + auto AccRead = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg) .addReg(SubReg, getKillRegState(IsKill)); + if (NeedSuperRegDef) + AccRead.addReg(ValueReg, RegState::ImplicitDefine); + } SubReg = TmpReg; } @@ -838,14 +847,21 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, .addImm(0) // swz .addMemOperand(NewMMO); + if (!IsAGPR && NeedSuperRegDef) + MIB.addReg(ValueReg, RegState::ImplicitDefine); + if (!IsStore && TmpReg != AMDGPU::NoRegister) MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32), FinalReg) .addReg(TmpReg, RegState::Kill); + } else { + if (NeedSuperRegDef) + MIB.addReg(ValueReg, RegState::ImplicitDefine); } - if (NumSubRegs > 1) + if (NumSubRegs > 1) { MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState); + } } if (ScratchOffsetRegDelta != 0) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll index 4f9668f..6fd99b8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -59,6 +59,7 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill @@ -78,6 +79,7 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill @@ -342,6 +344,7 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill @@ -361,6 +364,7 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill @@ -630,6 +634,7 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill @@ -649,6 +654,7 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: global_load_dwordx4 v[43:46], v[59:60], off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir new file mode 100644 index 0000000..bdb4d04 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir @@ -0,0 +1,70 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=CHECK -check-prefix=GFX908 %s + +--- +name: spill_a64_kill +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $agpr0_agpr1 + + ; CHECK-LABEL: name: spill_a64_kill + ; CHECK: liveins: $agpr0_agpr1 + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... + +# Make sure there's no verifier error on the undef spill component when the value is killed. + +--- +name: spill_a64_undef_sub1_killed +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $agpr0 + + ; CHECK-LABEL: name: spill_a64_undef_sub1_killed + ; CHECK: liveins: $agpr0 + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... + +--- +name: spill_a64_undef_sub0_killed +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $agpr1 + + ; CHECK-LABEL: name: spill_a64_undef_sub0_killed + ; CHECK: liveins: $agpr1 + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir index c817b97..6d5af02 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir @@ -78,7 +78,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit killed $agpr0_agpr1 ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED: bb.1: @@ -220,7 +220,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit killed $agpr0_agpr1_agpr2 ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc @@ -269,7 +269,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 @@ -320,7 +320,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 @@ -373,7 +373,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 @@ -428,7 +428,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 @@ -487,7 +487,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 @@ -562,7 +562,7 @@ body: | ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED: $vgpr2 = V_ACCVGPR_READ_B32 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED: $vgpr3 = V_ACCVGPR_READ_B32 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir index b0bcdb4..d95ee97 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir @@ -79,11 +79,11 @@ body: | ; GCN-LABEL: name: spill_vgpr128_use_subreg ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GCN: renamable $vgpr1 = COPY $vgpr2 - ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) - ; GCN: renamable $vgpr8 = COPY $vgpr2 + ; GCN: renamable $vgpr8 = COPY killed renamable $vgpr1 ; GCN: S_ENDPGM 0, implicit $vgpr8 renamable $vgpr1 = COPY $vgpr2 SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5) @@ -108,7 +108,7 @@ body: | ; GCN-LABEL: name: spill_vgpr128_use_kill ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; GCN: renamable $vgpr1 = COPY $vgpr2 - ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5) ; GCN: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir new file mode 100644 index 0000000..240f677 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir @@ -0,0 +1,126 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=CHECK -check-prefix=GCN64 %s + +--- +name: spill_v32 +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: spill_v32 + ; CHECK: liveins: $vgpr0 + ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; CHECK: S_NOP 0, implicit $vgpr0 + SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + S_NOP 0, implicit $vgpr0 +... + +--- +name: spill_v32_kill +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: spill_v32_kill + ; CHECK: liveins: $vgpr0 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) +... + +--- +name: spill_v64 +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: spill_v64 + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + ; CHECK: S_NOP 0, implicit $vgpr0_vgpr1 + SI_SPILL_V64_SAVE $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) + S_NOP 0, implicit $vgpr0_vgpr1 +... + +--- +name: spill_v64_kill +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: spill_v64_kill + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... + +# Make sure there's no verifier error on the undef spill component when the value is killed. + +--- +name: spill_v64_undef_sub1_killed +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: spill_v64_undef_sub1_killed + ; CHECK: liveins: $vgpr0 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... + +--- +name: spill_v64_undef_sub0_killed +tracksRegLiveness: true +stack: + - { id: 0, type: spill-slot, size: 8, alignment: 4 } +machineFunctionInfo: + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' + frameOffsetReg: '$sgpr33' +body: | + bb.0: + liveins: $vgpr1 + + ; CHECK-LABEL: name: spill_v64_undef_sub0_killed + ; CHECK: liveins: $vgpr1 + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5) + ; CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5) + SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, addrspace 5) +... -- 2.7.4