From 7a9ad430fec98fa1a0d55812e42243a9ec603779 Mon Sep 17 00:00:00 2001
From: Michael Liao
Date: Thu, 4 Jul 2019 13:29:45 +0000
Subject: [PATCH] [AMDGPU] Correct the setting of `FlatScratchInit`.

Summary:
- The setting of that flag should skip stack slots used only for register spilling.

Reviewers: arsenm, rampitec

Subscribers: qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64143

llvm-svn: 365137
---
 llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp          | 13 ++++++++++++-
 llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir |  6 ++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index f7d6172..b73fead 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -145,9 +145,20 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
     KernargSegmentPtr = true;
 
   if (ST.hasFlatAddressSpace() && isEntryFunction() && isAmdHsaOrMesa) {
+    auto hasNonSpillStackObjects = [&]() {
+      // Avoid expensive checking if there's no stack objects.
+      if (!HasStackObjects)
+        return false;
+      for (auto OI = FrameInfo.getObjectIndexBegin(),
+                OE = FrameInfo.getObjectIndexEnd(); OI != OE; ++OI)
+        if (!FrameInfo.isSpillSlotObjectIndex(OI))
+          return true;
+      // All stack objects are spill slots.
+      return false;
+    };
     // TODO: This could be refined a lot. The attribute is a poor way of
     // detecting calls that may require it before argument lowering.
-    if (HasStackObjects || F.hasFnAttribute("amdgpu-flat-scratch"))
+    if (hasNonSpillStackObjects() || F.hasFnAttribute("amdgpu-flat-scratch"))
       FlatScratchInit = true;
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
index 3c1e565..e366a0e 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/undefined-physreg-sgpr-spill.mir
@@ -41,7 +41,7 @@ constants:
 body:             |
   bb.0:
     successors: %bb.1, %bb.2
-    liveins: $vgpr0, $sgpr4_sgpr5
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
 
     $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
     $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
@@ -66,6 +66,7 @@ body:             |
 
   bb.2:
     successors:
+    liveins: $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
 
     $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4, addrspace 5)
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
@@ -105,7 +106,7 @@ constants:
 body:             |
   bb.0:
     successors: %bb.1, %bb.2
-    liveins: $vgpr0, $sgpr4_sgpr5
+    liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
 
     $vgpr1_vgpr2 = COPY killed $sgpr4_sgpr5, implicit $exec
     $vgpr1 = GLOBAL_LOAD_UBYTE killed $vgpr1_vgpr2, 0, 0, 0, 0, implicit $exec :: (non-temporal dereferenceable invariant load 1 from `i1 addrspace(4)* undef`)
@@ -130,6 +131,7 @@ body:             |
 
   bb.2:
     successors:
+    liveins: $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr13
 
     $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $sgpr13, implicit-def dead $m0 :: (load 8 from %stack.0, align 4, addrspace 5)
     $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
-- 
2.7.4