From 52cb3af08c2ac917557d67bbbf8a91cb80bed6fc Mon Sep 17 00:00:00 2001 From: hsmahesha Date: Tue, 12 Oct 2021 09:58:34 +0530 Subject: [PATCH] [AMDGPU] Remove dead frame indices after sgpr spill. All those frame indices which are dead after sgpr spill should be removed from the function frame. Othewise, there is a side effect such as re-mapping of free frame index ids by the later pass(es) like "stack slot coloring" which in turn could mess-up with the book keeping of "frame index to VGPR lane". Reviewed By: cdevadas Differential Revision: https://reviews.llvm.org/D111150 --- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 7 +++ llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 12 +++- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 4 ++ .../sgpr-spill-incorrect-fi-bookkeeping-bug.ll | 65 ++++++++++++++++++++++ 4 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index 8080e09..3822ba3 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -373,6 +373,13 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) { } } + // All those frame indices which are dead by now should be removed from the + // function frame. Othewise, there is a side effect such as re-mapping of + // free frame index ids by the later pass(es) like "stack slot coloring" + // which in turn could mess-up with the book keeping of "frame index to VGPR + // lane". + FuncInfo->removeDeadFrameIndices(MFI); + MadeChange = true; } else if (FuncInfo->VGPRReservedForSGPRSpill) { FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 3874c1b..32a447f 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -442,10 +442,16 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF, } void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { - // The FP & BP spills haven't been inserted yet, so keep them around. - for (auto &R : SGPRToVGPRSpills) { - if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) + // Remove dead frame indices from function frame, however keep FP & BP since + // spills for them haven't been inserted yet. And also make sure to remove the + // frame indices from `SGPRToVGPRSpills` data structure, otherwise, it could + // result in an unexpected side effect and bug, in case of any re-mapping of + // freed frame indices by later pass(es) like "stack slot coloring". + for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) { + if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) { MFI.RemoveStackObject(R.first); + SGPRToVGPRSpills.erase(R.first); + } } // All other SPGRs must be allocated on the default stack, so reset the stack diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 48c76cc..2896ec8 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1350,6 +1350,10 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, SB.SuperReg != SB.MFI.getFrameOffsetReg())); if (SpillToVGPR) { + + assert(SB.NumSubRegs == VGPRSpills.size() && + "Num of VGPR lanes should be equal to num of SGPRs spilled"); + for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { Register SubReg = SB.NumSubRegs == 1 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll new file mode 100644 index 0000000..741c564 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-incorrect-fi-bookkeeping-bug.ll @@ -0,0 +1,65 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +; This tests for a bug that caused a crash in SIRegisterInfo::spillSGPR() +; which was due to incorrect book-keeping of removed dead frame indices. + +; CHECK-LABEL: {{^}}kernel0: +define amdgpu_kernel void @kernel0(i32 addrspace(1)* %out, i32 %in) #1 { + call void asm sideeffect "", "~{v[0:7]}" () #0 + call void asm sideeffect "", "~{v[8:15]}" () #0 + call void asm sideeffect "", "~{v[16:19]}"() #0 + call void asm sideeffect "", "~{v[20:21]}"() #0 + call void asm sideeffect "", "~{v22}"() #0 + + %val0 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 + %val1 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0 + %val2 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 + %val3 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 + %val4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 + %val5 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0 + %val6 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 + %val7 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 + %val8 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 + %val9 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0 + %val10 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 + %val11 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 + %val12 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 + %val13 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0 + %val14 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 + %val15 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 + %val16 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0 + %val17 = call <4 x i32> asm sideeffect "; def $0", "=s" () #0 + %val18 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0 + %val19 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0 + %cmp = icmp eq i32 %in, 0 + br i1 %cmp, label %bb0, label %ret + +bb0: + call void asm sideeffect "; use $0", "s"(<2 x i32> %val0) #0 + call void asm sideeffect "; use $0", "s"(<4 x i32> %val1) #0 + call void asm sideeffect "; use $0", "s"(<8 x i32> %val2) #0 + call void asm sideeffect "; use $0", "s"(<16 x i32> %val3) #0 + call void asm sideeffect "; use $0", "s"(<2 x i32> %val4) #0 + call void asm sideeffect "; use $0", "s"(<4 x i32> %val5) #0 + call void asm sideeffect "; use $0", "s"(<8 x i32> %val6) #0 + call void asm sideeffect "; use $0", "s"(<16 x i32> %val7) #0 + call void asm sideeffect "; use $0", "s"(<2 x i32> %val8) #0 + call void asm sideeffect "; use $0", "s"(<4 x i32> %val9) #0 + call void asm sideeffect "; use $0", "s"(<8 x i32> %val10) #0 + call void asm sideeffect "; use $0", "s"(<16 x i32> %val11) #0 + call void asm sideeffect "; use $0", "s"(<2 x i32> %val12) #0 + call void asm sideeffect "; use $0", "s"(<4 x i32> %val13) #0 + call void asm sideeffect "; use $0", "s"(<8 x i32> %val14) #0 + call void asm sideeffect "; use $0", "s"(<16 x i32> %val15) #0 + call void asm sideeffect "; use $0", "s"(<2 x i32> %val16) #0 + call void asm sideeffect "; use $0", "s"(<4 x i32> %val17) #0 + call void asm sideeffect "; use $0", "s"(<8 x i32> %val18) #0 + call void asm sideeffect "; use $0", "s"(<16 x i32> %val19) #0 + br label %ret + +ret: + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "amdgpu-waves-per-eu"="10,10" } -- 2.7.4