From d78597ec08b9f8ebc7b9ea6627815b05647ec86d Mon Sep 17 00:00:00 2001
From: Matthias Braun
Date: Fri, 21 Apr 2017 22:42:08 +0000
Subject: [PATCH] AArch64FrameLowering: Check if the ExtraCSSpill register is
 actually unused

The code assumed that when saving an additional CSR register
(ExtraCSSpill==true) we would have a free register throughout the
function. This is not true if that CSR register is also used to pass
values, as in the swiftself case.

rdar://31451816

llvm-svn: 301057
---
 llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 12 ++--
 llvm/test/CodeGen/AArch64/swiftself-scavenger.ll | 82 ++++++++++++++++++++++++
 2 files changed, 88 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/swiftself-scavenger.ll

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 550174b..400e307 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1125,7 +1125,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   if (RegInfo->hasBasePointer(MF))
     BasePointerReg = RegInfo->getBaseRegister();
 
-  bool ExtraCSSpill = false;
+  unsigned ExtraCSSpill = 0;
   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
   // Figure out which callee-saved registers to save/restore.
   for (unsigned i = 0; CSRegs[i]; ++i) {
@@ -1153,7 +1153,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
       SavedRegs.set(PairedReg);
       if (AArch64::GPR64RegClass.contains(PairedReg) &&
           !RegInfo->isReservedReg(MF, PairedReg))
-        ExtraCSSpill = true;
+        ExtraCSSpill = PairedReg;
     }
   }
 
@@ -1186,8 +1186,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // register scavenging. If we already spilled an extra callee-saved register
   // above to keep the number of spills even, we don't need to do anything else
   // here.
-  if (BigStack && !ExtraCSSpill) {
-    if (UnspilledCSGPR != AArch64::NoRegister) {
+  if (BigStack) {
+    if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
       DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
             << " to get a scratch register.\n");
       SavedRegs.set(UnspilledCSGPR);
@@ -1196,13 +1196,13 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
       // store the pair.
       if (produceCompactUnwindFrame(MF))
         SavedRegs.set(UnspilledCSGPRPaired);
-      ExtraCSSpill = true;
+      ExtraCSSpill = UnspilledCSGPRPaired;
       NumRegsSpilled = SavedRegs.count();
     }
 
     // If we didn't find an extra callee-saved register to spill, create
     // an emergency spill slot.
-    if (!ExtraCSSpill) {
+    if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
       const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
       int FI = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), false);
       RS->addScavengingFrameIndex(FI);
diff --git a/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll b/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll
new file mode 100644
index 0000000..6d02784
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll
@@ -0,0 +1,82 @@
+; RUN: llc -o - %s | FileCheck %s
+; Check that we reserve an emergency spill slot, even if we added an extra
+; CSR spill for the values used by the swiftself parameter.
+; CHECK-LABEL: func:
+; CHECK: str [[REG:x[0-9]+]], [sp, #8]
+; CHECK: add [[REG]], sp, #248
+; CHECK: str xzr, [{{\s*}}[[REG]], #32760]
+; CHECK: ldr x30, [sp, #8]
+target triple = "arm64-apple-ios"
+
+@ptr8 = external global i8*
+@ptr64 = external global i64
+
+define hidden swiftcc void @func(i8* swiftself %arg) #0 {
+bb:
+  %stack0 = alloca i8*, i32 5000, align 8
+  %stack1 = alloca i8*, i32 32, align 8
+
+  %v0 = load volatile i64, i64* @ptr64, align 8
+  %v1 = load volatile i64, i64* @ptr64, align 8
+  %v2 = load volatile i64, i64* @ptr64, align 8
+  %v3 = load volatile i64, i64* @ptr64, align 8
+  %v4 = load volatile i64, i64* @ptr64, align 8
+  %v5 = load volatile i64, i64* @ptr64, align 8
+  %v6 = load volatile i64, i64* @ptr64, align 8
+  %v7 = load volatile i64, i64* @ptr64, align 8
+  %v8 = load volatile i64, i64* @ptr64, align 8
+  %v9 = load volatile i64, i64* @ptr64, align 8
+  %v10 = load volatile i64, i64* @ptr64, align 8
+  %v11 = load volatile i64, i64* @ptr64, align 8
+  %v12 = load volatile i64, i64* @ptr64, align 8
+  %v13 = load volatile i64, i64* @ptr64, align 8
+  %v14 = load volatile i64, i64* @ptr64, align 8
+  %v15 = load volatile i64, i64* @ptr64, align 8
+  %v16 = load volatile i64, i64* @ptr64, align 8
+  %v17 = load volatile i64, i64* @ptr64, align 8
+  %v18 = load volatile i64, i64* @ptr64, align 8
+  %v19 = load volatile i64, i64* @ptr64, align 8
+  %v20 = load volatile i64, i64* @ptr64, align 8
+  %v21 = load volatile i64, i64* @ptr64, align 8
+  %v22 = load volatile i64, i64* @ptr64, align 8
+  %v23 = load volatile i64, i64* @ptr64, align 8
+  %v24 = load volatile i64, i64* @ptr64, align 8
+  %v25 = load volatile i64, i64* @ptr64, align 8
+
+  ; this should exceed stack-relative addressing limits and need an emergency
+  ; spill slot.
+  %s = getelementptr inbounds i8*, i8** %stack0, i64 4092
+  store volatile i8* null, i8** %s
+  store volatile i8* null, i8** %stack1
+
+  store volatile i64 %v0, i64* @ptr64, align 8
+  store volatile i64 %v1, i64* @ptr64, align 8
+  store volatile i64 %v2, i64* @ptr64, align 8
+  store volatile i64 %v3, i64* @ptr64, align 8
+  store volatile i64 %v4, i64* @ptr64, align 8
+  store volatile i64 %v5, i64* @ptr64, align 8
+  store volatile i64 %v6, i64* @ptr64, align 8
+  store volatile i64 %v7, i64* @ptr64, align 8
+  store volatile i64 %v8, i64* @ptr64, align 8
+  store volatile i64 %v9, i64* @ptr64, align 8
+  store volatile i64 %v10, i64* @ptr64, align 8
+  store volatile i64 %v11, i64* @ptr64, align 8
+  store volatile i64 %v12, i64* @ptr64, align 8
+  store volatile i64 %v13, i64* @ptr64, align 8
+  store volatile i64 %v14, i64* @ptr64, align 8
+  store volatile i64 %v15, i64* @ptr64, align 8
+  store volatile i64 %v16, i64* @ptr64, align 8
+  store volatile i64 %v17, i64* @ptr64, align 8
+  store volatile i64 %v18, i64* @ptr64, align 8
+  store volatile i64 %v19, i64* @ptr64, align 8
+  store volatile i64 %v20, i64* @ptr64, align 8
+  store volatile i64 %v21, i64* @ptr64, align 8
+  store volatile i64 %v22, i64* @ptr64, align 8
+  store volatile i64 %v23, i64* @ptr64, align 8
+  store volatile i64 %v24, i64* @ptr64, align 8
+  store volatile i64 %v25, i64* @ptr64, align 8
+
+  ; use swiftself parameter late so it stays alive throughout the function.
+  store volatile i8* %arg, i8** @ptr8
+  ret void
+}
-- 
2.7.4
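
The shape of the fix can be summarized in a short standalone C++ sketch. Everything
below (FrameState, needsEmergencySpillSlot, the x19/x20 register numbers) is
hypothetical and only mirrors the control flow, not the LLVM API: ExtraCSSpill now
records which register was spilled instead of merely whether one was spilled, and the
emergency spill slot is still reserved when that register is not actually free.

  // Standalone illustration only: hypothetical types and names, not the LLVM API.
  #include <cstdio>

  struct FrameState {
    bool BigStack;          // frame is large enough to need a scavenging scratch reg
    unsigned ExtraCSSpill;  // extra callee-saved register spilled above, 0 if none
    bool ExtraRegIsUsed;    // is that register also live in the body (swiftself)?
  };

  // Mirrors the patched condition: an emergency spill slot is still needed unless an
  // extra CSR was spilled and that register is genuinely unused in the function.
  bool needsEmergencySpillSlot(const FrameState &FS) {
    if (!FS.BigStack)
      return false;
    return FS.ExtraCSSpill == 0 || FS.ExtraRegIsUsed;
  }

  int main() {
    // swiftself-like case: the saved CSR (e.g. x20) still carries a value, so the
    // slot must be reserved -- this is the case the old boolean flag got wrong.
    FrameState Swiftself = {true, 20u, true};
    // ordinary case: the extra CSR is genuinely free and can serve as the scratch.
    FrameState Plain = {true, 19u, false};
    std::printf("swiftself needs slot: %d, plain needs slot: %d\n",
                needsEmergencySpillSlot(Swiftself), needsEmergencySpillSlot(Plain));
    return 0;
  }

Running the sketch prints "swiftself needs slot: 1, plain needs slot: 0", which is
the behavior the new test pins down.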
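
A rough calculation of why the test's store needs a scratch register in the first
place (the exact frame layout is the backend's business; the encoding limit is the
point). A 64-bit str/ldr with an unsigned immediate offset encodes a 12-bit value
scaled by 8, so the farthest reachable offset from a base register is 4095 * 8 =
32760 bytes:

  getelementptr index 4092 * 8 bytes per i8*  = 32736 bytes into %stack0
  largest scaled immediate offset             = 4095 * 8 = 32760 bytes
  32736 + (distance of %stack0 above sp)      > 32760

So the address has to be materialized in a register first (the add [[REG]], sp, #248
followed by str xzr, [..., #32760] in the CHECK lines). With the %v0..%v25 values
keeping the general-purpose registers occupied and the would-be scratch CSR still
holding the swiftself value, the scavenger can only free a register by spilling one
to the emergency slot, which is exactly what the patch makes sure gets reserved.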