From 17cb8982f4ac46311509b440c5e73041bbee41ab Mon Sep 17 00:00:00 2001
From: Kristof Beyls
Date: Thu, 9 Apr 2015 08:49:47 +0000
Subject: [PATCH] [AArch64] Add support for dynamic stack alignment

Differential Revision: http://reviews.llvm.org/D8876

llvm-svn: 234471
---
 llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 180 ++++++--
 llvm/lib/Target/AArch64/AArch64FrameLowering.h   |   2 +-
 llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp  |  30 ++
 llvm/lib/Target/AArch64/AArch64RegisterInfo.h    |   3 +
 .../AArch64/aarch64-dynamic-stack-layout.ll      | 491 +++++++++++++++++++++
 5 files changed, 663 insertions(+), 43 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 84bf317..01716c3 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -9,6 +9,82 @@
 //
 // This file contains the AArch64 implementation of TargetFrameLowering class.
 //
+// On AArch64, stack frames are structured as follows:
+//
+// The stack grows downward.
+//
+// All of the individual frame areas on the frame below are optional, i.e. it's
+// possible to create a function so that the particular area isn't present
+// in the frame.
+//
+// At function entry, the "frame" looks as follows:
+//
+// |                                   | Higher address
+// |-----------------------------------|
+// |                                   |
+// | arguments passed on the stack     |
+// |                                   |
+// |-----------------------------------| <- sp
+// |                                   | Lower address
+//
+//
+// After the prologue has run, the frame has the following general structure.
+// Note that this doesn't depict the case where a red-zone is used. Also,
+// technically the last frame area (VLAs) doesn't get created until the
+// main function body, after the prologue has run. However, it's depicted
+// here for completeness.
+//
+// |                                   | Higher address
+// |-----------------------------------|
+// |                                   |
+// | arguments passed on the stack     |
+// |                                   |
+// |-----------------------------------|
+// |                                   |
+// | prev_fp, prev_lr                  |
+// | (a.k.a. "frame record")           |
+// |-----------------------------------| <- fp(=x29)
+// |                                   |
+// | other callee-saved registers      |
+// |                                   |
+// |-----------------------------------|
+// |.empty.space.to.make.part.below....|
+// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
+// |.the.standard.16-byte.alignment....|  compile time; if present)
+// |-----------------------------------|
+// |                                   |
+// | local variables of fixed size     |
+// | including spill slots             |
+// |-----------------------------------| <- bp(not defined by ABI,
+// |.variable-sized.local.variables....|       LLVM chooses X19)
+// |.(VLAs)............................| (size of this area is unknown at
+// |...................................|  compile time)
+// |-----------------------------------| <- sp
+// |                                   | Lower address
+//
+//
+// To access data in a frame, a constant offset from one of the pointers
+// (fp, bp, sp) must be computable at compile time. The sizes of the areas
+// with a dotted background cannot be computed at compile time if those
+// areas are present, so all three of fp, bp and sp need to be set up to
+// be able to access all contents of the frame, assuming all of the frame
+// areas are non-empty.
+//
+// For most functions, some of the frame areas are empty.
+// For those functions, it may not be necessary to set up fp or bp:
+// * A base pointer is definitely needed when there are both VLAs and local
+//   variables with more-than-default alignment requirements.
+// * A frame pointer is definitely needed when there are local variables with
+//   more-than-default alignment requirements.
+//
+// In some cases when a base pointer is not strictly needed, it is generated
+// anyway when offsets from the frame pointer to access local variables become
+// so large that the offset can't be encoded in the immediate fields of loads
+// or stores.
+//
+// FIXME: also explain the redzone concept.
+// FIXME: also explain the concept of reserved call frames.
+//
 //===----------------------------------------------------------------------===//

 #include "AArch64FrameLowering.h"
@@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",

 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

-static unsigned estimateStackSize(MachineFunction &MF) {
-  const MachineFrameInfo *FFI = MF.getFrameInfo();
-  int Offset = 0;
-  for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
-    int FixedOff = -FFI->getObjectOffset(i);
-    if (FixedOff > Offset)
-      Offset = FixedOff;
-  }
-  for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
-    if (FFI->isDeadObjectIndex(i))
-      continue;
-    Offset += FFI->getObjectSize(i);
-    unsigned Align = FFI->getObjectAlignment(i);
-    // Adjust to alignment boundary
-    Offset = (Offset + Align - 1) / Align * Align;
-  }
-  // This does not include the 16 bytes used for fp and lr.
-  return (unsigned)Offset;
-}
-
 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
   if (!EnableRedZone)
     return false;
@@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
 /// pointer register.
 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-#ifndef NDEBUG
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
-  assert(!RegInfo->needsStackRealignment(MF) &&
-         "No stack realignment on AArch64!");
-#endif
-
   return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
           MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
-          MFI->hasPatchPoint());
+          MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
 }

 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
   AFI->setLocalStackSize(NumBytes);

   // Allocate space for the rest of the frame.
-  if (NumBytes) {
-    // If we're a leaf function, try using the red zone.
-    if (!canUseRedZone(MF))
-      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
-                      MachineInstr::FrameSetup);
+
+  const unsigned Alignment = MFI->getMaxAlignment();
+  const bool NeedsRealignment = (Alignment > 16);
+  unsigned scratchSPReg = AArch64::SP;
+  if (NeedsRealignment) {
+    // Use X9 as a scratch register to re-align the stack pointer. X9 is a
+    // caller-saved temporary and holds no live value this early in the
+    // prologue.
+    assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) &&
+           "No scratch register to align SP!");
+    scratchSPReg = AArch64::X9;
+  }
+
+  // If we're a leaf function, try using the red zone.
+  if (NumBytes && !canUseRedZone(MF))
+    // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
+    // the correct value here, as NumBytes also includes padding bytes,
+    // which shouldn't be counted here.
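+    // (Note: when realignment is needed, scratchSPReg is X9, so the SUB
+    // emitted here leaves SP untouched; the ANDXri emitted further below
+    // then writes the re-aligned value into SP.)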
+    emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
+                    MachineInstr::FrameSetup);
+
+  assert(!(NeedsRealignment && NumBytes == 0) &&
+         "NumBytes should never be 0 when realignment is needed");
+
+  if (NumBytes && NeedsRealignment) {
+    const unsigned NrBitsToZero = countTrailingZeros(Alignment);
+    assert(NrBitsToZero > 1);
+    assert(scratchSPReg != AArch64::SP);
+
+    // SUB X9, SP, NumBytes
+    //   -- X9 is a temporary register, so it shouldn't contain any live data
+    //      here and is free to use. This SUB is already produced by the
+    //      emitFrameOffset call above.
+    // AND SP, X9, 0b11111...0000
+    // The logical immediates have a non-trivial encoding. The following
+    // formula computes the encoded immediate with all ones except the
+    // NrBitsToZero least significant bits, which are zero.
+    uint32_t andMaskEncoded = (1 << 12)                         // = N
+                              | ((64 - NrBitsToZero) << 6)      // immr
+                              | ((64 - NrBitsToZero - 1) << 0); // imms
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
+        .addReg(scratchSPReg, RegState::Kill)
+        .addImm(andMaskEncoded);
   }

   // If we need a base pointer, set it up here. It's whatever the value of the
@@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
   // FIXME: Clarify FrameSetup flags here.
   // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
   // needed.
-  //
-  if (RegInfo->hasBasePointer(MF))
-    TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
+  if (RegInfo->hasBasePointer(MF)) {
+    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
+                     false);
+  }

   if (needsFrameMoves) {
     const DataLayout *TD = MF.getTarget().getDataLayout();
     const int StackGrowth = -TD->getPointerSize(0);
     unsigned FramePtr = RegInfo->getFrameRegister(MF);
-
     // An example of the prologue:
     //
     //     .globl __foo
@@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
     return;

-  // Initial and residual are named for consitency with the prologue. Note that
+  // Initial and residual are named for consistency with the prologue. Note that
   // in the epilogue, the residual adjustment is executed first.
   uint64_t ArgumentPopSize = 0;
   if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
@@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
   bool isFixed = MFI->isFixedObjectIndex(FI);

   // Use frame pointer to reference fixed objects. Use it for locals if
-  // there are VLAs (and thus the SP isn't reliable as a base).
-  // Make sure useFPForScavengingIndex() does the right thing for the emergency
-  // spill slot.
+  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
+  // reliable as a base). Make sure useFPForScavengingIndex() does the
+  // right thing for the emergency spill slot.
   bool UseFP = false;
   if (AFI->hasStackFrame()) {
     // Note: Keeping the following as multiple 'if' statements rather than
@@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
     // Argument access should always use the FP.
     if (isFixed) {
       UseFP = hasFP(MF);
-    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
+    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
+               !RegInfo->needsStackRealignment(MF)) {
       // Use SP or FP, whichever gives us the best chance of the offset
       // being in range for direct access. If the FPOffset is positive,
       // that'll always be best, as the SP will be even further away.
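A standalone sketch (not part of the patch) of what the logical-immediate
formula in emitPrologue above produces for a 128-byte realignment. It uses
__builtin_ctz in place of LLVM's countTrailingZeros; the decode steps follow
the AArch64 rules for 64-bit logical immediates (N=1: a run of imms+1 ones,
rotated right by immr). The file name and main() wrapper are just for
illustration.

    // logical-imm-sketch.cpp -- hypothetical standalone example.
    #include <cstdint>
    #include <cstdio>

    int main() {
      const unsigned Alignment = 128;                         // getMaxAlignment()
      const unsigned NrBitsToZero = __builtin_ctz(Alignment); // = 7
      // Same formula as the patch: N=1, immr = 64-7 = 57, imms = 64-7-1 = 56.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms
      // Decode it back (valid here since immr != 0): a run of imms+1 = 57
      // ones, rotated right by immr = 57, i.e. zeros in the low 7 bits.
      unsigned immr = (andMaskEncoded >> 6) & 0x3f;
      unsigned imms = andMaskEncoded & 0x3f;
      uint64_t Run = (imms == 63) ? ~0ULL : ((1ULL << (imms + 1)) - 1);
      uint64_t Mask = (Run >> immr) | (Run << (64 - immr)); // rotate right
      // Prints: encoded=0x1e78 mask=0xffffffffffffff80 -- exactly the
      // "and sp, x9, #0xffffffffffffff80" seen in the tests below.
      printf("encoded=0x%x mask=0x%016llx\n", (unsigned)andMaskEncoded,
             (unsigned long long)Mask);
      return 0;
    }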
@@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
     }
   }

+  assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
+         "In the presence of dynamic stack pointer realignment, "
+         "non-argument objects cannot be accessed through the frame pointer");
+
   if (UseFP) {
     FrameReg = RegInfo->getFrameRegister(MF);
     return FPOffset;
@@ -794,6 +886,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
   if (RegInfo->hasBasePointer(MF))
     MRI->setPhysRegUsed(RegInfo->getBaseRegister());

+  if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
+    MRI->setPhysRegUsed(AArch64::X9);
+
   // If any callee-saved registers are used, the frame cannot be eliminated.
   unsigned NumGPRSpilled = 0;
   unsigned NumFPRSpilled = 0;
@@ -867,7 +962,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
     // The CSR spill slots have not been allocated yet, so estimateStackSize
     // won't include them.
     MachineFrameInfo *MFI = MF.getFrameInfo();
-    unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+    unsigned CFSize =
+        MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
     DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
     bool BigStack = (CFSize >= 256);
     if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index df3875f..1439bf3 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
 public:
   explicit AArch64FrameLowering()
       : TargetFrameLowering(StackGrowsDown, 16, 0, 16,
-                            false /*StackRealignable*/) {}
+                            true /*StackRealignable*/) {}

   void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 33c11fe..1836682 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   // large enough that referencing from the FP won't result in things being
   // in range relatively often, we can use a base pointer to allow access
   // from the other direction like the SP normally works.
+  // Furthermore, if variable-sized objects are present and the stack needs
+  // to be dynamically re-aligned, the base pointer is the only reliable way
+  // to reference the locals.
   if (MFI->hasVarSizedObjects()) {
+    if (needsStackRealignment(MF))
+      return true;
     // Conservatively estimate whether the negative offset from the frame
     // pointer will be sufficient to reach. If a function has a smallish
     // frame, it's less likely to have lots of spills and callee saved
@@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   return false;
 }

+bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
+  if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+    return false;
+
+  return true;
+}
+
+// FIXME: share this with other backends with identical implementation?
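+// Realignment is requested either by a stack object whose alignment exceeds
+// the target's stack alignment (16 bytes on AArch64) or by an explicit
+// alignstack attribute on the function, as checked below.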
+bool
+AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const Function *F = MF.getFunction();
+  unsigned StackAlign = MF.getTarget()
+                            .getSubtargetImpl(*MF.getFunction())
+                            ->getFrameLowering()
+                            ->getStackAlignment();
+  bool requiresRealignment =
+      ((MFI->getMaxAlignment() > StackAlign) ||
+       F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+                                       Attribute::StackAlignment));
+
+  return requiresRealignment && canRealignStack(MF);
+}
+
 unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index c01bfa5..8c379d9 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -93,6 +93,9 @@ public:

   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                                MachineFunction &MF) const override;
+
+  // Base pointer (stack realignment) support.
+  bool canRealignStack(const MachineFunction &MF) const;
+  bool needsStackRealignment(const MachineFunction &MF) const override;
 };

 } // end namespace llvm
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
new file mode 100644
index 0000000..a31c66b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
@@ -0,0 +1,491 @@
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; This test aims to check basic correctness of frame layout and
+; frame access code. There are 8 functions in this test file,
+; each implementing one element in the Cartesian product of:
+; . a function having a VLA / no VLA
+; . a function with dynamic stack realignment / no dynamic stack realignment
+; . a function needing a frame pointer / no frame pointer,
+; since the presence/absence of these influences the frame layout and
+; which pointer to use to access various parts of the frame (bp, sp, fp).
+; (A ninth function, vla_dynamicrealign_nocall_large_align, additionally
+; checks realignment to a very large alignment.)
+;
+; Furthermore, in every test function:
+; . there is always one integer and one floating-point argument, to be
+;   able to check that those are accessed correctly.
+; . there is always one local variable, to check that it is accessed
+;   correctly.
+;
+; The LLVM-IR below was produced by clang on the following C++ code:
+;extern "C" int g();
+;extern "C" int novla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                           double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; return i10 + (int)d10 + l1 + g();
+;}
+;extern "C" int novla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                             double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; return i10 + (int)d10 + l1;
+;}
+;extern "C" int novla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                         double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; return i10 + (int)d10 + l1 + g();
+;}
+;extern "C" int novla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                           double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; return i10 + (int)d10 + l1;
+;}
+;
+;extern "C" int vla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                         double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + g() + vla[0];
+;}
+;extern "C" int vla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                           double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + vla[0];
+;}
+;extern "C" int vla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                       double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + g() + vla[0];
+;}
+;extern "C" int vla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
+;                                         double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
+;{
+; // use an argument passed on the stack.
+; alignas(128) volatile int l1;
+; volatile int vla[i1];
+; return i10 + (int)d10 + l1 + vla[0];
+;}
+
+
+
+define i32 @novla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+  %l1 = alloca i32, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %call = tail call i32 @g()
+  %add2 = add nsw i32 %add1, %call
+  ret i32 %add2
+}
+; CHECK-LABEL: novla_nodynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
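+; (The pre-indexed store both spills x20/x19 and allocates the entire
+; 32-byte callee-save area in a single instruction.)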
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+declare i32 @g() #0
+
+; Function Attrs: nounwind
+define i32 @novla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+  %l1 = alloca i32, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  ret i32 %add1
+}
+; CHECK-LABEL: novla_nodynamicrealign_nocall
+; Check that space is reserved for one local variable on the stack.
+; CHECK: sub sp, sp, #16 // =16
+; Check correct access to arguments passed on the stack, through stack pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [sp, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [sp, #24]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK: add sp, sp, #16 // =16
+; CHECK: ret
+
+
+define i32 @novla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+  %l1 = alloca i32, align 128
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %call = tail call i32 @g()
+  %add2 = add nsw i32 %add1, %call
+  ret i32 %add2
+}
+
+; CHECK-LABEL: novla_dynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub x9, sp, #96
+; CHECK: and sp, x9, #0xffffffffffffff80
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
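+; (The prologue set fp = sp + 16 while setting up the callee-save area, so
+; "sub sp, x29, #16" recovers that sp value regardless of how much
+; realignment padding was inserted afterwards.)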
+; CHECK: sub sp, x29, #16 // =16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+  %l1 = alloca i32, align 128
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  ret i32 %add1
+}
+
+; CHECK-LABEL: novla_dynamicrealign_nocall
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK: sub x9, sp, #112
+; CHECK: and sp, x9, #0xffffffffffffff80
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK: mov sp, x29
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+
+
+define i32 @vla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+  %l1 = alloca i32, align 4
+  %0 = zext i32 %i1 to i64
+  %vla = alloca i32, i64 %0, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %call = tail call i32 @g()
+  %add2 = add nsw i32 %add1, %call
+  %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+  %add3 = add nsw i32 %add2, %1
+  ret i32 %add3
+}
+
+; CHECK-LABEL: vla_nodynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that space is reserved on the stack for the local variable,
+; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub sp, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through frame pointer
+; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-20]
+; Check correct access to the VLA variable, through the register that
+; holds its address
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
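+; (With a VLA, sp is an unknown amount below its prologue value at this
+; point, so the epilogue must recompute it from fp instead of adding back
+; a constant.)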
+; CHECK: sub sp, x29, #16 // =16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @vla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+  %l1 = alloca i32, align 4
+  %0 = zext i32 %i1 to i64
+  %vla = alloca i32, i64 %0, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 4
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+  %add2 = add nsw i32 %add1, %1
+  ret i32 %add2
+}
+
+; CHECK-LABEL: vla_nodynamicrealign_nocall
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
+; Check that space is reserved on the stack for the local variable,
+; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
+; CHECK: sub sp, sp, #16
+; Check correctness of cfi pseudo-instructions
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through frame pointer
+; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-4]
+; Check correct access to the VLA variable, through the register that
+; holds its address
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK: mov sp, x29
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+
+
+define i32 @vla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
+entry:
+  %l1 = alloca i32, align 128
+  %0 = zext i32 %i1 to i64
+  %vla = alloca i32, i64 %0, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %call = tail call i32 @g()
+  %add2 = add nsw i32 %add1, %call
+  %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+  %add3 = add nsw i32 %add2, %1
+  ret i32 %add3
+}
+
+; CHECK-LABEL: vla_dynamicrealign_call
+; CHECK: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK: stp x22, x21, [sp, #-48]!
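+; (Three callee-saved pairs are spilled for this function: x22/x21 above,
+; then x20/x19 and the x29/x30 frame record below, giving the 48-byte
+; callee-save area.)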
+; CHECK: stp x20, x19, [sp, #16]
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #32]
+; CHECK: add x29, sp, #32
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #80 // =80
+; CHECK: and sp, x9, #0xffffffffffffff80
+; CHECK: mov x19, sp
+; Check correctness of cfi pseudo-instructions
+; CHECK: .cfi_def_cfa w29, 16
+; CHECK: .cfi_offset w30, -8
+; CHECK: .cfi_offset w29, -16
+; CHECK: .cfi_offset w19, -24
+; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w21, -40
+; CHECK: .cfi_offset w22, -48
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK: sub sp, x29, #32
+; CHECK: ldp x29, x30, [sp, #32]
+; CHECK: ldp x20, x19, [sp, #16]
+; CHECK: ldp x22, x21, [sp], #48
+; CHECK: ret
+; CHECK: .cfi_endproc
+
+
+; Function Attrs: nounwind
+define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+  %l1 = alloca i32, align 128
+  %0 = zext i32 %i1 to i64
+  %vla = alloca i32, i64 %0, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 128
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+  %add2 = add nsw i32 %add1, %1
+  ret i32 %add2
+}
+
+; CHECK-LABEL: vla_dynamicrealign_nocall
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #96
+; CHECK: and sp, x9, #0xffffffffffffff80
+; CHECK: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
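+; (Note that x19, which served as the base pointer, is itself restored
+; from the callee-save area by the final ldp below.)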
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+
+
+; Function Attrs: nounwind
+define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
+entry:
+  %l1 = alloca i32, align 32768
+  %0 = zext i32 %i1 to i64
+  %vla = alloca i32, i64 %0, align 4
+  %conv = fptosi double %d10 to i32
+  %add = add nsw i32 %conv, %i10
+  %l1.0.l1.0. = load volatile i32, i32* %l1, align 32768
+  %add1 = add nsw i32 %add, %l1.0.l1.0.
+  %1 = load volatile i32, i32* %vla, align 4, !tbaa !1
+  %add2 = add nsw i32 %add1, %1
+  ret i32 %add2
+}
+
+; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
+; Check that used callee-saved registers are saved
+; CHECK: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK: stp x29, x30, [sp, #16]
+; CHECK: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 32768
+; bytes & the base pointer (x19) gets initialized to
+; this 32768-byte aligned area for local variables &
+; spill slots
+; CHECK: sub x9, sp, #7, lsl #12
+; CHECK: and sp, x9, #0xffffffffffff8000
+; CHECK: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
+; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK: ubfx x9, x0, #0, #32
+; CHECK: lsl x9, x9, #2
+; CHECK: add x9, x9, #15
+; CHECK: and x9, x9, #0xfffffffffffffff0
+; CHECK: mov x10, sp
+; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK: sub sp, x29, #16
+; CHECK: ldp x29, x30, [sp, #16]
+; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ret
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
-- 
2.7.4