int FrameIdx;
int Offset;
bool IsGPR;
+ bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};
// Translate the callee-saved register list CSI into RegPairInfo entries
// appended to RegPairs (description applies to the added/context lines).
//
// Each entry takes Reg1 from CSI[i]; CSI[i + 1] becomes Reg2 only when it is
// in the same register class (GPR64 or FPR64) as Reg1, otherwise the entry
// stays unpaired. RPI.Offset is expressed in units of 8 bytes and is derived
// from a running byte offset that starts at AFI->getCalleeSavedStackSize()
// and decreases by 16 for a pair and by 8 for a single register (or by 16
// for a single register when the area is larger than Count * 8, in which
// case the frame object is also resized to 16 bytes). After the loop the
// last entry's Offset is overwritten with the size of the whole callee-save
// area in 8-byte units.
//
// Returns without touching RegPairs when CSI is empty. For MachO targets the
// asserts require an even register count and adjacent register pairs.
// NOTE(review): TRI is not referenced by any added line visible here.
-static void
-computeCalleeSaveRegisterPairs(const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI,
- SmallVectorImpl<RegPairInfo> &RegPairs) {
+static void computeCalleeSaveRegisterPairs(
+ MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {
- unsigned Count = CSI.size();
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+ if (CSI.empty())
+ return;
- for (unsigned i = 0; i < Count; i += 2) {
- unsigned idx = Count - i - 2;
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned Count = CSI.size();
+ // MachO's compact unwind format relies on all registers being stored in
+ // pairs.
+ assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
+ (Count & 1) == 0) &&
+ "Odd number of callee-saved regs to spill!");
// Running byte offset: starts at the full callee-save area size and is
// decremented once per RegPairInfo in the loop below.
+ unsigned Offset = AFI->getCalleeSavedStackSize();
+
+ for (unsigned i = 0; i < Count; ++i) {
RegPairInfo RPI;
- RPI.Reg1 = CSI[idx].getReg();
- RPI.Reg2 = CSI[idx + 1].getReg();
+ RPI.Reg1 = CSI[i].getReg();
+
+ assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
+ AArch64::FPR64RegClass.contains(RPI.Reg1));
+ RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
+
+ // Add the next reg to the pair if it is in the same register class.
+ if (i + 1 < Count) {
+ unsigned NextReg = CSI[i + 1].getReg();
+ if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
+ (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
+ RPI.Reg2 = NextReg;
+ }
// GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
// list to come in sorted by frame index so that we can issue the store
// pair instructions directly. Assert if we see anything otherwise.
//
// The order of the registers in the list is controlled by
// getCalleeSavedRegs(), so they will always be in-order, as well.
- assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
+ assert((!RPI.isPaired() ||
+ (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
"Out of order callee saved regs!");
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
- assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
- RPI.FrameIdx = CSI[idx + 1].getFrameIdx();
-
- if (AArch64::GPR64RegClass.contains(RPI.Reg1))
- RPI.IsGPR = true;
- else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
- RPI.IsGPR = false;
- else
- llvm_unreachable("Unexpected callee saved register!");
- // Compute offset: i = 0 => offset = Count;
- // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
- RPI.Offset = (i == 0) ? Count : i;
+
+ // MachO's compact unwind format relies on all registers being stored in
+ // adjacent register pairs.
+ assert((!MF.getSubtarget<AArch64Subtarget>().isTargetMachO() ||
+ (RPI.isPaired() &&
+ ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
+ RPI.Reg1 + 1 == RPI.Reg2))) &&
+ "Callee-save registers not saved as adjacent register pair!");
+
+ RPI.FrameIdx = CSI[i].getFrameIdx();
+
+ if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
+ // Round up size of non-pair to pair size if we need to pad the
+ // callee-save area to ensure 16-byte alignment.
+ Offset -= 16;
+ assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
+ MFI->setObjectSize(RPI.FrameIdx, 16);
+ } else
+ Offset -= RPI.isPaired() ? 16 : 8;
+ assert(Offset % 8 == 0);
+ RPI.Offset = Offset / 8;
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
"Offset out of bounds for LDP/STP immediate");
RegPairs.push_back(RPI);
// A paired entry also consumed CSI[i + 1], so step past it.
+ if (RPI.isPaired())
+ ++i;
}
+
+ // Align first offset to even 16-byte boundary to avoid additional SP
+ // adjustment instructions.
+ // Last pair offset is size of whole callee-save region for SP
+ // pre-dec/post-inc.
+ RegPairInfo &LastPair = RegPairs.back();
+ assert(AFI->getCalleeSavedStackSize() % 8 == 0);
+ LastPair.Offset = AFI->getCalleeSavedStackSize() / 8;
}
bool AArch64FrameLowering::spillCalleeSavedRegisters(
DebugLoc DL;
SmallVector<RegPairInfo, 8> RegPairs;
- computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs);
+ computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
- for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
+ for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
++RPII) {
RegPairInfo RPI = *RPII;
unsigned Reg1 = RPI.Reg1;
// Rationale: This sequence saves uop updates compared to a sequence of
// pre-increment spills like stp xi,xj,[sp,#-16]!
// Note: Similar rationale and sequence for restores in epilog.
- bool BumpSP = RPII == RegPairs.begin();
+ bool BumpSP = RPII == RegPairs.rbegin();
if (RPI.IsGPR) {
// For first spill use pre-increment store.
if (BumpSP)
- StrOpc = AArch64::STPXpre;
+ StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre;
else
- StrOpc = AArch64::STPXi;
+ StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
} else {
// For first spill use pre-increment store.
if (BumpSP)
- StrOpc = AArch64::STPDpre;
+ StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre;
else
- StrOpc = AArch64::STPDi;
+ StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
}
- DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
- << TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx
- << ", " << RPI.FrameIdx+1 << ")\n");
+ DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
+ if (RPI.isPaired())
+ dbgs() << ", " << TRI->getName(Reg2);
+ dbgs() << ") -> fi#(" << RPI.FrameIdx;
+ if (RPI.isPaired())
+ dbgs() << ", " << RPI.FrameIdx+1;
+ dbgs() << ")\n");
const int Offset = BumpSP ? -RPI.Offset : RPI.Offset;
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
if (BumpSP)
MIB.addReg(AArch64::SP, RegState::Define);
- MBB.addLiveIn(Reg1);
- MBB.addLiveIn(Reg2);
- MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
+ if (RPI.isPaired()) {
+ MBB.addLiveIn(Reg1);
+ MBB.addLiveIn(Reg2);
+ MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
.addReg(Reg1, getPrologueDeath(MF, Reg1))
.addReg(AArch64::SP)
.addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
.setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ MBB.addLiveIn(Reg1);
+ MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
+ .addReg(AArch64::SP)
+ .addImm(BumpSP ? Offset * 8 : Offset) // pre-indexed form is unscaled
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
return true;
}
if (MI != MBB.end())
DL = MI->getDebugLoc();
- computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs);
+ computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);
- for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
+ for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
++RPII) {
RegPairInfo RPI = *RPII;
unsigned Reg1 = RPI.Reg1;
// ldp x22, x21, [sp], #48 // addImm(+6)
// Note: see comment in spillCalleeSavedRegisters()
unsigned LdrOpc;
- bool BumpSP = RPII == std::prev(RegPairs.rend());
+ bool BumpSP = RPII == std::prev(RegPairs.end());
if (RPI.IsGPR) {
if (BumpSP)
- LdrOpc = AArch64::LDPXpost;
+ LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost;
else
- LdrOpc = AArch64::LDPXi;
+ LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
} else {
if (BumpSP)
- LdrOpc = AArch64::LDPDpost;
+ LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost;
else
- LdrOpc = AArch64::LDPDi;
+ LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
}
- DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
- << TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx
- << ", " << RPI.FrameIdx+1 << ")\n");
+ DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
+ if (RPI.isPaired())
+ dbgs() << ", " << TRI->getName(Reg2);
+ dbgs() << ") -> fi#(" << RPI.FrameIdx;
+ if (RPI.isPaired())
+ dbgs() << ", " << RPI.FrameIdx+1;
+ dbgs() << ")\n");
const int Offset = RPI.Offset;
MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
if (BumpSP)
MIB.addReg(AArch64::SP, RegState::Define);
- MIB.addReg(Reg2, getDefRegState(true))
+ if (RPI.isPaired())
+ MIB.addReg(Reg2, getDefRegState(true))
.addReg(Reg1, getDefRegState(true))
.addReg(AArch64::SP)
.addImm(Offset) // [sp], #offset * 8 or [sp, #offset * 8]
// where the factor * 8 is implicit
.setMIFlag(MachineInstr::FrameDestroy);
+ else
+ MIB.addReg(Reg1, getDefRegState(true))
+ .addReg(AArch64::SP)
+ .addImm(BumpSP ? Offset * 8 : Offset) // post-indexed form is unscaled
+ .setMIFlag(MachineInstr::FrameDestroy);
}
return true;
}
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- SmallVector<unsigned, 4> UnspilledCSGPRs;
- SmallVector<unsigned, 4> UnspilledCSFPRs;
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ unsigned UnspilledCSGPR = AArch64::NoRegister;
+ unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
// The frame record needs to be created by saving the appropriate registers
if (hasFP(MF)) {
SavedRegs.set(AArch64::LR);
}
- // Spill the BasePtr if it's used. Do this first thing so that the
- // getCalleeSavedRegs() below will get the right answer.
+ unsigned BasePointerReg = AArch64::NoRegister;
if (RegInfo->hasBasePointer(MF))
- SavedRegs.set(RegInfo->getBaseRegister());
+ BasePointerReg = RegInfo->getBaseRegister();
+ unsigned StackAlignReg = AArch64::NoRegister;
if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
- SavedRegs.set(AArch64::X9);
+ StackAlignReg = AArch64::X9;
- // If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned NumGPRSpilled = 0;
- unsigned NumFPRSpilled = 0;
bool ExtraCSSpill = false;
- bool CanEliminateFrame = true;
- DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ // Figure out which callee-saved registers to save/restore.
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ const unsigned Reg = CSRegs[i];
+
+ // Add the stack re-align scratch register and base pointer register to
+ // SavedRegs set only if they are callee-save.
+ if (Reg == BasePointerReg || Reg == StackAlignReg)
+ SavedRegs.set(Reg);
- // Check pairs of consecutive callee-saved registers.
- for (unsigned i = 0; CSRegs[i]; i += 2) {
- assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
-
- const unsigned OddReg = CSRegs[i];
- const unsigned EvenReg = CSRegs[i + 1];
- assert((AArch64::GPR64RegClass.contains(OddReg) &&
- AArch64::GPR64RegClass.contains(EvenReg)) ^
- (AArch64::FPR64RegClass.contains(OddReg) &&
- AArch64::FPR64RegClass.contains(EvenReg)) &&
- "Register class mismatch!");
-
- const bool OddRegUsed = SavedRegs.test(OddReg);
- const bool EvenRegUsed = SavedRegs.test(EvenReg);
-
- // Early exit if none of the registers in the register pair is actually
- // used.
- if (!OddRegUsed && !EvenRegUsed) {
- if (AArch64::GPR64RegClass.contains(OddReg)) {
- UnspilledCSGPRs.push_back(OddReg);
- UnspilledCSGPRs.push_back(EvenReg);
- } else {
- UnspilledCSFPRs.push_back(OddReg);
- UnspilledCSFPRs.push_back(EvenReg);
+ bool RegUsed = SavedRegs.test(Reg);
+ unsigned PairedReg = CSRegs[i ^ 1];
+ if (!RegUsed) {
+ if (AArch64::GPR64RegClass.contains(Reg) &&
+ !RegInfo->isReservedReg(MF, Reg)) {
+ UnspilledCSGPR = Reg;
+ UnspilledCSGPRPaired = PairedReg;
}
continue;
}
- unsigned Reg = AArch64::NoRegister;
- // If only one of the registers of the register pair is used, make sure to
- // mark the other one as used as well.
- if (OddRegUsed ^ EvenRegUsed) {
- // Find out which register is the additional spill.
- Reg = OddRegUsed ? EvenReg : OddReg;
- SavedRegs.set(Reg);
+ // MachO's compact unwind format relies on all registers being stored in
+ // pairs.
+ // FIXME: the usual format is actually better if unwinding isn't needed.
+ if (Subtarget.isTargetMachO() && !SavedRegs.test(PairedReg)) {
+ SavedRegs.set(PairedReg);
+ ExtraCSSpill = true;
}
+ }
- DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
- DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
-
- assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
- (RegInfo->getEncodingValue(OddReg) + 1 ==
- RegInfo->getEncodingValue(EvenReg))) &&
- "Register pair of non-adjacent registers!");
- if (AArch64::GPR64RegClass.contains(OddReg)) {
- NumGPRSpilled += 2;
- // If it's not a reserved register, we can use it in lieu of an
- // emergency spill slot for the register scavenger.
- // FIXME: It would be better to instead keep looking and choose another
- // unspilled register that isn't reserved, if there is one.
- if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
- ExtraCSSpill = true;
- } else
- NumFPRSpilled += 2;
+ DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
+ for (int Reg = SavedRegs.find_first(); Reg != -1;
+ Reg = SavedRegs.find_next(Reg))
+ dbgs() << ' ' << PrintReg(Reg, RegInfo);
+ dbgs() << "\n";);
- CanEliminateFrame = false;
- }
- DEBUG(dbgs() << "\n");
+ // If any callee-saved registers are used, the frame cannot be eliminated.
+ unsigned NumRegsSpilled = SavedRegs.count();
+ bool CanEliminateFrame = NumRegsSpilled == 0;
// FIXME: Set BigStack if any stack slot references may be out of range.
// For now, just conservatively guesstimate based on unscaled indexing
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize =
- MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+ unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled;
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
// above to keep the number of spills even, we don't need to do anything else
// here.
if (BigStack && !ExtraCSSpill) {
-
- // If we're adding a register to spill here, we have to add two of them
- // to keep the number of regs to spill even.
- assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
- unsigned Count = 0;
- while (!UnspilledCSGPRs.empty() && Count < 2) {
- unsigned Reg = UnspilledCSGPRs.back();
- UnspilledCSGPRs.pop_back();
- DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
- << " to get a scratch register.\n");
- SavedRegs.set(Reg);
+ if (UnspilledCSGPR != AArch64::NoRegister) {
+ DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
+ << " to get a scratch register.\n");
+ SavedRegs.set(UnspilledCSGPR);
+ // MachO's compact unwind format relies on all registers being stored in
+ // pairs, so if we need to spill one extra for BigStack, then we need to
+ // store the pair.
+ if (Subtarget.isTargetMachO())
+ SavedRegs.set(UnspilledCSGPRPaired);
ExtraCSSpill = true;
- ++Count;
- ++NumGPRSpilled;
+ NumRegsSpilled = SavedRegs.count();
}
// If we didn't find an extra callee-saved register to spill, create
}
}
- AFI->setCalleeSavedStackSize(8 * (NumGPRSpilled + NumFPRSpilled));
+ // Round up to register pair alignment to avoid additional SP adjustment
+ // instructions.
+ AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
}
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO
; This test aims to check basic correctness of frame layout &
; frame access code. There are 8 functions in this test file,
; CHECK-LABEL: novla_nodynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
-; CHECK: .cfi_offset w19, -24
-; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w19, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
; Check epilogue:
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _novla_nodynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; Check correct access to local variable on the stack, through stack pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp, #12]
+; Check epilogue:
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
declare i32 @g() #0
; CHECK-LABEL: novla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
-; CHECK: .cfi_offset w19, -24
-; CHECK: .cfi_offset w20, -32
+; CHECK: .cfi_offset w19, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; Check that stack pointer gets restored from frame pointer.
; CHECK: sub sp, x29, #16 // =16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _novla_dynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check the dynamic realignment of the stack pointer to a 128-byte boundary
+; CHECK-MACHO: sub x9, sp, #96
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; Check correct access to local variable on the stack, through re-aligned stack pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
; Function Attrs: nounwind
define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
; CHECK-LABEL: vla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
-; CHECK: stp x22, x21, [sp, #-48]!
+; CHECK: str x21, [sp, #-48]!
; CHECK: stp x20, x19, [sp, #16]
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #32]
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
-; CHECK: .cfi_offset w21, -40
-; CHECK: .cfi_offset w22, -48
+; CHECK: .cfi_offset w21, -48
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: sub sp, x29, #32
; CHECK: ldp x29, x30, [sp, #32]
; CHECK: ldp x20, x19, [sp, #16]
-; CHECK: ldp x22, x21, [sp], #48
+; CHECK: ldr x21, [sp], #48
; CHECK: ret
; CHECK: .cfi_endproc
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_call:
+; CHECK-MACHO: .cfi_startproc
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x22, x21, [sp, #-48]!
+; CHECK-MACHO: stp x20, x19, [sp, #16]
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #32]
+; CHECK-MACHO: add x29, sp, #32
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #80
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov x19, sp
+; Check correctness of cfi pseudo-instructions
+; CHECK-MACHO: .cfi_def_cfa w29, 16
+; CHECK-MACHO: .cfi_offset w30, -8
+; CHECK-MACHO: .cfi_offset w29, -16
+; CHECK-MACHO: .cfi_offset w19, -24
+; CHECK-MACHO: .cfi_offset w20, -32
+; CHECK-MACHO: .cfi_offset w21, -40
+; CHECK-MACHO: .cfi_offset w22, -48
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #32
+; CHECK-MACHO: ldp x29, x30, [sp, #32]
+; CHECK-MACHO: ldp x20, x19, [sp, #16]
+; CHECK-MACHO: ldp x22, x21, [sp], #48
+; CHECK-MACHO: ret
+; CHECK-MACHO: .cfi_endproc
+
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
; CHECK-LABEL: vla_dynamicrealign_nocall
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: str x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; Check that stack pointer gets restored from frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldr x19, [sp], #32
; CHECK: ret
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall:
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 128
+; bytes & the base pointer (x19) gets initialized to
+; this 128-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #96
+; CHECK-MACHO: and sp, x9, #0xffffffffffffff80
+; CHECK-MACHO: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
; Check that used callee-saved registers are saved
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: stp x28, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; Check that stack pointer gets restored from frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldp x28, x19, [sp], #32
; CHECK: ret
+; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall_large_align:
+; Check that used callee-saved registers are saved
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; Check that the frame pointer is created:
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; Check that the stack pointer gets re-aligned to 32768
+; bytes & the base pointer (x19) gets initialized to
+; this 32768-byte aligned area for local variables &
+; spill slots
+; CHECK-MACHO: sub x9, sp, #7, lsl #12
+; CHECK-MACHO: and sp, x9, #0xffffffffffff8000
+; CHECK-MACHO: mov x19, sp
+; Check correct access to arguments passed on the stack, through frame pointer
+; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20]
+; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32]
+; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
+; and set-up of base pointer (x19).
+; CHECK-MACHO: mov w9, w0
+; CHECK-MACHO: mov x10, sp
+; CHECK-MACHO: lsl x9, x9, #2
+; CHECK-MACHO: add x9, x9, #15
+; CHECK-MACHO: and x9, x9, #0x7fffffff0
+; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9
+; CHECK-MACHO: mov sp, x[[VLASPTMP]]
+; Check correct access to local variable, through base pointer
+; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19]
+; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
+; Check epilogue:
+; Check that stack pointer gets restored from frame pointer.
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
+; CHECK-MACHO: ret
+
define void @realign_conditional(i1 %b) {
entry:
; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=arm64-apple-ios -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK-MACHO
; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s
declare void @use_addr(i8*)
define void @test_alloca_large_frame(i64 %n) {
; CHECK-LABEL: test_alloca_large_frame:
+; CHECK-MACHO-LABEL: test_alloca_large_frame:
-; CHECK: stp x20, x19, [sp, #-32]!
+; CHECK: stp x28, x19, [sp, #-32]!
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; CHECK: sub sp, sp, #1953, lsl #12
; CHECK: sub sp, sp, #512
+; CHECK-MACHO: stp x20, x19, [sp, #-32]!
+; CHECK-MACHO: stp x29, x30, [sp, #16]
+; CHECK-MACHO: add x29, sp, #16
+; CHECK-MACHO: sub sp, sp, #1953, lsl #12
+; CHECK-MACHO: sub sp, sp, #512
+
%addr1 = alloca i8, i64 %n
%addr2 = alloca i64, i64 1000000
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
-; CHECK: ldp x20, x19, [sp], #32
+; CHECK: ldp x28, x19, [sp], #32
+
+; CHECK-MACHO: sub sp, x29, #16
+; CHECK-MACHO: ldp x29, x30, [sp, #16]
+; CHECK-MACHO: ldp x20, x19, [sp], #32
}
declare i8* @llvm.stacksave()
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s
; rdar://9167275
; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck -check-prefix CHECK-NOTMACHO %s
;
; rdar://14075006
; CHECK: ldp d11, d10, [sp, #32]
; CHECK: ldp d13, d12, [sp, #16]
; CHECK: ldp d15, d14, [sp], #144
+
+; CHECK-NOTMACHO-LABEL: odd:
+; CHECK-NOTMACHO: stp d14, d12, [sp, #-80]!
+; CHECK-NOTMACHO: stp d10, d8, [sp, #16]
+; CHECK-NOTMACHO: str x27, [sp, #32]
+; CHECK-NOTMACHO: stp x25, x23, [sp, #48]
+; CHECK-NOTMACHO: stp x21, x19, [sp, #64]
+; CHECK-NOTMACHO: movz x0, #0x2a
+; CHECK-NOTMACHO: ldp x21, x19, [sp, #64]
+; CHECK-NOTMACHO: ldp x25, x23, [sp, #48]
+; CHECK-NOTMACHO: ldr x27, [sp, #32]
+; CHECK-NOTMACHO: ldp d10, d8, [sp, #16]
+; CHECK-NOTMACHO: ldp d14, d12, [sp], #80
call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"() nounwind
ret void
}
; CHECK: ldp d11, d10, [sp, #32]
; CHECK: ldp d13, d12, [sp, #16]
; CHECK: ldp d15, d14, [sp], #144
+
+; CHECK-NOTMACHO-LABEL: even:
+; CHECK-NOTMACHO: stp d15, d13, [sp, #-80]!
+; CHECK-NOTMACHO: stp d11, d9, [sp, #16]
+; CHECK-NOTMACHO: str x28, [sp, #32]
+; CHECK-NOTMACHO: stp x26, x24, [sp, #48]
+; CHECK-NOTMACHO: stp x22, x20, [sp, #64]
+; CHECK-NOTMACHO: movz x0, #0x2a
+; CHECK-NOTMACHO: ldp x22, x20, [sp, #64]
+; CHECK-NOTMACHO: ldp x26, x24, [sp, #48]
+; CHECK-NOTMACHO: ldr x28, [sp, #32]
+; CHECK-NOTMACHO: ldp d11, d9, [sp, #16]
+; CHECK-NOTMACHO: ldp d15, d13, [sp], #80
call void asm sideeffect "mov x0, #42", "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"() nounwind
ret void
}