return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
}
+// Returns true when I is one of the SVE spill/fill opcodes (STR_ZXI, STR_PXI,
+// LDR_ZXI, LDR_PXI) and carries the FrameSetup or FrameDestroy flag, i.e. it
+// is an SVE callee-save store or reload. Note that I is dereferenced.
+bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
+  const unsigned Opc = I->getOpcode();
+  const bool IsSVESpillOrFill =
+      Opc == AArch64::STR_ZXI || Opc == AArch64::STR_PXI ||
+      Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI;
+  if (!IsSVESpillOrFill)
+    return false;
+
+  // Only instructions tagged as prologue/epilogue code count.
+  return I->getFlag(MachineInstr::FrameSetup) ||
+         I->getFlag(MachineInstr::FrameDestroy);
+}
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
// and pre-inc if we decided to combine the callee-save and local stack
// pointer bump above.
MachineBasicBlock::iterator End = MBB.end();
- while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
+ while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
+ !IsSVECalleeSave(MBBI)) {
if (CombineSPBump)
fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
NeedsWinCFI, &HasWinCFI);
NumBytes = 0;
}
- emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII,
+ StackOffset AllocateBefore = SVEStackSize, AllocateAfter = {};
+ MachineBasicBlock::iterator CalleeSavesBegin = MBBI, CalleeSavesEnd = MBBI;
+
+  // Process the SVE callee-saves to determine what space needs to be
+  // allocated.
+  if (AFI->getSVECalleeSavedStackSize()) {
+    // Find callee save instructions in frame.
+    CalleeSavesBegin = MBBI;
+    assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
+    // Compare against the first terminator *before* calling IsSVECalleeSave,
+    // because that helper dereferences the iterator it is given.
+    while (MBBI != MBB.getFirstTerminator() && IsSVECalleeSave(MBBI))
+      ++MBBI;
+    CalleeSavesEnd = MBBI;
+
+    // Split the SVE allocation in two parts: AllocateBefore is applied ahead
+    // of the callee-save stores (at CalleeSavesBegin) and AllocateAfter once
+    // they have been emitted (at CalleeSavesEnd).
+    int64_t OffsetToFirstCalleeSaveFromSP =
+        MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
+    StackOffset OffsetToCalleeSavesFromSP =
+        StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
+    AllocateBefore -= OffsetToCalleeSavesFromSP;
+    AllocateAfter = SVEStackSize - AllocateBefore;
+  }
+
+ // Allocate space for the callee saves (if any).
+ emitFrameOffset(MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP,
+ -AllocateBefore, TII,
+ MachineInstr::FrameSetup);
+
+ // Finally allocate remaining SVE stack space.
+ emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
+ -AllocateAfter, TII,
MachineInstr::FrameSetup);
// Allocate space for the rest of the frame.
MachineBasicBlock::iterator Begin = MBB.begin();
while (LastPopI != Begin) {
--LastPopI;
- if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
+ if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
+ IsSVECalleeSave(LastPopI)) {
++LastPopI;
break;
} else if (CombineSPBump)
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
+  // Process the SVE callee-saves to determine what space needs to be
+  // deallocated.
+  StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
+  MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
+  if (AFI->getSVECalleeSavedStackSize()) {
+    // Walk backwards from the last SVE restore to the first one. Inspecting
+    // the predecessor (instead of stepping one past the range and then
+    // stepping forward again) keeps a restore that is the very first
+    // instruction of the block inside [RestoreBegin, RestoreEnd).
+    RestoreBegin = std::prev(RestoreEnd);
+    while (RestoreBegin != MBB.begin() &&
+           IsSVECalleeSave(std::prev(RestoreBegin)))
+      --RestoreBegin;
+
+    assert(IsSVECalleeSave(RestoreBegin) &&
+           IsSVECalleeSave(std::prev(RestoreEnd)) && "Unexpected instruction");
+
+    // Split the SVE deallocation in two parts: DeallocateBefore is applied
+    // ahead of the restores (at RestoreBegin) and DeallocateAfter once they
+    // have been emitted (at RestoreEnd).
+    int64_t OffsetToFirstCalleeSaveFromSP =
+        MFI.getObjectOffset(AFI->getMaxSVECSFrameIndex());
+    StackOffset OffsetToCalleeSavesFromSP =
+        StackOffset(OffsetToFirstCalleeSaveFromSP, MVT::nxv1i8) + SVEStackSize;
+    DeallocateBefore = OffsetToCalleeSavesFromSP;
+    DeallocateAfter = SVEStackSize - DeallocateBefore;
+  }
+
// Deallocate the SVE area.
- if (SVEStackSize)
- if (!AFI->isStackRealigned())
- emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize,
- TII, MachineInstr::FrameDestroy);
+ if (SVEStackSize) {
+ if (AFI->isStackRealigned()) {
+ if (AFI->getSVECalleeSavedStackSize())
+ // Set SP to start of SVE area, from which the callee-save reloads
+ // can be done. The code below will deallocate the stack space
+ // by moving FP -> SP.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::FP,
+ -SVEStackSize, TII, MachineInstr::FrameDestroy);
+ } else {
+ if (AFI->getSVECalleeSavedStackSize()) {
+ // Deallocate the non-SVE locals first before we can deallocate (and
+ // restore callee saves) from the SVE area.
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy);
+ NumBytes = 0;
+ }
+
+ emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
+ DeallocateBefore, TII, MachineInstr::FrameDestroy);
+
+ emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
+ DeallocateAfter, TII, MachineInstr::FrameDestroy);
+ }
+ }
if (!hasFP(MF)) {
bool RedZone = canUseRedZone(MF);
unsigned Reg2 = AArch64::NoRegister;
int FrameIdx;
int Offset;
- enum RegType { GPR, FPR64, FPR128 } Type;
+ enum RegType { GPR, FPR64, FPR128, PPR, ZPR } Type;
RegPairInfo() = default;
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
+
+  /// Returns the scale associated with this register type: 2 for PPR, 8 for
+  /// GPR/FPR64 and 16 for ZPR/FPR128. The callee-save offset is divided by
+  /// this value to obtain the Offset field.
+  unsigned getScale() const {
+    // Deliberately no default label: the switch covers every RegType, so the
+    // compiler can flag a newly added enumerator that lacks a case here.
+    switch (Type) {
+    case PPR:
+      return 2;
+    case GPR:
+    case FPR64:
+      return 8;
+    case ZPR:
+    case FPR128:
+      return 16;
+    }
+    llvm_unreachable("Unsupported type");
+  }
+
+  // True for the PPR and ZPR register types, false for GPR/FPR64/FPR128.
+  bool isScalable() const {
+    const bool IsSVEKind = (Type == PPR) || (Type == ZPR);
+    return IsSVEKind;
+  }
};
} // end anonymous namespace
CC == CallingConv::PreserveMost ||
(Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
- int Offset = AFI->getCalleeSavedStackSize();
+ int ByteOffset = AFI->getCalleeSavedStackSize();
+ int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
// On Linux, we will have either one or zero non-paired register. On Windows
// with CFI, we can have multiple unpaired registers in order to utilize the
// available unwind codes. This flag assures that the alignment fixup is done
RPI.Type = RegPairInfo::FPR64;
else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
RPI.Type = RegPairInfo::FPR128;
+ else if (AArch64::ZPRRegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::ZPR;
+ else if (AArch64::PPRRegClass.contains(RPI.Reg1))
+ RPI.Type = RegPairInfo::PPR;
else
llvm_unreachable("Unsupported register class.");
if (AArch64::FPR128RegClass.contains(NextReg))
RPI.Reg2 = NextReg;
break;
+ case RegPairInfo::PPR:
+ case RegPairInfo::ZPR:
+ break;
}
}
RPI.FrameIdx = CSI[i].getFrameIdx();
- int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
- Offset -= RPI.isPaired() ? 2 * Scale : Scale;
+ int Scale = RPI.getScale();
+ if (RPI.isScalable())
+ ScalableByteOffset -= Scale;
+ else
+ ByteOffset -= RPI.isPaired() ? 2 * Scale : Scale;
+
+ assert(!(RPI.isScalable() && RPI.isPaired()) &&
+ "Paired spill/fill instructions don't exist for SVE vectors");
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
- RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
+ !RPI.isScalable() && RPI.Type != RegPairInfo::FPR128 &&
+ !RPI.isPaired()) {
FixupDone = true;
- Offset -= 8;
- assert(Offset % 16 == 0);
+ ByteOffset -= 8;
+ assert(ByteOffset % 16 == 0);
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
MFI.setObjectAlignment(RPI.FrameIdx, 16);
}
+ int Offset = RPI.isScalable() ? ScalableByteOffset : ByteOffset;
assert(Offset % Scale == 0);
RPI.Offset = Offset / Scale;
- assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
+
+ assert(((!RPI.isScalable() && RPI.Offset >= -64 && RPI.Offset <= 63) ||
+ (RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
RegPairs.push_back(RPI);
Size = 16;
Align = 16;
break;
+ case RegPairInfo::ZPR:
+ StrOpc = AArch64::STR_ZXI;
+ Size = 16;
+ Align = 16;
+ break;
+ case RegPairInfo::PPR:
+ StrOpc = AArch64::STR_PXI;
+ Size = 2;
+ Align = 2;
+ break;
}
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+ // Update the StackIDs of the SVE stack slots.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (RPI.Type == RegPairInfo::ZPR || RPI.Type == RegPairInfo::PPR)
+ MFI.setStackID(RPI.FrameIdx, TargetStackID::SVEVector);
+
}
return true;
}
Size = 16;
Align = 16;
break;
+ case RegPairInfo::ZPR:
+ LdrOpc = AArch64::LDR_ZXI;
+ Size = 16;
+ Align = 16;
+ break;
+ case RegPairInfo::PPR:
+ LdrOpc = AArch64::LDR_PXI;
+ Size = 2;
+ Align = 2;
+ break;
}
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
if (NeedsWinCFI)
InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
};
- if (ReverseCSRRestoreSeq)
- for (const RegPairInfo &RPI : reverse(RegPairs))
+
+ // SVE objects are always restored in reverse order.
+ for (const RegPairInfo &RPI : reverse(RegPairs))
+ if (RPI.isScalable())
EmitMI(RPI);
- else
+
+ if (ReverseCSRRestoreSeq) {
+ for (const RegPairInfo &RPI : reverse(RegPairs))
+ if (!RPI.isScalable())
+ EmitMI(RPI);
+ } else
for (const RegPairInfo &RPI : RegPairs)
- EmitMI(RPI);
+ if (!RPI.isScalable())
+ EmitMI(RPI);
if (NeedShadowCallStackProlog) {
// Shadow call stack epilog: ldr x30, [x18, #-8]!
SavedRegs.set(Reg);
bool RegUsed = SavedRegs.test(Reg);
- unsigned PairedReg = CSRegs[i ^ 1];
+ unsigned PairedReg = AArch64::NoRegister;
+ if (AArch64::GPR64RegClass.contains(Reg) ||
+ AArch64::FPR64RegClass.contains(Reg) ||
+ AArch64::FPR128RegClass.contains(Reg))
+ PairedReg = CSRegs[i ^ 1];
+
if (!RegUsed) {
if (AArch64::GPR64RegClass.contains(Reg) &&
!RegInfo->isReservedReg(MF, Reg)) {
// Calculates the callee saved stack size.
unsigned CSStackSize = 0;
+ unsigned SVECSStackSize = 0;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- for (unsigned Reg : SavedRegs.set_bits())
- CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
+ for (unsigned Reg : SavedRegs.set_bits()) {
+ auto RegSize = TRI->getRegSizeInBits(Reg, MRI) / 8;
+ if (AArch64::PPRRegClass.contains(Reg) ||
+ AArch64::ZPRRegClass.contains(Reg))
+ SVECSStackSize += RegSize;
+ else
+ CSStackSize += RegSize;
+ }
// Save number of saved regs, so we can easily update CSStackSize later.
unsigned NumSavedRegs = SavedRegs.count();
dbgs() << "\n";);
// If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned MaxAlign = getStackAlignment();
int64_t SVEStackSize =
- alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign);
- assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+ alignTo(SVECSStackSize + estimateSVEStackObjectOffsets(MFI), 16);
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// instructions.
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
+ AFI->setSVECalleeSavedStackSize(alignTo(SVECSStackSize, 16));
}
bool AArch64FrameLowering::enableStackSlotScavenging(
return AFI->hasCalleeSaveStackFreeSpace();
}
-int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
- unsigned &MaxAlign) const {
- // Process all fixed stack objects.
+/// Computes the range [Min, Max] of frame indices used by the SVE (ZPR/PPR)
+/// callee-save slots recorded in MFI's callee-saved info.
+///
+/// Min and Max are always written. When no SVE callee save is found, or the
+/// callee-saved info is not valid yet, they hold the sentinel values
+/// std::numeric_limits<int>::max() and std::numeric_limits<int>::min().
+///
+/// \returns true if there are any SVE callee saves.
+static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
+                                      int &Min, int &Max) {
+  // Initialise the out-parameters before any early exit so that callers
+  // passing uninitialised locals never read indeterminate values.
+  Min = std::numeric_limits<int>::max();
+  Max = std::numeric_limits<int>::min();
+
+  if (!MFI.isCalleeSavedInfoValid())
+    return false;
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+  for (auto &CS : CSI) {
+    if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
+        AArch64::PPRRegClass.contains(CS.getReg())) {
+      // Each SVE slot must directly follow the previous one seen.
+      assert((Max == std::numeric_limits<int>::min() ||
+              Max + 1 == CS.getFrameIdx()) &&
+             "SVE CalleeSaves are not consecutive");
+
+      Min = std::min(Min, CS.getFrameIdx());
+      Max = std::max(Max, CS.getFrameIdx());
+    }
+  }
+  return Min != std::numeric_limits<int>::max();
+}
+
+// Process all the SVE stack objects (fixed objects first, then the SVE
+// callee-save slots) and determine offsets for each object. The callee-save
+// slot offsets are written to MFI only if AssignOffsets is true.
+// Fills in the first and last SVE callee-save frame indices into
+// Min/MaxCSFrameIndex, respectively. Returns the final value of Offset.
+static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
+ int &MinCSFrameIndex,
+ int &MaxCSFrameIndex,
+ bool AssignOffsets) {
+ // First process all fixed stack objects.
int64_t Offset = 0;
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
if (MFI.getStackID(I) == TargetStackID::SVEVector) {
Offset = FixedOffset;
}
+ // Then process all callee saved slots.
+ if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
+ // Make sure to align the last callee save slot (done even when AssignOffsets is false).
+ MFI.setObjectAlignment(MaxCSFrameIndex, 16U);
+
+ // Accumulate each callee save slot's size; store the offset only if AssignOffsets.
+ for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
+ Offset += MFI.getObjectSize(I);
+ Offset = alignTo(Offset, MFI.getObjectAlignment(I));
+ if (AssignOffsets) {
+ LLVM_DEBUG(dbgs() << "alloc FI(" << I << ") at SP[" << Offset
+ << "]\n");
+ MFI.setObjectOffset(I, -Offset);
+ }
+ }
+ }
+
// Note: We don't take allocatable stack objects into
// account yet, because allocation for those is not yet
// implemented.
return Offset;
}
+// Runs determineSVEStackObjectOffsets with AssignOffsets=false and returns
+// its result; the callee-save frame index range it reports is discarded.
+int64_t AArch64FrameLowering::estimateSVEStackObjectOffsets(
+    MachineFrameInfo &MFI) const {
+  int UnusedMinCSFrameIndex, UnusedMaxCSFrameIndex;
+  return determineSVEStackObjectOffsets(MFI, UnusedMinCSFrameIndex,
+                                        UnusedMaxCSFrameIndex,
+                                        /*AssignOffsets=*/false);
+}
+
+// Runs determineSVEStackObjectOffsets with AssignOffsets=true, passing the
+// caller's MinCSFrameIndex/MaxCSFrameIndex through, and returns its result.
+int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
+    MachineFrameInfo &MFI, int &MinCSFrameIndex, int &MaxCSFrameIndex) const {
+  const int64_t SVEStackSize = determineSVEStackObjectOffsets(
+      MFI, MinCSFrameIndex, MaxCSFrameIndex, /*AssignOffsets=*/true);
+  return SVEStackSize;
+}
+
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
- unsigned MaxAlign = getStackAlignment();
- int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign);
+ int MinCSFrameIndex, MaxCSFrameIndex;
+ int64_t SVEStackSize =
+ assignSVEStackObjectOffsets(MFI, MinCSFrameIndex, MaxCSFrameIndex);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign));
- assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+ AFI->setStackSizeSVE(alignTo(SVEStackSize, 16U));
+ AFI->setMinMaxSVECSFrameIndex(MinCSFrameIndex, MaxCSFrameIndex);
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
define void @test_address_sve_fp() nounwind { entry: unreachable }
define void @test_stack_arg_sve() nounwind { entry: unreachable }
define void @test_address_sve_out_of_range() nounwind { entry: unreachable }
+ define aarch64_sve_vector_pcs void @save_restore_pregs_sve() nounwind { entry: unreachable }
+ define aarch64_sve_vector_pcs void @save_restore_zregs_sve() nounwind { entry: unreachable }
+ define aarch64_sve_vector_pcs void @save_restore_sve() nounwind { entry: unreachable }
+ define aarch64_sve_vector_pcs void @save_restore_sve_realign() nounwind { entry: unreachable }
...
# +----------+
RET_ReallyLR
---
+...
+# CHECK-LABEL: name: save_restore_pregs_sve
+# CHECK: $sp = frame-setup ADDVL_XXI $sp, -1
+# CHECK: frame-setup STR_PXI killed $p6, $sp, 5
+# CHECK: frame-setup STR_PXI killed $p5, $sp, 6
+# CHECK: frame-setup STR_PXI killed $p4, $sp, 7
+# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
+
+# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
+# CHECK: $p6 = frame-destroy LDR_PXI $sp, 5
+# CHECK: $p5 = frame-destroy LDR_PXI $sp, 6
+# CHECK: $p4 = frame-destroy LDR_PXI $sp, 7
+# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 1
+# CHECK: RET_ReallyLR
+name: save_restore_pregs_sve
+stack:
+ - { id: 0, stack-id: default, size: 32, alignment: 16 }
+body: |
+ bb.0.entry:
+
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+
+ RET_ReallyLR
+---
+...
+# CHECK-LABEL: name: save_restore_zregs_sve
+# CHECK: $sp = frame-setup ADDVL_XXI $sp, -3
+# CHECK: frame-setup STR_ZXI killed $z10, $sp, 0
+# CHECK: frame-setup STR_ZXI killed $z9, $sp, 1
+# CHECK: frame-setup STR_ZXI killed $z8, $sp, 2
+# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
+
+# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
+# CHECK: $z10 = frame-destroy LDR_ZXI $sp, 0
+# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 1
+# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 2
+# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 3
+# CHECK: RET_ReallyLR
+name: save_restore_zregs_sve
+stack:
+ - { id: 0, stack-id: default, size: 32, alignment: 16 }
+body: |
+ bb.0.entry:
+
+ $z8 = IMPLICIT_DEF
+ $z9 = IMPLICIT_DEF
+ $z10 = IMPLICIT_DEF
+
+ RET_ReallyLR
+---
+...
+# Test allocation/deallocation of the stack frame together with the
+# saving/restoring of callee save registers. Fixed-stack objects
+# are allocated before the callee-saves.
+# This also adds some non-SVE callee-saves, to ensure that those are
+# paired correctly.
+#
+# CHECK-LABEL: name: save_restore_sve
+# CHECK: $sp = frame-setup STPXpre killed ${{[a-z0-9]+}}, killed $x21, $sp, -4
+# CHECK: frame-setup STPXi killed $x20, killed $x19, $sp, 2
+# CHECK: $sp = frame-setup ADDVL_XXI $sp, -19
+# CHECK: frame-setup STR_PXI killed $p15, $sp, 4
+# CHECK: frame-setup STR_PXI killed $p14, $sp, 5
+# CHECK: frame-setup STR_PXI killed $p5, $sp, 14
+# CHECK: frame-setup STR_PXI killed $p4, $sp, 15
+# CHECK: frame-setup STR_ZXI killed $z23, $sp, 2
+# CHECK: frame-setup STR_ZXI killed $z22, $sp, 3
+# CHECK: frame-setup STR_ZXI killed $z9, $sp, 16
+# CHECK: frame-setup STR_ZXI killed $z8, $sp, 17
+# CHECK: $sp = frame-setup SUBXri $sp, 32, 0
+
+# CHECK: $sp = frame-destroy ADDXri $sp, 32, 0
+# CHECK: $p15 = frame-destroy LDR_PXI $sp, 4
+# CHECK: $p14 = frame-destroy LDR_PXI $sp, 5
+# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
+# CHECK: $p4 = frame-destroy LDR_PXI $sp, 15
+# CHECK: $z23 = frame-destroy LDR_ZXI $sp, 2
+# CHECK: $z22 = frame-destroy LDR_ZXI $sp, 3
+# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
+# CHECK: $z8 = frame-destroy LDR_ZXI $sp, 17
+# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 19
+# CHECK: $x20, $x19 = frame-destroy LDPXi $sp, 2
+# CHECK: $sp, ${{[a-z0-9]+}}, $x21 = frame-destroy LDPXpost $sp, 4
+# CHECK: RET_ReallyLR
+name: save_restore_sve
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
+stack:
+ - { id: 0, stack-id: default, size: 32, alignment: 16 }
+body: |
+ bb.0.entry:
+
+ $z8_z9_z10_z11 = IMPLICIT_DEF
+ $z12_z13_z14_z15 = IMPLICIT_DEF
+ $z16_z17_z18_z19 = IMPLICIT_DEF
+ $z20_z21_z22_z23 = IMPLICIT_DEF
+ $z24_z25_z26_z27 = IMPLICIT_DEF
+ $z28_z29_z30_z31 = IMPLICIT_DEF
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+ $p7 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $p9 = IMPLICIT_DEF
+ $p10 = IMPLICIT_DEF
+ $p11 = IMPLICIT_DEF
+ $p12 = IMPLICIT_DEF
+ $p13 = IMPLICIT_DEF
+ $p14 = IMPLICIT_DEF
+ $p15 = IMPLICIT_DEF
+
+ $x19 = IMPLICIT_DEF
+ $x20 = IMPLICIT_DEF
+ $x21 = IMPLICIT_DEF
+
+ RET_ReallyLR
+---
+...
+# Test allocation/deallocation of the stack frame together with the
+# saving/restoring of callee save registers when the stack is realigned
+# (32-byte aligned local). Fixed-stack objects are allocated before the callee-saves.
+#
+# CHECK-LABEL: name: save_restore_sve_realign
+# CHECK: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2
+# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -19
+# CHECK-NEXT: STR_PXI killed $p15, $sp, 4
+# CHECK-NEXT: STR_PXI killed $p14, $sp, 5
+# CHECK: STR_PXI killed $p5, $sp, 14
+# CHECK-NEXT: STR_PXI killed $p4, $sp, 15
+# CHECK-NEXT: STR_ZXI killed $z23, $sp, 2
+# CHECK-NEXT: STR_ZXI killed $z22, $sp, 3
+# CHECK: STR_ZXI killed $z9, $sp, 16
+# CHECK-NEXT: STR_ZXI killed $z8, $sp, 17
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
+
+# CHECK: $sp = frame-destroy ADDVL_XXI $fp, -19
+# CHECK-NEXT: $p15 = frame-destroy LDR_PXI $sp, 4
+# CHECK-NEXT: $p14 = frame-destroy LDR_PXI $sp, 5
+# CHECK: $p5 = frame-destroy LDR_PXI $sp, 14
+# CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 15
+# CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2
+# CHECK-NEXT: $z22 = frame-destroy LDR_ZXI $sp, 3
+# CHECK: $z9 = frame-destroy LDR_ZXI $sp, 16
+# CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 17
+# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
+# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
+# CHECK-NEXT: RET_ReallyLR
+name: save_restore_sve_realign
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 32 }
+body: |
+ bb.0.entry:
+
+ $z8_z9_z10_z11 = IMPLICIT_DEF
+ $z12_z13_z14_z15 = IMPLICIT_DEF
+ $z16_z17_z18_z19 = IMPLICIT_DEF
+ $z20_z21_z22_z23 = IMPLICIT_DEF
+ $z24_z25_z26_z27 = IMPLICIT_DEF
+ $z28_z29_z30_z31 = IMPLICIT_DEF
+ $p4 = IMPLICIT_DEF
+ $p5 = IMPLICIT_DEF
+ $p6 = IMPLICIT_DEF
+ $p7 = IMPLICIT_DEF
+ $p8 = IMPLICIT_DEF
+ $p9 = IMPLICIT_DEF
+ $p10 = IMPLICIT_DEF
+ $p11 = IMPLICIT_DEF
+ $p12 = IMPLICIT_DEF
+ $p13 = IMPLICIT_DEF
+ $p14 = IMPLICIT_DEF
+ $p15 = IMPLICIT_DEF
+
+ RET_ReallyLR
+---