const auto &MFI = MF.getFrameInfo();
int ObjectOffset = MFI.getObjectOffset(FI);
bool isFixed = MFI.isFixedObjectIndex(FI);
- return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
+ bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector;
+ return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
PreferFP, ForSimm);
}
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
- const MachineFunction &MF, int ObjectOffset, bool isFixed,
+ const MachineFunction &MF, int ObjectOffset, bool isFixed, bool isSVE,
unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
const auto &MFI = MF.getFrameInfo();
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
const StackOffset &SVEStackSize = getSVEStackSize(MF);
- if (SVEStackSize)
- llvm_unreachable("Accessing frame indices in presence of SVE "
- "not yet supported");
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
// reliable as a base). Make sure useFPForScavengingIndex() does the
// right thing for the emergency spill slot.
bool UseFP = false;
- if (AFI->hasStackFrame()) {
+ if (AFI->hasStackFrame() && !isSVE) {
+ // We shouldn't prefer using the FP when there is an SVE area
+ // in between the FP and the non-SVE locals/spills.
+ PreferFP &= !SVEStackSize;
+
// Note: Keeping the following as multiple 'if' statements rather than
// merging to a single expression for readability.
//
bool CanUseBP = RegInfo->hasBasePointer(MF);
if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
UseFP = PreferFP;
- else if (!CanUseBP) // Can't use BP. Forced to use FP.
+ else if (!CanUseBP) { // Can't use BP. Forced to use FP.
+ assert(!SVEStackSize && "Expected BP to be available");
UseFP = true;
+ }
// else we can use BP and FP, but the offset from FP won't fit.
// That will make us scavenge registers which we can probably avoid by
// using BP. If it won't fit for BP either, we'll scavenge anyway.
"In the presence of dynamic stack pointer realignment, "
"non-argument/CSR objects cannot be accessed through the frame pointer");
+ if (isSVE) {
+ int64_t OffsetToSVEArea =
+ MFI.getStackSize() - AFI->getCalleeSavedStackSize();
+ StackOffset FPOffset = {ObjectOffset, MVT::nxv1i8};
+ StackOffset SPOffset = SVEStackSize +
+ StackOffset(ObjectOffset, MVT::nxv1i8) +
+ StackOffset(OffsetToSVEArea, MVT::i8);
+ // Always use the FP for SVE spills if available and beneficial.
+ if (hasFP(MF) &&
+ (SPOffset.getBytes() ||
+ FPOffset.getScalableBytes() < SPOffset.getScalableBytes() ||
+ RegInfo->needsStackRealignment(MF))) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
+ }
+
+ FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
+ : (unsigned)AArch64::SP;
+ return SPOffset;
+ }
+
+ StackOffset ScalableOffset = {};
+ if (UseFP && !(isFixed || isCSR))
+ ScalableOffset = -SVEStackSize;
+ if (!UseFP && (isFixed || isCSR))
+ ScalableOffset = SVEStackSize;
+
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
- return StackOffset(FPOffset, MVT::i8);
+ return StackOffset(FPOffset, MVT::i8) + ScalableOffset;
}
// Use the base pointer if we have one.
Offset -= AFI->getLocalStackSize();
}
- return StackOffset(Offset, MVT::i8);
+ return StackOffset(Offset, MVT::i8) + ScalableOffset;
}
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
<< ' ' << printReg(Reg, RegInfo);
dbgs() << "\n";);
- bool HasSVEStackObjects = [&MFI]() {
- for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
- if (MFI.getStackID(I) == TargetStackID::SVEVector &&
- MFI.getObjectOffset(I) < 0)
- return true;
- // Note: We don't take allocatable stack objects into
- // account yet, because allocation for those is not yet
- // implemented.
- return false;
- }();
-
// If any callee-saved registers are used, the frame cannot be eliminated.
- bool CanEliminateFrame = (SavedRegs.count() == 0) && !HasSVEStackObjects;
+ unsigned MaxAlign = getStackAlignment();
+ int64_t SVEStackSize =
+ alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign);
+ assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+ bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
- bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
+
+ // Conservatively always assume BigStack when there are SVE spills.
+ bool BigStack = SVEStackSize ||
+ (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
return AFI->hasCalleeSaveStackFreeSpace();
}
+/// Compute the (unaligned) size in scalable bytes of the fixed SVE stack
+/// area, i.e. the largest -(object offset) over all fixed stack objects
+/// whose stack ID is TargetStackID::SVEVector. The caller is responsible
+/// for aligning the result.
+///
+/// NOTE(review): MaxAlign is taken by non-const reference but is never
+/// updated here; presumably it will be raised once allocatable (non-fixed)
+/// SVE objects are supported -- confirm against callers.
+int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
+                                                    unsigned &MaxAlign) const {
+  // Process all fixed stack objects.
+  int64_t Offset = 0;
+  for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
+    if (MFI.getStackID(I) == TargetStackID::SVEVector) {
+      // Fixed SVE objects have negative offsets; track the deepest one,
+      // which bounds the extent of the SVE area.
+      int64_t FixedOffset = -MFI.getObjectOffset(I);
+      if (FixedOffset > Offset)
+        Offset = FixedOffset;
+    }
+
+  // Note: We don't take allocatable stack objects into
+  // account yet, because allocation for those is not yet
+  // implemented.
+  return Offset;
+}
+
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
"Upwards growing stack unsupported");
- // Process all fixed stack SVE objects.
- int64_t Offset = 0;
- for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
- unsigned StackID = MFI.getStackID(I);
- if (StackID == TargetStackID::SVEVector) {
- int64_t FixedOffset = -MFI.getObjectOffset(I);
- if (FixedOffset > Offset)
- Offset = FixedOffset;
- }
- }
-
unsigned MaxAlign = getStackAlignment();
- uint64_t SVEStackSize = alignTo(Offset, MaxAlign);
+ int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- AFI->setStackSizeSVE(SVEStackSize);
+ AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign));
assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
// If this function isn't doing Win64-style C++ EH, we don't need to do
define void @test_allocate_sve() nounwind { entry: unreachable }
define void @test_allocate_sve_gpr_callee_saves() nounwind { entry: unreachable }
define void @test_allocate_sve_gpr_realigned() nounwind { entry: unreachable }
+ define void @test_address_sve() nounwind { entry: unreachable }
+ define void @test_address_sve_fp() nounwind { entry: unreachable }
+ define void @test_stack_arg_sve() nounwind { entry: unreachable }
+ define void @test_address_sve_out_of_range() nounwind { entry: unreachable }
...
# +----------+
+# |scratchreg| // x29 is used as scratch reg.
+# +----------+
# | %fixed- | // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
# | stack.0 | // to be materialized with 2*ADDVL (<=> 2 * n * 16bytes)
# +----------+
# +----------+ <- SP
# CHECK-LABEL: name: test_allocate_sve
-# CHECK: stackSize: 16
+# CHECK: stackSize: 32
# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
# CHECK-NEXT: RET_ReallyLR
name: test_allocate_sve
fixedStack:
...
# +----------+
# | x20, x21 | // callee saves
+# |scratchreg| // x29 is used as scratch reg.
# +----------+
# | %fixed- | // scalable objects
# | stack.0 |
# +----------+ <- SP
# CHECK-LABEL: name: test_allocate_sve_gpr_callee_saves
-# CHECK: stackSize: 32
+# CHECK: stackSize: 48
# CHECK: bb.0.entry:
-# CHECK-NEXT: $sp = frame-setup STPXpre killed $x21, killed $x20, $sp, -2
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -32
+# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x20, $sp, 2
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
# CHECK-NEXT: $x20 = IMPLICIT_DEF
# CHECK-NEXT: $x21 = IMPLICIT_DEF
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
-# CHECK-NEXT: $sp, $x21, $x20 = frame-destroy LDPXpost $sp, 2
+# CHECK-NEXT: $x21, $x20 = frame-destroy LDPXi $sp, 2
+# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 32
# CHECK-NEXT: RET_ReallyLR
name: test_allocate_sve_gpr_callee_saves
fixedStack:
bb.0.entry:
RET_ReallyLR
---
+...
+# +-----------+
+# |scratchreg | // x29 is used as scratch reg.
+# +-----------+
+# | %fstack.0 | // scalable @ SP + 16b + 32 scalable bytes
+# | %fstack.1 | // scalable @ SP + 16b + 16 scalable bytes
+# | %fstack.2 | // scalable @ SP + 16b + 14 scalable bytes
+# +-----------+
+# | %stack.0  | // not scalable
+# +-----------+ <- SP
+
+# CHECK-LABEL: name: test_address_sve
+# CHECK: stackSize: 32
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
+# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 2
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
+# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], 1
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
+# CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 7
+
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
+# CHECK-NEXT: RET_ReallyLR
+name: test_address_sve
+frameInfo:
+ maxAlignment: 16
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 16, alignment: 8, offset: -16 }
+ - { id: 1, stack-id: sve-vec, size: 16, alignment: 8, offset: -32 }
+ - { id: 2, stack-id: sve-vec, size: 2, alignment: 2, offset: -34 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 8 }
+body: |
+ bb.0.entry:
+ liveins: $z0, $z1, $p0
+
+ STR_ZXI $z0, %fixed-stack.0, 0
+ STR_ZXI $z1, %fixed-stack.1, 0
+ STR_PXI $p0, %fixed-stack.2, 0
+
+ RET_ReallyLR
+---
+...
+# +-----------+
+# | x20, x21 | // callee saves
+# | lr, fp | // frame record
+# +-----------+ <- FP
+# | %fstack.0 | // scalable @ FP - 16 scalable bytes
+# | %fstack.1 | // scalable @ FP - 32 scalable bytes
+# | %fstack.2 | // scalable @ FP - 34 scalable bytes
+# +-----------+
+# | %stack.0 | // not scalable
+# +-----------+ <- SP
+
+# CHECK-LABEL: name: test_address_sve_fp
+# CHECK: stackSize: 32
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2
+# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+
+# CHECK-NEXT: STR_ZXI $z0, $fp, -1
+# CHECK-NEXT: STR_ZXI $z1, $fp, -2
+# CHECK-NEXT: STR_PXI $p0, $fp, -17
+
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
+# CHECK: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
+# CHECK-NEXT: RET_ReallyLR
+name: test_address_sve_fp
+frameInfo:
+ maxAlignment: 16
+ isFrameAddressTaken: true
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 16, alignment: 8, offset: -16 }
+ - { id: 1, stack-id: sve-vec, size: 16, alignment: 8, offset: -32 }
+ - { id: 2, stack-id: sve-vec, size: 2, alignment: 2, offset: -34 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 8 }
+body: |
+ bb.0.entry:
+ liveins: $z0, $z1, $p0
+
+ STR_ZXI $z0, %fixed-stack.0, 0
+ STR_ZXI $z1, %fixed-stack.1, 0
+ STR_PXI $p0, %fixed-stack.2, 0
+
+ RET_ReallyLR
+---
+...
+# +-----------+
+# | %fstack.0 | // stack arg @ SP + 16 scalable bytes + 32 bytes.
+# +-----------+
+# |callee save| // register saved as scratch reg.
+# +-----------+
+# | %fstack.1 | // vector of 16 scalable bytes
+# +-----------+
+# | %stack.0  | // not scalable, 16 bytes
+# +-----------+ <- SP
+# CHECK-LABEL: name: test_stack_arg_sve
+# CHECK: stackSize: 32
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+
+# CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1
+# CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4
+
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
+# CHECK-NEXT: RET_ReallyLR
+name: test_stack_arg_sve
+fixedStack:
+ - { id: 0, stack-id: default, size: 16, alignment: 16, offset: 0 }
+ - { id: 1, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 16 }
+body: |
+ bb.0.entry:
+ liveins: $x0
+
+ $x0 = LDRXui %fixed-stack.0, 0
+ RET_ReallyLR
+---
+...
+# Test that the address to access an SVE data vector at an offset that
+# does not fit its immediate, is correctly materialized.
+# +-----------+
+# |calleesave | // register saved as scratch reg.
+# +-----------+
+# | %fstack.0 | // one SVE data object @ SP + 256 scalable bytes.
+# |:::::::::::|
+# |:         :|
+# |:%fstack.1:| // Large object
+# |:         :|
+# |:::::::::::|
+# +-----------+ <- SP
+# CHECK-LABEL: name: test_address_sve_out_of_range
+# CHECK: stackSize: 16
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
+
+# CHECK-NEXT: $[[TMP2:x[0-9]+]] = ADDVL_XXI $sp, 1
+# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP2]], 255
+
+# CHECK-NEXT: $[[TMP2:x[0-9]+]] = ADDPL_XXI $sp, 1
+# CHECK-NEXT: STR_PXI $p0, killed $[[TMP2]], 255
+
+# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 9
+# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
+# CHECK-NEXT: RET_ReallyLR
+name: test_address_sve_out_of_range
+frameInfo:
+ maxAlignment: 16
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
+ - { id: 1, stack-id: sve-vec, size: 3584, alignment: 16, offset: -3600 }
+ - { id: 2, stack-id: sve-vec, size: 512, alignment: 16, offset: -4112 }
+
+body: |
+ bb.0.entry:
+ liveins: $z0, $p0
+
+ STR_ZXI $z0, %fixed-stack.0, 0
+ STR_PXI $p0, %fixed-stack.1, 0
+
+ RET_ReallyLR
+---