CCDelegateTo<CC_AArch64_AAPCS>
]>;
+// Vararg functions on the Arm64EC ABI use a different convention, with a
+// stack layout compatible with the x64 calling convention.
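+// (In x64 terms: the first four argument slots go in registers, the rest go
+// in 8-byte stack slots, and anything larger than 8 bytes is passed by
+// pointer. For example, printf("%f %d", 1.0, 2) puts the format string in
+// x0, the double (bitcast to i64) in x1, and the i32 in w2.)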
+let Entry = 1 in
+def CC_AArch64_Arm64EC_VarArg : CallingConv<[
+ // Convert small floating-point values to integer.
+ CCIfType<[f16, bf16], CCBitConvertToType<i16>>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[f64, v1f64, v1i64, v2f32, v2i32, v4i16, v4f16, v4bf16, v8i8, iPTR],
+ CCBitConvertToType<i64>>,
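+ // (The bit patterns are preserved; the converted values then fall through
+ // to the integer register/stack assignments below, matching the x64
+ // layout, where the callee may read vararg FP values out of GPRs.)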
+
+ // Larger floating-point/vector values are passed indirectly.
+ CCIfType<[f128, v2f64, v2i64, v4i32, v4f32, v8i16, v8f16, v8bf16, v16i8],
+ CCPassIndirect<i64>>,
+ CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+ nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
+ CCPassIndirect<i64>>,
+ CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCPassIndirect<i64>>,
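+ // (This mirrors the x64 rule that arguments wider than 64 bits are passed
+ // by reference; scalable vectors have no x64 equivalent, so they also go
+ // indirectly.)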
+
+ // Handle SRet. See comment in CC_AArch64_AAPCS.
+ CCIfInReg<CCIfType<[i64],
+ CCIfSRet<CCIfType<[i64], CCAssignToReg<[X0, X1]>>>>>,
+ CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,
+
+ // Put ByVal arguments directly on the stack. The minimum size and
+ // alignment of a slot are 64 bits. (This shouldn't normally come up; the
+ // Microsoft ABI doesn't use byval.)
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Promote small integers to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // Pass the first four arguments in x0-x3.
+ CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3]>>,
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3]>>,
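+ // (Under the Arm64EC register mapping, x0-x3 correspond to the x64
+ // argument registers rcx, rdx, r8, and r9.)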
+
+ // Put the remaining arguments on the stack.
+ CCIfType<[i32, i64], CCAssignToStack<8, 8>>,
+]>;
+
// Windows Control Flow Guard checks take a single argument (the target function
// address) and have no return value.
let Entry = 1 in
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
- if (Subtarget->isTargetWindows() && IsVarArg)
+ if (Subtarget->isTargetWindows() && IsVarArg) {
+ if (Subtarget->isWindowsArm64EC())
+ return CC_AArch64_Arm64EC_VarArg;
return CC_AArch64_Win64_VarArg;
+ }
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
if (!IsVarArg)
return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
: CC_AArch64_DarwinPCS_VarArg;
case CallingConv::Win64:
- return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
+ if (IsVarArg) {
+ if (Subtarget->isWindowsArm64EC())
+ return CC_AArch64_Arm64EC_VarArg;
+ return CC_AArch64_Win64_VarArg;
+ }
+ return CC_AArch64_AAPCS;
case CallingConv::CFGuard_Check:
return CC_AArch64_Win64_CFGuard_Check;
case CallingConv::AArch64_VectorCall:
case CCValAssign::Full:
break;
case CCValAssign::Indirect:
- assert(VA.getValVT().isScalableVector() &&
- "Only scalable vectors can be passed indirectly");
+ assert((VA.getValVT().isScalableVector() ||
+ Subtarget->isWindowsArm64EC()) &&
+ "Indirect arguments should be scalable on most subtargets");
break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
!Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8 - ArgSize;
- int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
+ SDValue FIN;
+ MachinePointerInfo PtrInfo;
+ if (isVarArg && Subtarget->isWindowsArm64EC()) {
+ // In the Arm64EC varargs convention, fixed arguments on the stack are
+ // accessed relative to x4, not sp.
+ unsigned ObjOffset = ArgOffset + BEAlign;
+ Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ FIN = DAG.getNode(ISD::ADD, DL, MVT::i64, Val,
+ DAG.getConstant(ObjOffset, DL, MVT::i64));
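+ // The location is x4-relative rather than a fixed frame object, so the
+ // most precise pointer info available is "somewhere on the stack".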
+ PtrInfo = MachinePointerInfo::getUnknownStack(MF);
+ } else {
+ int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
- // Create load nodes to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ // Create load nodes to retrieve arguments from the stack.
+ FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ }
// For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
MemVT = VA.getLocVT();
break;
case CCValAssign::Indirect:
- assert(VA.getValVT().isScalableVector() &&
- "Only scalable vectors can be passed indirectly");
+ assert((VA.getValVT().isScalableVector() ||
+ Subtarget->isWindowsArm64EC()) &&
+ "Indirect arguments should be scalable on most subtargets");
MemVT = VA.getLocVT();
break;
case CCValAssign::SExt:
break;
}
- ArgValue =
- DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
- MachinePointerInfo::getFixedStack(MF, FI), MemVT);
+ ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN, PtrInfo,
+ MemVT);
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
- assert(VA.getValVT().isScalableVector() &&
- "Only scalable vectors can be passed indirectly");
+ assert(
+ (VA.getValVT().isScalableVector() || Subtarget->isWindowsArm64EC()) &&
+ "Indirect arguments should be scalable on most subtargets");
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
unsigned NumParts = 1;
InVals.push_back(ArgValue);
NumParts--;
if (NumParts > 0) {
- SDValue BytesIncrement = DAG.getVScale(
- DL, Ptr.getValueType(),
- APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ SDValue BytesIncrement;
+ if (PartLoad.isScalableVector()) {
+ BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ } else {
+ BytesIncrement = DAG.getConstant(
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize), DL,
+ Ptr.getValueType());
+ }
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
unsigned StackOffset = CCInfo.getNextStackOffset();
// We currently pass all varargs at 8-byte alignment, or 4 for ILP32
StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
+ FuncInfo->setVarArgsStackOffset(StackOffset);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
if (MFI.hasMustTailInVarArgFunc()) {
static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
- static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
+ unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
+ if (Subtarget->isWindowsArm64EC()) {
+ // In the Arm64EC ABI, only x0-x3 are used to pass arguments to varargs
+ // functions.
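+ // x4 and x5 describe the stack argument area instead; see the vararg
+ // handling in LowerCall.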
+ NumGPRArgRegs = 4;
+ }
unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
} else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
- SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
+ SDValue FIN;
+ if (Subtarget->isWindowsArm64EC()) {
+ // With the Arm64EC ABI, we reserve the save area as usual, but we
+ // compute its address relative to x4. For a normal AArch64->AArch64
+ // call, x4 == sp on entry, but calls from an entry thunk can pass in a
+ // different address.
+ Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ FIN = DAG.getNode(ISD::SUB, DL, MVT::i64, Val,
+ DAG.getConstant(GPRSaveSize, DL, MVT::i64));
+ } else {
+ FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
+ }
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
Register VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
// 'getBytesInStackArgArea' is not sufficient to determine whether we need to
// allocate space on the stack. That is why we determine this explicitly here
// the call cannot be a tailcall.
- if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
+ if (llvm::any_of(ArgLocs, [&](CCValAssign &A) {
assert((A.getLocInfo() != CCValAssign::Indirect ||
- A.getValVT().isScalableVector()) &&
+ A.getValVT().isScalableVector() ||
+ Subtarget->isWindowsArm64EC()) &&
"Expected value to be scalable");
return A.getLocInfo() == CCValAssign::Indirect;
}))
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::Indirect:
- assert(VA.getValVT().isScalableVector() &&
- "Only scalable vectors can be passed indirectly");
+ bool isScalable = VA.getValVT().isScalableVector();
+ assert((isScalable || Subtarget->isWindowsArm64EC()) &&
+ "Indirect arguments should be scalable on most subtargets");
uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
uint64_t PartSize = StoreSize;
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
- MFI.setStackID(FI, TargetStackID::ScalableVector);
+ if (isScalable)
+ MFI.setStackID(FI, TargetStackID::ScalableVector);
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
SDValue Ptr = DAG.getFrameIndex(
Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
NumParts--;
if (NumParts > 0) {
- SDValue BytesIncrement = DAG.getVScale(
- DL, Ptr.getValueType(),
- APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ SDValue BytesIncrement;
+ if (isScalable) {
+ BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ } else {
+ BytesIncrement = DAG.getConstant(
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize), DL,
+ Ptr.getValueType());
+ }
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
}
}
+ if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+ // For vararg calls, the Arm64EC ABI requires values in x4 and x5
+ // describing the argument list. x4 contains the address of the
+ // first stack parameter. x5 contains the size in bytes of all parameters
+ // passed on the stack.
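+ // (For a direct call like this, x4 is just a copy of sp after the
+ // outgoing argument area has been allocated.)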
+ RegsToPass.emplace_back(AArch64::X4, StackPtr);
+ RegsToPass.emplace_back(AArch64::X5,
+ DAG.getConstant(NumBytes, DL, MVT::i64));
+ }
+
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
SelectionDAG &DAG) const {
- AArch64FunctionInfo *FuncInfo =
- DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
- SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
- ? FuncInfo->getVarArgsGPRIndex()
- : FuncInfo->getVarArgsStackIndex(),
- getPointerTy(DAG.getDataLayout()));
+ SDValue FR;
+ if (Subtarget->isWindowsArm64EC()) {
+ // With the Arm64EC ABI, we compute the address of the varargs save area
+ // relative to x4. For a normal AArch64->AArch64 call, x4 == sp on entry,
+ // but calls from an entry thunk can pass in a different address.
+ Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(DAG.getEntryNode(), DL, VReg, MVT::i64);
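+ // If incoming GPRs were saved, the va_list starts at the bottom of the
+ // save area, GPRSaveSize bytes below x4; otherwise it starts at the
+ // first vararg stack slot, VarArgsStackOffset bytes above x4.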
+ uint64_t StackOffset;
+ if (FuncInfo->getVarArgsGPRSize() > 0)
+ StackOffset = -(uint64_t)FuncInfo->getVarArgsGPRSize();
+ else
+ StackOffset = FuncInfo->getVarArgsStackOffset();
+ FR = DAG.getNode(ISD::ADD, DL, MVT::i64, Val,
+ DAG.getConstant(StackOffset, DL, MVT::i64));
+ } else {
+ FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
+ ? FuncInfo->getVarArgsGPRIndex()
+ : FuncInfo->getVarArgsStackIndex(),
+ getPointerTy(DAG.getDataLayout()));
+ }
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=arm64ec-pc-windows-msvc < %s | FileCheck %s
+; RUN: llc -mtriple=arm64ec-pc-windows-msvc < %s -global-isel=1 -global-isel-abort=0 | FileCheck %s
+
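+; Smoke tests for the Arm64EC varargs convention: callees locate incoming
+; arguments relative to x4, and callers set x4/x5 to describe the stack
+; argument area.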
+define void @varargs_callee(double %x, ...) nounwind {
+; CHECK-LABEL: varargs_callee:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: stp x1, x2, [x4, #-24]!
+; CHECK-NEXT: str x3, [x4, #16]
+; CHECK-NEXT: str x4, [sp, #8]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ %list = alloca i8*, align 8
+ %listx = bitcast i8** %list to i8*
+ call void @llvm.va_start(i8* nonnull %listx)
+ ret void
+}
+
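+; With five fixed integer arguments, the fifth is already on the stack at
+; [x4], so va_start stores x4+8.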
+define void @varargs_callee_manyargs(i64, i64, i64, i64, i64, ...) nounwind {
+; CHECK-LABEL: varargs_callee_manyargs:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, x4, #8
+; CHECK-NEXT: str x8, [sp, #8]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %list = alloca i8*, align 8
+ %listx = bitcast i8** %list to i8*
+ call void @llvm.va_start(i8* nonnull %listx)
+ ret void
+}
+
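+; The caller passes x4 = sp and x5 = 16 (two 8-byte stack slots); the
+; <2 x double> is passed indirectly via a pointer stored on the stack.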
+define void @varargs_caller() nounwind {
+; CHECK-LABEL: varargs_caller:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: mov x9, #4617315517961601024
+; CHECK-NEXT: mov x0, #4607182418800017408
+; CHECK-NEXT: mov w1, #2
+; CHECK-NEXT: mov x2, #4613937818241073152
+; CHECK-NEXT: mov w3, #4
+; CHECK-NEXT: mov w5, #16
+; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: stp x8, xzr, [sp, #8]
+; CHECK-NEXT: str x9, [sp]
+; CHECK-NEXT: bl varargs_callee
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
+ ret void
+}
+
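+; Fixed arguments of a variadic function also follow the x64-style rules:
+; the <2 x double> arguments arrive indirectly, %d via x3 and %e via a
+; pointer at [x4].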
+define <2 x double> @varargs_many_args_callee(double %a, double %b, double %c,
+; CHECK-LABEL: varargs_many_args_callee:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x4]
+; CHECK-NEXT: ldr q0, [x3]
+; CHECK-NEXT: ldr q1, [x8]
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ <2 x double> %d, <2 x double> %e, ...) nounwind {
+ %rval = fadd <2 x double> %d, %e
+ ret <2 x double> %rval
+}
+
+define void @varargs_many_args_caller() nounwind {
+; CHECK-LABEL: varargs_many_args_caller:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: mov x8, #4618441417868443648
+; CHECK-NEXT: add x9, sp, #16
+; CHECK-NEXT: add x3, sp, #32
+; CHECK-NEXT: mov x0, #4607182418800017408
+; CHECK-NEXT: mov x1, #4611686018427387904
+; CHECK-NEXT: mov x2, #4613937818241073152
+; CHECK-NEXT: mov w5, #16
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: stp q0, q0, [sp, #16]
+; CHECK-NEXT: stp x9, x8, [sp]
+; CHECK-NEXT: bl varargs_many_args_callee
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ call <2 x double> (double, double, double, <2 x double>, <2 x double>, ...)
+ @varargs_many_args_callee(double 1.0, double 2.0, double 3.0,
+ <2 x double> zeroinitializer,
+ <2 x double> zeroinitializer, double 6.0)
+ ret void
+}
+
+declare void @llvm.va_start(i8*)