/// convention differs from the more common \c X86_64_SysV convention
/// in a number of ways, most notably in that XMM registers used to pass
/// arguments are shadowed by GPRs, and vice versa.
- X86_64_Win64 = 79
+ X86_64_Win64 = 79,
+
+ /// \brief MSVC calling convention that passes vectors and vector aggregates
+ /// in SSE registers.
+ X86_VectorCall = 80
};
} // End CallingConv namespace
KEYWORD(x86_stdcallcc);
KEYWORD(x86_fastcallcc);
KEYWORD(x86_thiscallcc);
+ KEYWORD(x86_vectorcallcc);
KEYWORD(arm_apcscc);
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
/// ::= 'x86_stdcallcc'
/// ::= 'x86_fastcallcc'
/// ::= 'x86_thiscallcc'
+/// ::= 'x86_vectorcallcc'
/// ::= 'arm_apcscc'
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
+ case lltok::kw_x86_vectorcallcc:CC = CallingConv::X86_VectorCall; break;
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
kw_intel_ocl_bicc,
- kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
+ kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_x86_vectorcallcc,
kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
kw_msp430_intrcc,
kw_ptx_kernel, kw_ptx_device,
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
+ case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
static void getNameWithPrefixx(raw_ostream &OS, const Twine &GVName,
Mangler::ManglerPrefixTy PrefixTy,
- const DataLayout &DL, bool UseAt) {
+ const DataLayout &DL, char Prefix) {
SmallString<256> TmpData;
StringRef Name = GVName.toStringRef(TmpData);
assert(!Name.empty() && "getNameWithPrefix requires non-empty name");
else if (PrefixTy == Mangler::LinkerPrivate)
OS << DL.getLinkerPrivateGlobalPrefix();
- if (UseAt) {
- OS << '@';
- } else {
- char Prefix = DL.getGlobalPrefix();
- if (Prefix != '\0')
- OS << Prefix;
- }
+ if (Prefix != '\0')
+ OS << Prefix;
// If this is a simple string that doesn't need escaping, just append it.
OS << Name;
void Mangler::getNameWithPrefix(raw_ostream &OS, const Twine &GVName,
ManglerPrefixTy PrefixTy) const {
- return getNameWithPrefixx(OS, GVName, PrefixTy, *DL, false);
+ char Prefix = DL->getGlobalPrefix();
+ return getNameWithPrefixx(OS, GVName, PrefixTy, *DL, Prefix);
}
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
return getNameWithPrefix(OS, GVName, PrefixTy);
}
-/// AddFastCallStdCallSuffix - Microsoft fastcall and stdcall functions require
-/// a suffix on their name indicating the number of words of arguments they
-/// take.
-static void AddFastCallStdCallSuffix(raw_ostream &OS, const Function *F,
- const DataLayout &TD) {
+static bool hasByteCountSuffix(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_VectorCall:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Microsoft fastcall and stdcall functions require a suffix on their name
+/// indicating the number of words of arguments they take.
+static void addByteCountSuffix(raw_ostream &OS, const Function *F,
+ const DataLayout &TD) {
// Calculate arguments size total.
unsigned ArgWords = 0;
for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
// 'Dereference' type in case of byval or inalloca parameter attribute.
if (AI->hasByValOrInAllocaAttr())
Ty = cast<PointerType>(Ty)->getElementType();
- // Size should be aligned to DWORD boundary
- ArgWords += ((TD.getTypeAllocSize(Ty) + 3)/4)*4;
+ // Size should be aligned to pointer size.
+ unsigned PtrSize = TD.getPointerSize();
+ ArgWords += RoundUpToAlignment(TD.getTypeAllocSize(Ty), PtrSize);
}
OS << '@' << ArgWords;
}
StringRef Name = GV->getName();
-
- bool UseAt = false;
- const Function *MSFunc = nullptr;
- CallingConv::ID CC;
- if (Name[0] != '\1' && DL->hasMicrosoftFastStdCallMangling()) {
- if ((MSFunc = dyn_cast<Function>(GV))) {
- CC = MSFunc->getCallingConv();
- // fastcall functions need to start with @ instead of _.
- if (CC == CallingConv::X86_FastCall)
- UseAt = true;
- }
+ char Prefix = DL->getGlobalPrefix();
+
+ // Mangle functions with Microsoft calling conventions specially. Only do
+ // this mangling for x86_64 vectorcall and 32-bit x86.
+ const Function *MSFunc = dyn_cast<Function>(GV);
+ if (Name.startswith("\01"))
+ MSFunc = nullptr; // Don't mangle when \01 is present.
+ CallingConv::ID CC = MSFunc ? MSFunc->getCallingConv() : CallingConv::C;
+ if (!DL->hasMicrosoftFastStdCallMangling() &&
+ CC != CallingConv::X86_VectorCall)
+ MSFunc = nullptr;
+ if (MSFunc) {
+ if (CC == CallingConv::X86_FastCall)
+ Prefix = '@'; // fastcall functions have an @ prefix instead of _.
+ else if (CC == CallingConv::X86_VectorCall)
+ Prefix = '\0'; // vectorcall functions have no prefix.
}
- getNameWithPrefixx(OS, Name, PrefixTy, *DL, UseAt);
+ getNameWithPrefixx(OS, Name, PrefixTy, *DL, Prefix);
if (!MSFunc)
return;
- // If we are supposed to add a microsoft-style suffix for stdcall/fastcall,
- // add it.
- // fastcall and stdcall functions usually need @42 at the end to specify
- // the argument info.
+ // If we are supposed to add a microsoft-style suffix for stdcall, fastcall,
+ // or vectorcall, add it. These functions have a suffix of @N where N is the
+ // cumulative byte size of all of the parameters to the function in decimal.
+ if (CC == CallingConv::X86_VectorCall)
+ OS << '@'; // vectorcall functions use a double @ suffix.
FunctionType *FT = MSFunc->getFunctionType();
- if ((CC == CallingConv::X86_FastCall || CC == CallingConv::X86_StdCall) &&
+ if (hasByteCountSuffix(CC) &&
// "Pure" variadic functions do not receive @0 suffix.
(!FT->isVarArg() || FT->getNumParams() == 0 ||
(FT->getNumParams() == 1 && MSFunc->hasStructRetAttr())))
- AddFastCallStdCallSuffix(OS, MSFunc, *DL);
+ addByteCountSuffix(OS, MSFunc, *DL);
}
void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
namespace llvm {
+inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ // Similar to CCPassIndirect, with the addition of inreg.
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::Indirect;
+ ArgFlags.setInReg();
+ return false; // Continue the search, but now for i32.
+}
+
+
inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
CCState &) {
CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
]>;
+// X86-32 HiPE return-value convention.
+def RetCC_X86_32_VectorCall : CallingConv<[
+ // Vector types are returned in XMM0,XMM1,XMMM2 and XMM3.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+
+ // 256-bit FP vectors
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
+
+ // 512-bit FP vectors
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
+
+ // Return integers in the standard way.
+ CCDelegateTo<RetCC_X86Common>
+]>;
+
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
CCIfType<[f80], CCAssignToStack<0, 0>>
]>;
+def CC_X86_Win64_VectorCall : CallingConv<[
+ // The first 6 floating point and vector types of 128 bits or less use
+ // XMM0-XMM5.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
+
+ // 256-bit vectors use YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
+
+ // 512-bit vectors use ZMM registers.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+
+ // Delegate to fastcall to handle integer types.
+ CCDelegateTo<CC_X86_Win64_C>
+]>;
+
+
def CC_X86_64_GHC : CallingConv<[
// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
CCDelegateTo<CC_X86_32_Common>
]>;
+def CC_X86_32_VectorCall : CallingConv<[
+ // The first 6 floating point and vector types of 128 bits or less use
+ // XMM0-XMM5.
+ CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
+
+ // 256-bit vectors use YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
+
+ // 512-bit vectors use ZMM registers.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+
+ // Otherwise, pass it indirectly.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64,
+ v32i8, v16i16, v8i32, v4i64, v8f32, v4f64,
+ v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCCustom<"CC_X86_32_VectorCallIndirect">>,
+
+ // Delegate to fastcall to handle integer types.
+ CCDelegateTo<CC_X86_32_FastCall>
+]>;
+
def CC_X86_32_ThisCall_Common : CallingConv<[
// The first integer argument is passed in ECX
CCIfType<[i32], CCAssignToReg<[ECX]>>,
// This is the root argument convention for the X86-32 backend.
def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_X86_64_AnyReg>>,
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
--- /dev/null
+; RUN: llc -mtriple=i686-pc-win32 -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+; RUN: llc -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X64
+
+; Test integer arguments.
+
+define x86_vectorcallcc i32 @test_int_1() {
+ ret i32 0
+}
+
+; CHECK-LABEL: {{^}}test_int_1@@0:
+; CHECK: xorl %eax, %eax
+
+define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) {
+ ret i32 %a
+}
+
+; X86-LABEL: {{^}}test_int_2@@4:
+; X64-LABEL: {{^}}test_int_2@@8:
+; CHECK: movl %ecx, %eax
+
+define x86_vectorcallcc i32 @test_int_3(i64 inreg %a) {
+ %at = trunc i64 %a to i32
+ ret i32 %at
+}
+
+; X86-LABEL: {{^}}test_int_3@@8:
+; X64-LABEL: {{^}}test_int_3@@8:
+; CHECK: movl %ecx, %eax
+
+define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) {
+ %s = add i32 %a, %b
+ ret i32 %s
+}
+
+; X86-LABEL: {{^}}test_int_4@@8:
+; X86: leal (%ecx,%edx), %eax
+
+; X64-LABEL: {{^}}test_int_4@@16:
+; X64: leal (%rcx,%rdx), %eax
+
+define x86_vectorcallcc i32 @"\01test_int_5"(i32, i32) {
+ ret i32 0
+}
+; CHECK-LABEL: {{^}}test_int_5:
+
+define x86_vectorcallcc double @test_fp_1(double %a, double %b) {
+ ret double %b
+}
+; CHECK-LABEL: {{^}}test_fp_1@@16:
+; CHECK: movaps %xmm1, %xmm0
+
+define x86_vectorcallcc double @test_fp_2(
+ double, double, double, double, double, double, double %r) {
+ ret double %r
+}
+; CHECK-LABEL: {{^}}test_fp_2@@56:
+; CHECK: movsd {{[0-9]+\(%[re]sp\)}}, %xmm0
+
+define x86_vectorcallcc {double, double, double, double} @test_fp_3() {
+ ret {double, double, double, double}
+ { double 0.0, double 0.0, double 0.0, double 0.0 }
+}
+; CHECK-LABEL: {{^}}test_fp_3@@0:
+; CHECK: xorps %xmm0
+; CHECK: xorps %xmm1
+; CHECK: xorps %xmm2
+; CHECK: xorps %xmm3
+
+; FIXME: Returning via x87 isn't compatible, but its hard to structure the
+; tablegen any other way.
+define x86_vectorcallcc {double, double, double, double, double} @test_fp_4() {
+ ret {double, double, double, double, double}
+ { double 0.0, double 0.0, double 0.0, double 0.0, double 0.0 }
+}
+; CHECK-LABEL: {{^}}test_fp_4@@0:
+; CHECK: fldz
+; CHECK: xorps %xmm0
+; CHECK: xorps %xmm1
+; CHECK: xorps %xmm2
+; CHECK: xorps %xmm3
+
+define x86_vectorcallcc <16 x i8> @test_vec_1(<16 x i8> %a, <16 x i8> %b) {
+ ret <16 x i8> %b
+}
+; CHECK-LABEL: {{^}}test_vec_1@@32:
+; CHECK: movaps %xmm1, %xmm0
+
+define x86_vectorcallcc <16 x i8> @test_vec_2(
+ double, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> %r) {
+ ret <16 x i8> %r
+}
+; CHECK-LABEL: {{^}}test_vec_2@@104:
+; CHECK: movaps (%{{[re]}}cx), %xmm0