From: Tim Northover
Date: Sat, 24 May 2014 12:51:25 +0000 (+0000)
Subject: AArch64/ARM64: update Clang after AArch64 removal.
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=25e8a6754e3f4c447ddfe5b742c01c16cb050b67;p=platform%2Fupstream%2Fllvm.git

AArch64/ARM64: update Clang after AArch64 removal.

A few (mostly CodeGen) parts of Clang were tightly coupled to the AArch64
backend. Now that it's gone, they will not even compile.

I've also deduplicated RUN lines in many of the AArch64 tests. This might
improve "make check-all" time noticeably: some of those NEON tests were
monsters.

llvm-svn: 209578
---

diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
deleted file mode 100644
index a0a0a5d..0000000
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ /dev/null
@@ -1,20 +0,0 @@
-//===-- BuiltinsAArch64.def - AArch64 Builtin function database -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the AArch64-specific builtin function database. Users of
-// this file must define the BUILTIN macro to make use of this information.
-//
-//===----------------------------------------------------------------------===//
-
-// The format of this database matches clang/Basic/Builtins.def.
-
-// In libgcc
-BUILTIN(__clear_cache, "vv*v*", "i")
-
-#undef BUILTIN
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index e6cc9ab..50329d4 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -30,16 +30,6 @@ namespace clang {
   };
   }

-  /// \brief AArch64 builtins
-  namespace AArch64 {
-  enum {
-    LastTIBuiltin = clang::Builtin::FirstTSBuiltin-1,
-    LastNEONBuiltin = NEON::FirstTSBuiltin - 1,
-#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
-#include "clang/Basic/BuiltinsAArch64.def"
-    LastTSBuiltin
-  };
-  }
   /// \brief ARM builtins
   namespace ARM {
   enum {
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 578cc1f..053e655 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -8126,7 +8126,6 @@ private:
   bool CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckARM64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
-  bool CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckMipsBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
   bool CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index f1a16b5..58a44b7 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -2300,10 +2300,10 @@ void CXXNameMangler::mangleType(const VectorType *T) {
   llvm::Triple Target = getASTContext().getTargetInfo().getTriple();
   llvm::Triple::ArchType Arch =
       getASTContext().getTargetInfo().getTriple().getArch();
-  if (Arch == llvm::Triple::aarch64 ||
-      Arch == llvm::Triple::aarch64_be ||
-      Arch == llvm::Triple::arm64_be ||
-      (Arch == llvm::Triple::arm64 && !Target.isOSDarwin()))
+  if ((Arch == llvm::Triple::aarch64 ||
+       Arch == llvm::Triple::aarch64_be ||
+       Arch == llvm::Triple::arm64_be ||
+       Arch == llvm::Triple::arm64) && !Target.isOSDarwin())
     mangleAArch64NeonVectorType(T);
   else
mangleNeonVectorType(T); diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp index 62d44be..82d79f7 100644 --- a/clang/lib/Basic/Targets.cpp +++ b/clang/lib/Basic/Targets.cpp @@ -3408,289 +3408,6 @@ public: }; } -namespace { -class AArch64TargetInfo : public TargetInfo { - virtual void setDescriptionString() = 0; - static const char * const GCCRegNames[]; - static const TargetInfo::GCCRegAlias GCCRegAliases[]; - - enum FPUModeEnum { - FPUMode, - NeonMode - }; - - unsigned FPU; - unsigned CRC; - unsigned Crypto; - static const Builtin::Info BuiltinInfo[]; - -public: - AArch64TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple) { - LongWidth = LongAlign = 64; - LongDoubleWidth = LongDoubleAlign = 128; - PointerWidth = PointerAlign = 64; - SuitableAlign = 128; - - WCharType = UnsignedInt; - if (getTriple().getOS() == llvm::Triple::NetBSD) { - WCharType = SignedInt; - Int64Type = SignedLongLong; - IntMaxType = SignedLongLong; - UIntMaxType = UnsignedLongLong; - } else { - WCharType = UnsignedInt; - Int64Type = SignedLong; - IntMaxType = SignedLong; - UIntMaxType = UnsignedLong; - } - LongDoubleFormat = &llvm::APFloat::IEEEquad; - - // AArch64 backend supports 64-bit operations at the moment. In principle - // 128-bit is possible if register-pairs are used. - MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64; - - TheCXXABI.set(TargetCXXABI::GenericAArch64); - } - void getTargetDefines(const LangOptions &Opts, - MacroBuilder &Builder) const override { - // GCC defines theses currently - Builder.defineMacro("__aarch64__"); - - // ACLE predefines. Many can only have one possible value on v8 AArch64. - Builder.defineMacro("__ARM_ACLE", "200"); - Builder.defineMacro("__ARM_ARCH", "8"); - Builder.defineMacro("__ARM_ARCH_PROFILE", "'A'"); - - Builder.defineMacro("__ARM_64BIT_STATE"); - Builder.defineMacro("__ARM_PCS_AAPCS64"); - Builder.defineMacro("__ARM_ARCH_ISA_A64"); - - Builder.defineMacro("__ARM_FEATURE_UNALIGNED"); - Builder.defineMacro("__ARM_FEATURE_CLZ"); - Builder.defineMacro("__ARM_FEATURE_FMA"); - Builder.defineMacro("__ARM_FEATURE_DIV"); - - Builder.defineMacro("__ARM_ALIGN_MAX_STACK_PWR", "4"); - - // 0xe implies support for half, single and double precision operations. - Builder.defineMacro("__ARM_FP", "0xe"); - - // PCS specifies this for SysV variants, which is all we support. Other ABIs - // may choose __ARM_FP16_FORMAT_ALTERNATIVE. - Builder.defineMacro("__ARM_FP16_FORMAT_IEEE"); - - if (Opts.FastMath || Opts.FiniteMathOnly) - Builder.defineMacro("__ARM_FP_FAST"); - - if ((Opts.C99 || Opts.C11) && !Opts.Freestanding) - Builder.defineMacro("__ARM_FP_FENV_ROUNDING"); - - Builder.defineMacro("__ARM_SIZEOF_WCHAR_T", - Opts.ShortWChar ? "2" : "4"); - - Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", - Opts.ShortEnums ? "1" : "4"); - - if (FPU == NeonMode) { - Builder.defineMacro("__ARM_NEON"); - // 64-bit NEON supports half, single and double precision operations. 
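
(An illustrative aside, not part of the patch: the "0xe" defined just below is the ACLE bitmask 0x2 | 0x4 | 0x8 for half, single and double precision, matching the comment above. A minimal, hypothetical sketch of how user code might consume these feature macros:)

#include <cstdio>

int main() {
#if defined(__ARM_NEON) && defined(__ARM_NEON_FP) && (__ARM_NEON_FP & 0x8)
  std::printf("NEON with double-precision support\n");
#elif defined(__ARM_NEON)
  std::printf("NEON without double-precision support\n");
#else
  std::printf("no NEON\n");
#endif
  return 0;
}
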
- Builder.defineMacro("__ARM_NEON_FP", "0xe"); - } - - if (CRC) - Builder.defineMacro("__ARM_FEATURE_CRC32"); - - if (Crypto) { - Builder.defineMacro("__ARM_FEATURE_CRYPTO"); - } - } - void getTargetBuiltins(const Builtin::Info *&Records, - unsigned &NumRecords) const override { - Records = BuiltinInfo; - NumRecords = clang::AArch64::LastTSBuiltin-Builtin::FirstTSBuiltin; - } - bool hasFeature(StringRef Feature) const override { - return Feature == "aarch64" || (Feature == "neon" && FPU == NeonMode); - } - - bool setCPU(const std::string &Name) override { - return llvm::StringSwitch(Name) - .Case("generic", true) - .Cases("cortex-a53", "cortex-a57", true) - .Default(false); - } - - bool handleTargetFeatures(std::vector &Features, - DiagnosticsEngine &Diags) override { - FPU = FPUMode; - CRC = 0; - Crypto = 0; - for (unsigned i = 0, e = Features.size(); i != e; ++i) { - if (Features[i] == "+neon") - FPU = NeonMode; - if (Features[i] == "+crc") - CRC = 1; - if (Features[i] == "+crypto") - Crypto = 1; - } - - setDescriptionString(); - - return true; - } - - void getGCCRegNames(const char *const *&Names, - unsigned &NumNames) const override; - void getGCCRegAliases(const GCCRegAlias *&Aliases, - unsigned &NumAliases) const override; - - bool isCLZForZeroUndef() const override { return false; } - - bool validateAsmConstraint(const char *&Name, - TargetInfo::ConstraintInfo &Info) const override { - switch (*Name) { - default: return false; - case 'w': // An FP/SIMD vector register - Info.setAllowsRegister(); - return true; - case 'I': // Constant that can be used with an ADD instruction - case 'J': // Constant that can be used with a SUB instruction - case 'K': // Constant that can be used with a 32-bit logical instruction - case 'L': // Constant that can be used with a 64-bit logical instruction - case 'M': // Constant that can be used as a 32-bit MOV immediate - case 'N': // Constant that can be used as a 64-bit MOV immediate - case 'Y': // Floating point constant zero - case 'Z': // Integer constant zero - return true; - case 'Q': // A memory reference with base register and no offset - Info.setAllowsMemory(); - return true; - case 'S': // A symbolic address - Info.setAllowsRegister(); - return true; - case 'U': - // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes, whatever they may be - // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be - // Usa: An absolute symbolic address - // Ush: The high part (bits 32:12) of a pc-relative symbolic address - llvm_unreachable("FIXME: Unimplemented support for bizarre constraints"); - } - } - - const char *getClobbers() const override { - // There are no AArch64 clobbers shared by all asm statements. 
- return ""; - } - - BuiltinVaListKind getBuiltinVaListKind() const override { - return TargetInfo::AArch64ABIBuiltinVaList; - } -}; - -const char * const AArch64TargetInfo::GCCRegNames[] = { - "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7", - "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15", - "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23", - "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wsp", "wzr", - - "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", - "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", - "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", - "x24", "x25", "x26", "x27", "x28", "x29", "x30", "sp", "xzr", - - "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7", - "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15", - "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23", - "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31", - - "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7", - "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15", - "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23", - "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31", - - "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", - "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", - "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", - "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", - - "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", - "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", - "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", - "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", - - "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", - "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", - "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23", - "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" -}; - -void AArch64TargetInfo::getGCCRegNames(const char * const *&Names, - unsigned &NumNames) const { - Names = GCCRegNames; - NumNames = llvm::array_lengthof(GCCRegNames); -} - -const TargetInfo::GCCRegAlias AArch64TargetInfo::GCCRegAliases[] = { - { { "x16" }, "ip0"}, - { { "x17" }, "ip1"}, - { { "x29" }, "fp" }, - { { "x30" }, "lr" } -}; - -void AArch64TargetInfo::getGCCRegAliases(const GCCRegAlias *&Aliases, - unsigned &NumAliases) const { - Aliases = GCCRegAliases; - NumAliases = llvm::array_lengthof(GCCRegAliases); - -} - -const Builtin::Info AArch64TargetInfo::BuiltinInfo[] = { -#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES }, -#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\ - ALL_LANGUAGES }, -#include "clang/Basic/BuiltinsNEON.def" - -#define BUILTIN(ID, TYPE, ATTRS) { #ID, TYPE, ATTRS, 0, ALL_LANGUAGES }, -#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) { #ID, TYPE, ATTRS, HEADER,\ - ALL_LANGUAGES }, -#include "clang/Basic/BuiltinsAArch64.def" -}; - -class AArch64leTargetInfo : public AArch64TargetInfo { - void setDescriptionString() override { - DescriptionString = "e-m:e-i64:64-i128:128-n32:64-S128"; - } - -public: - AArch64leTargetInfo(const llvm::Triple &Triple) - : AArch64TargetInfo(Triple) { - BigEndian = false; - } - void getTargetDefines(const LangOptions &Opts, - MacroBuilder &Builder) const override { - Builder.defineMacro("__AARCH64EL__"); - AArch64TargetInfo::getTargetDefines(Opts, Builder); - } -}; - -class AArch64beTargetInfo : public AArch64TargetInfo { - void setDescriptionString() override { - DescriptionString = "E-m:e-i64:64-i128:128-n32:64-S128"; - } - -public: - AArch64beTargetInfo(const llvm::Triple &Triple) - : AArch64TargetInfo(Triple) { } - void getTargetDefines(const 
LangOptions &Opts, - MacroBuilder &Builder) const override { - Builder.defineMacro("__AARCH64EB__"); - Builder.defineMacro("__AARCH_BIG_ENDIAN"); - Builder.defineMacro("__ARM_BIG_ENDIAN"); - AArch64TargetInfo::getTargetDefines(Opts, Builder); - } -}; - -} // end anonymous namespace namespace { class ARMTargetInfo : public TargetInfo { @@ -4537,11 +4254,23 @@ class ARM64TargetInfo : public TargetInfo { public: ARM64TargetInfo(const llvm::Triple &Triple) : TargetInfo(Triple), ABI("aapcs") { + + if (getTriple().getOS() == llvm::Triple::NetBSD) { + WCharType = SignedInt; + + // NetBSD apparently prefers consistency across ARM targets to consistency + // across 64-bit targets. + Int64Type = SignedLongLong; + IntMaxType = SignedLongLong; + UIntMaxType = UnsignedLongLong; + } else { + WCharType = UnsignedInt; + Int64Type = SignedLong; + IntMaxType = SignedLong; + UIntMaxType = UnsignedLong; + } + LongWidth = LongAlign = PointerWidth = PointerAlign = 64; - IntMaxType = SignedLong; - UIntMaxType = UnsignedLong; - Int64Type = SignedLong; - WCharType = UnsignedInt; MaxVectorAlign = 128; RegParmMax = 8; MaxAtomicInlineWidth = 128; @@ -6218,21 +5947,21 @@ static TargetInfo *AllocateTarget(const llvm::Triple &Triple) { case llvm::Triple::aarch64: switch (os) { case llvm::Triple::Linux: - return new LinuxTargetInfo(Triple); + return new LinuxTargetInfo(Triple); case llvm::Triple::NetBSD: - return new NetBSDTargetInfo(Triple); + return new NetBSDTargetInfo(Triple); default: - return new AArch64leTargetInfo(Triple); + return new ARM64leTargetInfo(Triple); } case llvm::Triple::aarch64_be: switch (os) { case llvm::Triple::Linux: - return new LinuxTargetInfo(Triple); + return new LinuxTargetInfo(Triple); case llvm::Triple::NetBSD: - return new NetBSDTargetInfo(Triple); + return new NetBSDTargetInfo(Triple); default: - return new AArch64beTargetInfo(Triple); + return new ARM64beTargetInfo(Triple); } case llvm::Triple::arm: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9d692d8..585db17 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -1637,14 +1637,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD, Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (getTarget().getTriple().getArch()) { - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - return EmitAArch64BuiltinExpr(BuiltinID, E); case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: return EmitARMBuiltinExpr(BuiltinID, E); + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: case llvm::Triple::arm64: case llvm::Triple::arm64_be: return EmitARM64BuiltinExpr(BuiltinID, E); @@ -1883,354 +1882,6 @@ enum { Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ #NameBase, TypeModifier } -static const NeonIntrinsicInfo AArch64SISDIntrinsicInfo[] = { - NEONMAP1(vabdd_f64, aarch64_neon_vabd, AddRetType), - NEONMAP1(vabds_f32, aarch64_neon_vabd, AddRetType), - NEONMAP1(vabsd_s64, aarch64_neon_vabs, 0), - NEONMAP1(vaddd_s64, aarch64_neon_vaddds, 0), - NEONMAP1(vaddd_u64, aarch64_neon_vadddu, 0), - NEONMAP1(vaddlv_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlv_u8, 
aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_s16, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_s8, aarch64_neon_saddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_u16, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddlvq_u8, aarch64_neon_uaddlv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vaddv_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddv_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vaddvq_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vaddvq_s16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_s32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_s64, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_s8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u16, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u32, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u64, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vaddvq_u8, aarch64_neon_vaddv, VectorRet | Add1ArgType), - NEONMAP1(vcaged_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcages_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcagtd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcagts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcaled_f64, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcales_f32, aarch64_neon_fcage, VectorRet | Add2ArgTypes), - NEONMAP1(vcaltd_f64, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vcalts_f32, aarch64_neon_fcagt, VectorRet | Add2ArgTypes), - NEONMAP1(vceqd_f64, aarch64_neon_fceq, VectorRet | Add2ArgTypes), - NEONMAP1(vceqd_s64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqd_u64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqs_f32, aarch64_neon_fceq, VectorRet | Add2ArgTypes), - NEONMAP1(vceqzd_f64, aarch64_neon_fceq, FpCmpzModifiers), - NEONMAP1(vceqzd_s64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqzd_u64, aarch64_neon_vceq, VectorRetGetArgs01), - NEONMAP1(vceqzs_f32, aarch64_neon_fceq, FpCmpzModifiers), - NEONMAP1(vcged_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcged_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcged_u64, aarch64_neon_vchs, VectorRetGetArgs01), - NEONMAP1(vcges_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcgezd_f64, aarch64_neon_fcge, FpCmpzModifiers), - NEONMAP1(vcgezd_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcgezs_f32, aarch64_neon_fcge, FpCmpzModifiers), - NEONMAP1(vcgtd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcgtd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcgtd_u64, aarch64_neon_vchi, VectorRetGetArgs01), - NEONMAP1(vcgts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcgtzd_f64, aarch64_neon_fcgt, FpCmpzModifiers), - NEONMAP1(vcgtzd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcgtzs_f32, 
aarch64_neon_fcgt, FpCmpzModifiers), - NEONMAP1(vcled_f64, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vcled_s64, aarch64_neon_vcge, VectorRetGetArgs01), - NEONMAP1(vcled_u64, aarch64_neon_vchs, VectorRetGetArgs01), - NEONMAP1(vcles_f32, aarch64_neon_fcge, VectorRet | Add2ArgTypes), - NEONMAP1(vclezd_f64, aarch64_neon_fclez, FpCmpzModifiers), - NEONMAP1(vclezd_s64, aarch64_neon_vclez, VectorRetGetArgs01), - NEONMAP1(vclezs_f32, aarch64_neon_fclez, FpCmpzModifiers), - NEONMAP1(vcltd_f64, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcltd_s64, aarch64_neon_vcgt, VectorRetGetArgs01), - NEONMAP1(vcltd_u64, aarch64_neon_vchi, VectorRetGetArgs01), - NEONMAP1(vclts_f32, aarch64_neon_fcgt, VectorRet | Add2ArgTypes), - NEONMAP1(vcltzd_f64, aarch64_neon_fcltz, FpCmpzModifiers), - NEONMAP1(vcltzd_s64, aarch64_neon_vcltz, VectorRetGetArgs01), - NEONMAP1(vcltzs_f32, aarch64_neon_fcltz, FpCmpzModifiers), - NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, VectorRet | Add1ArgType), - NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, VectorRet | Add1ArgType), - NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, VectorRet | Add1ArgType), - NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_f64_s64, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_f64_u64, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, VectorRet | Add1ArgType), - NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, VectorRet | Add1ArgType), - NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, VectorRet | Add1ArgType), - NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, VectorRet | Add1ArgType), - NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, VectorRet | Add1ArgType), - NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, VectorRet | Add1ArgType), - NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, VectorRet | Add1ArgType), - NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, VectorRet | Add1ArgType), - NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, VectorRet | Add1ArgType), - NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, VectorRet | Add1ArgType), - NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, VectorRet | Add1ArgType), - NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, VectorRet | Add1ArgType), - NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, VectorRet | Add1ArgType), - NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, VectorRet | Add1ArgType), - NEONMAP1(vcvts_f32_s32, aarch64_neon_vcvtint2fps, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_f32_u32, aarch64_neon_vcvtint2fpu, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp_n, AddRetType | Vectorize1ArgType), - NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs_n, VectorRet | Add1ArgType), - NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu_n, VectorRet | Add1ArgType), - NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, VectorRet | Add1ArgType), - NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, VectorRet | Add1ArgType), - NEONMAP1(vcvtxd_f32_f64, aarch64_neon_fcvtxn, 0), - NEONMAP0(vdupb_lane_i8), - NEONMAP0(vdupb_laneq_i8), - 
NEONMAP0(vdupd_lane_f64), - NEONMAP0(vdupd_lane_i64), - NEONMAP0(vdupd_laneq_f64), - NEONMAP0(vdupd_laneq_i64), - NEONMAP0(vduph_lane_i16), - NEONMAP0(vduph_laneq_i16), - NEONMAP0(vdups_lane_f32), - NEONMAP0(vdups_lane_i32), - NEONMAP0(vdups_laneq_f32), - NEONMAP0(vdups_laneq_i32), - NEONMAP0(vfmad_lane_f64), - NEONMAP0(vfmad_laneq_f64), - NEONMAP0(vfmas_lane_f32), - NEONMAP0(vfmas_laneq_f32), - NEONMAP0(vget_lane_f32), - NEONMAP0(vget_lane_f64), - NEONMAP0(vget_lane_i16), - NEONMAP0(vget_lane_i32), - NEONMAP0(vget_lane_i64), - NEONMAP0(vget_lane_i8), - NEONMAP0(vgetq_lane_f32), - NEONMAP0(vgetq_lane_f64), - NEONMAP0(vgetq_lane_i16), - NEONMAP0(vgetq_lane_i32), - NEONMAP0(vgetq_lane_i64), - NEONMAP0(vgetq_lane_i8), - NEONMAP1(vmaxnmv_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vmaxnmvq_f32, aarch64_neon_vmaxnmv, 0), - NEONMAP1(vmaxnmvq_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vmaxv_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vmaxv_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxv_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_f32, aarch64_neon_vmaxv, 0), - NEONMAP1(vmaxvq_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vmaxvq_s16, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_s8, aarch64_neon_smaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u16, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vmaxvq_u8, aarch64_neon_umaxv, VectorRet | Add1ArgType), - NEONMAP1(vminnmv_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vminnmvq_f32, aarch64_neon_vminnmv, 0), - NEONMAP1(vminnmvq_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vminv_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vminv_s16, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_s32, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_s8, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u16, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u32, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminv_u8, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_f32, aarch64_neon_vminv, 0), - NEONMAP1(vminvq_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vminvq_s16, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_s32, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_s8, aarch64_neon_sminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u16, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u32, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP1(vminvq_u8, aarch64_neon_uminv, VectorRet | Add1ArgType), - NEONMAP0(vmul_n_f64), - NEONMAP1(vmull_p64, aarch64_neon_vmull_p64, 0), - NEONMAP0(vmulxd_f64), - NEONMAP0(vmulxs_f32), - NEONMAP1(vnegd_s64, aarch64_neon_vneg, 0), - NEONMAP1(vpaddd_f64, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - NEONMAP1(vpaddd_s64, aarch64_neon_vpadd, 0), - NEONMAP1(vpaddd_u64, aarch64_neon_vpadd, 0), - NEONMAP1(vpadds_f32, aarch64_neon_vpfadd, AddRetType | Add1ArgType), - 
NEONMAP1(vpmaxnmqd_f64, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vpmaxnms_f32, aarch64_neon_vpfmaxnm, AddRetType | Add1ArgType), - NEONMAP1(vpmaxqd_f64, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vpmaxs_f32, aarch64_neon_vpmax, AddRetType | Add1ArgType), - NEONMAP1(vpminnmqd_f64, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vpminnms_f32, aarch64_neon_vpfminnm, AddRetType | Add1ArgType), - NEONMAP1(vpminqd_f64, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vpmins_f32, aarch64_neon_vpmin, AddRetType | Add1ArgType), - NEONMAP1(vqabsb_s8, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabsd_s64, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabsh_s16, arm_neon_vqabs, VectorRet), - NEONMAP1(vqabss_s32, arm_neon_vqabs, VectorRet), - NEONMAP1(vqaddb_s8, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddb_u8, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqaddd_s64, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddd_u64, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqaddh_s16, arm_neon_vqadds, VectorRet), - NEONMAP1(vqaddh_u16, arm_neon_vqaddu, VectorRet), - NEONMAP1(vqadds_s32, arm_neon_vqadds, VectorRet), - NEONMAP1(vqadds_u32, arm_neon_vqaddu, VectorRet), - NEONMAP0(vqdmlalh_lane_s16), - NEONMAP0(vqdmlalh_laneq_s16), - NEONMAP1(vqdmlalh_s16, aarch64_neon_vqdmlal, VectorRet), - NEONMAP0(vqdmlals_lane_s32), - NEONMAP0(vqdmlals_laneq_s32), - NEONMAP1(vqdmlals_s32, aarch64_neon_vqdmlal, VectorRet), - NEONMAP0(vqdmlslh_lane_s16), - NEONMAP0(vqdmlslh_laneq_s16), - NEONMAP1(vqdmlslh_s16, aarch64_neon_vqdmlsl, VectorRet), - NEONMAP0(vqdmlsls_lane_s32), - NEONMAP0(vqdmlsls_laneq_s32), - NEONMAP1(vqdmlsls_s32, aarch64_neon_vqdmlsl, VectorRet), - NEONMAP1(vqdmulhh_s16, arm_neon_vqdmulh, VectorRet), - NEONMAP1(vqdmulhs_s32, arm_neon_vqdmulh, VectorRet), - NEONMAP1(vqdmullh_s16, arm_neon_vqdmull, VectorRet), - NEONMAP1(vqdmulls_s32, arm_neon_vqdmull, VectorRet), - NEONMAP1(vqmovnd_s64, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovnd_u64, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovnh_s16, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovnh_u16, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovns_s32, arm_neon_vqmovns, VectorRet), - NEONMAP1(vqmovns_u32, arm_neon_vqmovnu, VectorRet), - NEONMAP1(vqmovund_s64, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqmovunh_s16, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqmovuns_s32, arm_neon_vqmovnsu, VectorRet), - NEONMAP1(vqnegb_s8, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegd_s64, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegh_s16, arm_neon_vqneg, VectorRet), - NEONMAP1(vqnegs_s32, arm_neon_vqneg, VectorRet), - NEONMAP1(vqrdmulhh_s16, arm_neon_vqrdmulh, VectorRet), - NEONMAP1(vqrdmulhs_s32, arm_neon_vqrdmulh, VectorRet), - NEONMAP1(vqrshlb_s8, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshlb_u8, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshld_s64, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshld_u64, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshlh_s16, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshlh_u16, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshls_s32, aarch64_neon_vqrshls, VectorRet), - NEONMAP1(vqrshls_u32, aarch64_neon_vqrshlu, VectorRet), - NEONMAP1(vqrshrnd_n_s64, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrnd_n_u64, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrnh_n_s16, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrnh_n_u16, aarch64_neon_vuqrshrn, VectorRet), - NEONMAP1(vqrshrns_n_s32, aarch64_neon_vsqrshrn, VectorRet), - NEONMAP1(vqrshrns_n_u32, aarch64_neon_vuqrshrn, VectorRet), - 
NEONMAP1(vqrshrund_n_s64, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqrshrunh_n_s16, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqrshruns_n_s32, aarch64_neon_vsqrshrun, VectorRet), - NEONMAP1(vqshlb_n_s8, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshlb_n_u8, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshlb_s8, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshlb_u8, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshld_n_s64, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshld_n_u64, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshld_s64, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshld_u64, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshlh_n_s16, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshlh_n_u16, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshlh_s16, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshlh_u16, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshls_n_s32, aarch64_neon_vqshls_n, VectorRet), - NEONMAP1(vqshls_n_u32, aarch64_neon_vqshlu_n, VectorRet), - NEONMAP1(vqshls_s32, aarch64_neon_vqshls, VectorRet), - NEONMAP1(vqshls_u32, aarch64_neon_vqshlu, VectorRet), - NEONMAP1(vqshlub_n_s8, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshlud_n_s64, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshluh_n_s16, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshlus_n_s32, aarch64_neon_vsqshlu, VectorRet), - NEONMAP1(vqshrnd_n_s64, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrnd_n_u64, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrnh_n_s16, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrnh_n_u16, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrns_n_s32, aarch64_neon_vsqshrn, VectorRet), - NEONMAP1(vqshrns_n_u32, aarch64_neon_vuqshrn, VectorRet), - NEONMAP1(vqshrund_n_s64, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqshrunh_n_s16, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqshruns_n_s32, aarch64_neon_vsqshrun, VectorRet), - NEONMAP1(vqsubb_s8, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubb_u8, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubd_s64, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubd_u64, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubh_s16, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubh_u16, arm_neon_vqsubu, VectorRet), - NEONMAP1(vqsubs_s32, arm_neon_vqsubs, VectorRet), - NEONMAP1(vqsubs_u32, arm_neon_vqsubu, VectorRet), - NEONMAP1(vrecped_f64, aarch64_neon_vrecpe, AddRetType), - NEONMAP1(vrecpes_f32, aarch64_neon_vrecpe, AddRetType), - NEONMAP1(vrecpsd_f64, aarch64_neon_vrecps, AddRetType), - NEONMAP1(vrecpss_f32, aarch64_neon_vrecps, AddRetType), - NEONMAP1(vrecpxd_f64, aarch64_neon_vrecpx, AddRetType), - NEONMAP1(vrecpxs_f32, aarch64_neon_vrecpx, AddRetType), - NEONMAP1(vrshld_s64, aarch64_neon_vrshlds, 0), - NEONMAP1(vrshld_u64, aarch64_neon_vrshldu, 0), - NEONMAP1(vrshrd_n_s64, aarch64_neon_vsrshr, VectorRet), - NEONMAP1(vrshrd_n_u64, aarch64_neon_vurshr, VectorRet), - NEONMAP1(vrsqrted_f64, aarch64_neon_vrsqrte, AddRetType), - NEONMAP1(vrsqrtes_f32, aarch64_neon_vrsqrte, AddRetType), - NEONMAP1(vrsqrtsd_f64, aarch64_neon_vrsqrts, AddRetType), - NEONMAP1(vrsqrtss_f32, aarch64_neon_vrsqrts, AddRetType), - NEONMAP1(vrsrad_n_s64, aarch64_neon_vrsrads_n, 0), - NEONMAP1(vrsrad_n_u64, aarch64_neon_vrsradu_n, 0), - NEONMAP0(vset_lane_f32), - NEONMAP0(vset_lane_f64), - NEONMAP0(vset_lane_i16), - NEONMAP0(vset_lane_i32), - NEONMAP0(vset_lane_i64), - NEONMAP0(vset_lane_i8), - NEONMAP0(vsetq_lane_f32), - NEONMAP0(vsetq_lane_f64), - NEONMAP0(vsetq_lane_i16), - NEONMAP0(vsetq_lane_i32), - NEONMAP0(vsetq_lane_i64), - NEONMAP0(vsetq_lane_i8), - 
NEONMAP1(vsha1cq_u32, arm_neon_sha1c, 0), - NEONMAP1(vsha1h_u32, arm_neon_sha1h, 0), - NEONMAP1(vsha1mq_u32, arm_neon_sha1m, 0), - NEONMAP1(vsha1pq_u32, arm_neon_sha1p, 0), - NEONMAP1(vshld_n_s64, aarch64_neon_vshld_n, 0), - NEONMAP1(vshld_n_u64, aarch64_neon_vshld_n, 0), - NEONMAP1(vshld_s64, aarch64_neon_vshlds, 0), - NEONMAP1(vshld_u64, aarch64_neon_vshldu, 0), - NEONMAP1(vshrd_n_s64, aarch64_neon_vshrds_n, 0), - NEONMAP1(vshrd_n_u64, aarch64_neon_vshrdu_n, 0), - NEONMAP1(vslid_n_s64, aarch64_neon_vsli, VectorRet), - NEONMAP1(vslid_n_u64, aarch64_neon_vsli, VectorRet), - NEONMAP1(vsqaddb_u8, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqaddd_u64, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqaddh_u16, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsqadds_u32, aarch64_neon_vsqadd, VectorRet), - NEONMAP1(vsrad_n_s64, aarch64_neon_vsrads_n, 0), - NEONMAP1(vsrad_n_u64, aarch64_neon_vsradu_n, 0), - NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, VectorRet), - NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, VectorRet), - NEONMAP1(vsubd_s64, aarch64_neon_vsubds, 0), - NEONMAP1(vsubd_u64, aarch64_neon_vsubdu, 0), - NEONMAP1(vtstd_s64, aarch64_neon_vtstd, VectorRetGetArgs01), - NEONMAP1(vtstd_u64, aarch64_neon_vtstd, VectorRetGetArgs01), - NEONMAP1(vuqaddb_s8, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqaddd_s64, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqaddh_s16, aarch64_neon_vuqadd, VectorRet), - NEONMAP1(vuqadds_s32, aarch64_neon_vuqadd, VectorRet) -}; - static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), @@ -2739,7 +2390,6 @@ static NeonIntrinsicInfo ARM64SISDIntrinsicMap[] = { #undef NEONMAP2 static bool NEONSIMDIntrinsicsProvenSorted = false; -static bool AArch64SISDIntrinsicInfoProvenSorted = false; static bool ARM64SIMDIntrinsicsProvenSorted = false; static bool ARM64SISDIntrinsicsProvenSorted = false; @@ -2869,169 +2519,6 @@ static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, return CGF.Builder.CreateBitCast(Result, ResultType, s); } -static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF, - const NeonIntrinsicInfo &SISDInfo, - const CallExpr *E) { - unsigned BuiltinID = SISDInfo.BuiltinID; - unsigned int Int = SISDInfo.LLVMIntrinsic; - const char *s = SISDInfo.NameHint; - - SmallVector Ops; - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { - Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); - } - - // AArch64 scalar builtins are not overloaded, they do not have an extra - // argument that specifies the vector type, need to handle each case. 
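
(Aside, not part of the patch: dispatch tables like the AArch64SISDIntrinsicInfo array being deleted above are kept sorted by builtin ID so that findNeonIntrinsicInMap can binary-search them, with the *ProvenSorted flags caching a one-time sortedness check. A self-contained sketch of that pattern, all names hypothetical:)

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iterator>

struct IntrinsicInfo {
  unsigned BuiltinID;     // sort key
  unsigned LLVMIntrinsic; // payload for the builtin
};

// Hypothetical stand-in for a table such as AArch64SISDIntrinsicInfo;
// entries must be kept sorted by BuiltinID.
static const IntrinsicInfo Map[] = {{10, 100}, {20, 200}, {30, 300}};
static bool MapProvenSorted = false;

static const IntrinsicInfo *findInMap(unsigned BuiltinID) {
  if (!MapProvenSorted) { // one-time check, like the *ProvenSorted flags
    assert(std::is_sorted(std::begin(Map), std::end(Map),
                          [](const IntrinsicInfo &A, const IntrinsicInfo &B) {
                            return A.BuiltinID < B.BuiltinID;
                          }));
    MapProvenSorted = true;
  }
  const IntrinsicInfo *It = std::lower_bound(
      std::begin(Map), std::end(Map), BuiltinID,
      [](const IntrinsicInfo &I, unsigned ID) { return I.BuiltinID < ID; });
  return (It != std::end(Map) && It->BuiltinID == BuiltinID) ? It : nullptr;
}

int main() {
  if (const IntrinsicInfo *I = findInMap(20))
    std::printf("builtin 20 -> intrinsic %u\n", I->LLVMIntrinsic);
  return 0;
}
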
- switch (BuiltinID) { - default: break; - case NEON::BI__builtin_neon_vdups_lane_f32: - case NEON::BI__builtin_neon_vdupd_lane_f64: - case NEON::BI__builtin_neon_vdups_laneq_f32: - case NEON::BI__builtin_neon_vdupd_laneq_f64: { - return CGF.Builder.CreateExtractElement(Ops[0], Ops[1], "vdup_lane"); - } - case NEON::BI__builtin_neon_vdupb_lane_i8: - case NEON::BI__builtin_neon_vduph_lane_i16: - case NEON::BI__builtin_neon_vdups_lane_i32: - case NEON::BI__builtin_neon_vdupd_lane_i64: - case NEON::BI__builtin_neon_vdupb_laneq_i8: - case NEON::BI__builtin_neon_vduph_laneq_i16: - case NEON::BI__builtin_neon_vdups_laneq_i32: - case NEON::BI__builtin_neon_vdupd_laneq_i64: { - // The backend treats Neon scalar types as v1ix types - // So we want to dup lane from any vector to v1ix vector - // with shufflevector - s = "vdup_lane"; - Value* SV = llvm::ConstantVector::getSplat(1, cast(Ops[1])); - Value *Result = CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], SV, s); - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - // AArch64 intrinsic one-element vector type cast to - // scalar type expected by the builtin - return CGF.Builder.CreateBitCast(Result, Ty, s); - } - case NEON::BI__builtin_neon_vqdmlalh_lane_s16 : - case NEON::BI__builtin_neon_vqdmlalh_laneq_s16 : - case NEON::BI__builtin_neon_vqdmlals_lane_s32 : - case NEON::BI__builtin_neon_vqdmlals_laneq_s32 : - case NEON::BI__builtin_neon_vqdmlslh_lane_s16 : - case NEON::BI__builtin_neon_vqdmlslh_laneq_s16 : - case NEON::BI__builtin_neon_vqdmlsls_lane_s32 : - case NEON::BI__builtin_neon_vqdmlsls_laneq_s32 : { - Int = Intrinsic::arm_neon_vqadds; - if (BuiltinID == NEON::BI__builtin_neon_vqdmlslh_lane_s16 || - BuiltinID == NEON::BI__builtin_neon_vqdmlslh_laneq_s16 || - BuiltinID == NEON::BI__builtin_neon_vqdmlsls_lane_s32 || - BuiltinID == NEON::BI__builtin_neon_vqdmlsls_laneq_s32) { - Int = Intrinsic::arm_neon_vqsubs; - } - // create vqdmull call with b * c[i] - llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType()); - llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1); - Ty = CGF.ConvertType(E->getArg(0)->getType()); - llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1); - Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy); - Value *V = UndefValue::get(OpVTy); - llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0); - SmallVector MulOps; - MulOps.push_back(Ops[1]); - MulOps.push_back(Ops[2]); - MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI); - MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract"); - MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI); - Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]); - // create vqadds call with a +/- vqdmull result - F = CGF.CGM.getIntrinsic(Int, ResVTy); - SmallVector AddOps; - AddOps.push_back(Ops[0]); - AddOps.push_back(MulRes); - V = UndefValue::get(ResVTy); - AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI); - Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]); - return CGF.Builder.CreateBitCast(AddRes, Ty); - } - case NEON::BI__builtin_neon_vfmas_lane_f32: - case NEON::BI__builtin_neon_vfmas_laneq_f32: - case NEON::BI__builtin_neon_vfmad_lane_f64: - case NEON::BI__builtin_neon_vfmad_laneq_f64: { - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); - Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); - return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); - } - // Scalar Floating-point Multiply 
Extended - case NEON::BI__builtin_neon_vmulxs_f32: - case NEON::BI__builtin_neon_vmulxd_f64: { - Int = Intrinsic::aarch64_neon_vmulx; - llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); - return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); - } - case NEON::BI__builtin_neon_vmul_n_f64: { - // v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane - llvm::Type *VTy = GetNeonType(&CGF, - NeonTypeFlags(NeonTypeFlags::Float64, false, false)); - Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy); - llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0); - Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract"); - Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]); - return CGF.Builder.CreateBitCast(Result, VTy); - } - case NEON::BI__builtin_neon_vget_lane_i8: - case NEON::BI__builtin_neon_vget_lane_i16: - case NEON::BI__builtin_neon_vget_lane_i32: - case NEON::BI__builtin_neon_vget_lane_i64: - case NEON::BI__builtin_neon_vget_lane_f32: - case NEON::BI__builtin_neon_vget_lane_f64: - case NEON::BI__builtin_neon_vgetq_lane_i8: - case NEON::BI__builtin_neon_vgetq_lane_i16: - case NEON::BI__builtin_neon_vgetq_lane_i32: - case NEON::BI__builtin_neon_vgetq_lane_i64: - case NEON::BI__builtin_neon_vgetq_lane_f32: - case NEON::BI__builtin_neon_vgetq_lane_f64: - return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vget_lane_i8, E); - case NEON::BI__builtin_neon_vset_lane_i8: - case NEON::BI__builtin_neon_vset_lane_i16: - case NEON::BI__builtin_neon_vset_lane_i32: - case NEON::BI__builtin_neon_vset_lane_i64: - case NEON::BI__builtin_neon_vset_lane_f32: - case NEON::BI__builtin_neon_vset_lane_f64: - case NEON::BI__builtin_neon_vsetq_lane_i8: - case NEON::BI__builtin_neon_vsetq_lane_i16: - case NEON::BI__builtin_neon_vsetq_lane_i32: - case NEON::BI__builtin_neon_vsetq_lane_i64: - case NEON::BI__builtin_neon_vsetq_lane_f32: - case NEON::BI__builtin_neon_vsetq_lane_f64: - return CGF.EmitARMBuiltinExpr(NEON::BI__builtin_neon_vset_lane_i8, E); - - case NEON::BI__builtin_neon_vceqzd_s64: - case NEON::BI__builtin_neon_vceqzd_u64: - case NEON::BI__builtin_neon_vcgezd_s64: - case NEON::BI__builtin_neon_vcgtzd_s64: - case NEON::BI__builtin_neon_vclezd_s64: - case NEON::BI__builtin_neon_vcltzd_s64: - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(Ops[0]->getType())); - break; - case NEON::BI__builtin_neon_vceqzs_f32: - case NEON::BI__builtin_neon_vceqzd_f64: - case NEON::BI__builtin_neon_vcgezs_f32: - case NEON::BI__builtin_neon_vcgezd_f64: - case NEON::BI__builtin_neon_vcgtzs_f32: - case NEON::BI__builtin_neon_vcgtzd_f64: - case NEON::BI__builtin_neon_vclezs_f32: - case NEON::BI__builtin_neon_vclezd_f64: - case NEON::BI__builtin_neon_vcltzs_f32: - case NEON::BI__builtin_neon_vcltzd_f64: - // Add implicit zero operand. - Ops.push_back(llvm::Constant::getNullValue(CGF.FloatTy)); - break; - } - - // It didn't need any handling specific to the AArch64 backend, so defer to - // common code. 
- return EmitCommonNeonSISDBuiltinExpr(CGF, SISDInfo, Ops, E); -} - Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, @@ -3534,796 +3021,6 @@ static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef Ops, return CGF.EmitNeonCall(TblF, TblOps, Name); } -static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, - unsigned BuiltinID, - const CallExpr *E) { - unsigned int Int = 0; - const char *s = nullptr; - - switch (BuiltinID) { - default: - return nullptr; - case NEON::BI__builtin_neon_vtbl1_v: - case NEON::BI__builtin_neon_vqtbl1_v: - case NEON::BI__builtin_neon_vqtbl1q_v: - case NEON::BI__builtin_neon_vtbl2_v: - case NEON::BI__builtin_neon_vqtbl2_v: - case NEON::BI__builtin_neon_vqtbl2q_v: - case NEON::BI__builtin_neon_vtbl3_v: - case NEON::BI__builtin_neon_vqtbl3_v: - case NEON::BI__builtin_neon_vqtbl3q_v: - case NEON::BI__builtin_neon_vtbl4_v: - case NEON::BI__builtin_neon_vqtbl4_v: - case NEON::BI__builtin_neon_vqtbl4q_v: - case NEON::BI__builtin_neon_vtbx1_v: - case NEON::BI__builtin_neon_vqtbx1_v: - case NEON::BI__builtin_neon_vqtbx1q_v: - case NEON::BI__builtin_neon_vtbx2_v: - case NEON::BI__builtin_neon_vqtbx2_v: - case NEON::BI__builtin_neon_vqtbx2q_v: - case NEON::BI__builtin_neon_vtbx3_v: - case NEON::BI__builtin_neon_vqtbx3_v: - case NEON::BI__builtin_neon_vqtbx3q_v: - case NEON::BI__builtin_neon_vtbx4_v: - case NEON::BI__builtin_neon_vqtbx4_v: - case NEON::BI__builtin_neon_vqtbx4q_v: - break; - } - - assert(E->getNumArgs() >= 3); - - // Get the last argument, which specifies the vector type. - llvm::APSInt Result; - const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) - return nullptr; - - // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - llvm::VectorType *VTy = GetNeonType(&CGF, Type); - llvm::Type *Ty = VTy; - if (!Ty) - return nullptr; - - SmallVector Ops; - for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { - Ops.push_back(CGF.EmitScalarExpr(E->getArg(i))); - } - - unsigned nElts = VTy->getNumElements(); - - // AArch64 scalar builtins are not overloaded, they do not have an extra - // argument that specifies the vector type, need to handle each case. 
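
(Aside, not part of the patch: the tbl/tbx cases below index into a concatenated byte table. For vtbl an out-of-range index produces 0, while vtbx leaves the destination lane unchanged, which the deleted code below expresses as an icmp uge against the table size, a sext, and a vbsl select. A scalar model, names hypothetical:)

#include <array>
#include <cstdint>
#include <cstdio>

using V8 = std::array<uint8_t, 8>;

// Scalar model of 64-bit NEON vtbl1: out-of-range indices produce 0.
static V8 vtbl1(const V8 &Table, const V8 &Idx) {
  V8 R{};
  for (int I = 0; I != 8; ++I)
    R[I] = Idx[I] < 8 ? Table[Idx[I]] : 0;
  return R;
}

// Scalar model of vtbx1: out-of-range indices keep the destination lane.
static V8 vtbx1(const V8 &Dst, const V8 &Table, const V8 &Idx) {
  V8 R = Dst;
  for (int I = 0; I != 8; ++I)
    if (Idx[I] < 8)
      R[I] = Table[Idx[I]];
  return R;
}

int main() {
  V8 T{10, 11, 12, 13, 14, 15, 16, 17}, Idx{0, 7, 9, 0, 0, 0, 0, 0};
  V8 A = vtbl1(T, Idx), B = vtbx1(T, T, Idx);
  std::printf("%d %d %d / %d\n", A[0], A[1], A[2], B[2]); // 10 17 0 / 12
  return 0;
}
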
- SmallVector TblOps; - switch (BuiltinID) { - case NEON::BI__builtin_neon_vtbl1_v: { - TblOps.push_back(Ops[0]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - } - case NEON::BI__builtin_neon_vtbl2_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - } - case NEON::BI__builtin_neon_vtbl3_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - } - case NEON::BI__builtin_neon_vtbl4_v: { - TblOps.push_back(Ops[0]); - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - } - case NEON::BI__builtin_neon_vtbx1_v: { - TblOps.push_back(Ops[1]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty, - Intrinsic::aarch64_neon_vtbl1, "vtbl1"); - - llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8); - Value* EightV = llvm::ConstantVector::getSplat(nElts, Eight); - Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); - CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); - - SmallVector BslOps; - BslOps.push_back(CmpRes); - BslOps.push_back(Ops[0]); - BslOps.push_back(TblRes); - Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); - return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); - } - case NEON::BI__builtin_neon_vtbx2_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty, - Intrinsic::aarch64_neon_vtbx1, "vtbx1"); - } - case NEON::BI__builtin_neon_vtbx3_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty, - Intrinsic::aarch64_neon_vtbl2, "vtbl2"); - - llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24); - Value* TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour); - Value *CmpRes = CGF.Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], - TwentyFourV); - CmpRes = CGF.Builder.CreateSExt(CmpRes, Ty); - - SmallVector BslOps; - BslOps.push_back(CmpRes); - BslOps.push_back(Ops[0]); - BslOps.push_back(TblRes); - Function *BslF = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vbsl, Ty); - return CGF.EmitNeonCall(BslF, BslOps, "vbsl"); - } - case NEON::BI__builtin_neon_vtbx4_v: { - TblOps.push_back(Ops[1]); - TblOps.push_back(Ops[2]); - TblOps.push_back(Ops[3]); - TblOps.push_back(Ops[4]); - return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty, - Intrinsic::aarch64_neon_vtbx2, "vtbx2"); - } - case NEON::BI__builtin_neon_vqtbl1_v: - case NEON::BI__builtin_neon_vqtbl1q_v: - Int = Intrinsic::aarch64_neon_vtbl1; s = "vtbl1"; break; - case NEON::BI__builtin_neon_vqtbl2_v: - case NEON::BI__builtin_neon_vqtbl2q_v: { - Int = Intrinsic::aarch64_neon_vtbl2; s = "vtbl2"; break; - case NEON::BI__builtin_neon_vqtbl3_v: - case NEON::BI__builtin_neon_vqtbl3q_v: - Int = Intrinsic::aarch64_neon_vtbl3; s = "vtbl3"; break; - case NEON::BI__builtin_neon_vqtbl4_v: - case NEON::BI__builtin_neon_vqtbl4q_v: - Int = Intrinsic::aarch64_neon_vtbl4; s = "vtbl4"; break; - case NEON::BI__builtin_neon_vqtbx1_v: - case NEON::BI__builtin_neon_vqtbx1q_v: - Int = Intrinsic::aarch64_neon_vtbx1; s = "vtbx1"; break; - case 
NEON::BI__builtin_neon_vqtbx2_v: - case NEON::BI__builtin_neon_vqtbx2q_v: - Int = Intrinsic::aarch64_neon_vtbx2; s = "vtbx2"; break; - case NEON::BI__builtin_neon_vqtbx3_v: - case NEON::BI__builtin_neon_vqtbx3q_v: - Int = Intrinsic::aarch64_neon_vtbx3; s = "vtbx3"; break; - case NEON::BI__builtin_neon_vqtbx4_v: - case NEON::BI__builtin_neon_vqtbx4q_v: - Int = Intrinsic::aarch64_neon_vtbx4; s = "vtbx4"; break; - } - } - - if (!Int) - return nullptr; - - Function *F = CGF.CGM.getIntrinsic(Int, Ty); - return CGF.EmitNeonCall(F, Ops, s); -} - -Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - - // Process AArch64 scalar builtins - llvm::ArrayRef SISDInfo(AArch64SISDIntrinsicInfo); - const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( - SISDInfo, BuiltinID, AArch64SISDIntrinsicInfoProvenSorted); - - if (Builtin) { - Value *Result = EmitAArch64ScalarBuiltinExpr(*this, *Builtin, E); - assert(Result && "SISD intrinsic should have been handled"); - return Result; - } - - // Process AArch64 table lookup builtins - if (Value *Result = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E)) - return Result; - - if (BuiltinID == AArch64::BI__clear_cache) { - assert(E->getNumArgs() == 2 && - "Variadic __clear_cache slipped through on AArch64"); - - const FunctionDecl *FD = E->getDirectCallee(); - SmallVector Ops; - for (unsigned i = 0; i < E->getNumArgs(); i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); - llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); - llvm::FunctionType *FTy = cast(Ty); - StringRef Name = FD->getName(); - return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); - } - - SmallVector Ops; - llvm::Value *Align = nullptr; // Alignment for load/store - - if (BuiltinID == NEON::BI__builtin_neon_vldrq_p128) { - Value *Op = EmitScalarExpr(E->getArg(0)); - unsigned addressSpace = - cast(Op->getType())->getAddressSpace(); - llvm::Type *Ty = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace); - Op = Builder.CreateBitCast(Op, Ty); - Op = Builder.CreateLoad(Op); - Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); - return Builder.CreateBitCast(Op, Ty); - } - if (BuiltinID == NEON::BI__builtin_neon_vstrq_p128) { - Value *Op0 = EmitScalarExpr(E->getArg(0)); - unsigned addressSpace = - cast(Op0->getType())->getAddressSpace(); - llvm::Type *PTy = llvm::Type::getFP128PtrTy(getLLVMContext(), addressSpace); - Op0 = Builder.CreateBitCast(Op0, PTy); - Value *Op1 = EmitScalarExpr(E->getArg(1)); - llvm::Type *Ty = llvm::Type::getFP128Ty(getLLVMContext()); - Op1 = Builder.CreateBitCast(Op1, Ty); - return Builder.CreateStore(Op1, Op0); - } - for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { - if (i == 0) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld1_v: - case NEON::BI__builtin_neon_vld1q_v: - case NEON::BI__builtin_neon_vst1_v: - case NEON::BI__builtin_neon_vst1q_v: - case NEON::BI__builtin_neon_vst2_v: - case NEON::BI__builtin_neon_vst2q_v: - case NEON::BI__builtin_neon_vst3_v: - case NEON::BI__builtin_neon_vst3q_v: - case NEON::BI__builtin_neon_vst4_v: - case NEON::BI__builtin_neon_vst4q_v: - case NEON::BI__builtin_neon_vst1_x2_v: - case NEON::BI__builtin_neon_vst1q_x2_v: - case NEON::BI__builtin_neon_vst1_x3_v: - case NEON::BI__builtin_neon_vst1q_x3_v: - case NEON::BI__builtin_neon_vst1_x4_v: - case NEON::BI__builtin_neon_vst1q_x4_v: - // Handle ld1/st1 lane in this function a little different from ARM. 
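
(Aside, not part of the patch: the lane cases below first capture the pointer's alignment via EmitPointerWithAlignment; further down, vld1_lane is lowered to a scalar load plus insertelement, and vst1_lane to an extractelement plus scalar store. A scalar model of those semantics, names hypothetical:)

#include <array>
#include <cstddef>
#include <cstdio>

// Scalar model of vld1_lane: scalar load, then insert into one lane.
template <typename T, std::size_t N>
std::array<T, N> vld1_lane(const T *Ptr, std::array<T, N> Vec, unsigned Lane) {
  Vec[Lane] = *Ptr;
  return Vec;
}

// Scalar model of vst1_lane: extract one lane, then a scalar store.
template <typename T, std::size_t N>
void vst1_lane(T *Ptr, const std::array<T, N> &Vec, unsigned Lane) {
  *Ptr = Vec[Lane];
}

int main() {
  std::array<int, 4> V{1, 2, 3, 4};
  int X = 42;
  V = vld1_lane(&X, V, 2); // V becomes {1, 2, 42, 4}
  vst1_lane(&X, V, 0);     // X becomes 1
  std::printf("%d %d\n", V[2], X);
  return 0;
}
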
- case NEON::BI__builtin_neon_vld1_lane_v: - case NEON::BI__builtin_neon_vld1q_lane_v: - case NEON::BI__builtin_neon_vst1_lane_v: - case NEON::BI__builtin_neon_vst1q_lane_v: - case NEON::BI__builtin_neon_vst2_lane_v: - case NEON::BI__builtin_neon_vst2q_lane_v: - case NEON::BI__builtin_neon_vst3_lane_v: - case NEON::BI__builtin_neon_vst3q_lane_v: - case NEON::BI__builtin_neon_vst4_lane_v: - case NEON::BI__builtin_neon_vst4q_lane_v: - case NEON::BI__builtin_neon_vld1_dup_v: - case NEON::BI__builtin_neon_vld1q_dup_v: - // Get the alignment for the argument in addition to the value; - // we'll use it later. - std::pair Src = - EmitPointerWithAlignment(E->getArg(0)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); - continue; - } - } - if (i == 1) { - switch (BuiltinID) { - case NEON::BI__builtin_neon_vld2_v: - case NEON::BI__builtin_neon_vld2q_v: - case NEON::BI__builtin_neon_vld3_v: - case NEON::BI__builtin_neon_vld3q_v: - case NEON::BI__builtin_neon_vld4_v: - case NEON::BI__builtin_neon_vld4q_v: - case NEON::BI__builtin_neon_vld1_x2_v: - case NEON::BI__builtin_neon_vld1q_x2_v: - case NEON::BI__builtin_neon_vld1_x3_v: - case NEON::BI__builtin_neon_vld1q_x3_v: - case NEON::BI__builtin_neon_vld1_x4_v: - case NEON::BI__builtin_neon_vld1q_x4_v: - // Handle ld1/st1 dup lane in this function a little different from ARM. - case NEON::BI__builtin_neon_vld2_dup_v: - case NEON::BI__builtin_neon_vld2q_dup_v: - case NEON::BI__builtin_neon_vld3_dup_v: - case NEON::BI__builtin_neon_vld3q_dup_v: - case NEON::BI__builtin_neon_vld4_dup_v: - case NEON::BI__builtin_neon_vld4q_dup_v: - case NEON::BI__builtin_neon_vld2_lane_v: - case NEON::BI__builtin_neon_vld2q_lane_v: - case NEON::BI__builtin_neon_vld3_lane_v: - case NEON::BI__builtin_neon_vld3q_lane_v: - case NEON::BI__builtin_neon_vld4_lane_v: - case NEON::BI__builtin_neon_vld4q_lane_v: - // Get the alignment for the argument in addition to the value; - // we'll use it later. - std::pair Src = - EmitPointerWithAlignment(E->getArg(1)); - Ops.push_back(Src.first); - Align = Builder.getInt32(Src.second); - continue; - } - } - Ops.push_back(EmitScalarExpr(E->getArg(i))); - } - - // Get the last argument, which specifies the vector type. - llvm::APSInt Result; - const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, getContext())) - return nullptr; - - // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); - bool usgn = Type.isUnsigned(); - bool quad = Type.isQuad(); - - llvm::VectorType *VTy = GetNeonType(this, Type); - llvm::Type *Ty = VTy; - if (!Ty) - return nullptr; - - // Many NEON builtins have identical semantics and uses in ARM and - // AArch64. Emit these in a single function. - llvm::ArrayRef IntrinsicMap(ARMSIMDIntrinsicMap); - Builtin = findNeonIntrinsicInMap(IntrinsicMap, BuiltinID, - NEONSIMDIntrinsicsProvenSorted); - if (Builtin) - return EmitCommonNeonBuiltinExpr( - Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, - Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align); - - unsigned Int; - switch (BuiltinID) { - default: - return nullptr; - - // AArch64 builtins mapping to legacy ARM v7 builtins. - // FIXME: the mapped builtins listed correspond to what has been tested - // in aarch64-neon-intrinsics.c so far. - - // Shift by immediate - case NEON::BI__builtin_neon_vrshr_n_v: - case NEON::BI__builtin_neon_vrshrq_n_v: - Int = usgn ? 
Intrinsic::aarch64_neon_vurshr - : Intrinsic::aarch64_neon_vsrshr; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n"); - case NEON::BI__builtin_neon_vsra_n_v: - if (VTy->getElementType()->isIntegerTy(64)) { - Int = usgn ? Intrinsic::aarch64_neon_vsradu_n - : Intrinsic::aarch64_neon_vsrads_n; - return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vsra_n"); - } - return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsra_n_v, E); - case NEON::BI__builtin_neon_vsraq_n_v: - return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vsraq_n_v, E); - case NEON::BI__builtin_neon_vrsra_n_v: - if (VTy->getElementType()->isIntegerTy(64)) { - Int = usgn ? Intrinsic::aarch64_neon_vrsradu_n - : Intrinsic::aarch64_neon_vrsrads_n; - return EmitNeonCall(CGM.getIntrinsic(Int), Ops, "vrsra_n"); - } - // fall through - case NEON::BI__builtin_neon_vrsraq_n_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ops[1] = Builder.CreateBitCast(Ops[1], Ty); - Int = usgn ? Intrinsic::aarch64_neon_vurshr - : Intrinsic::aarch64_neon_vsrshr; - Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]); - return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); - } - case NEON::BI__builtin_neon_vqshlu_n_v: - case NEON::BI__builtin_neon_vqshluq_n_v: - Int = Intrinsic::aarch64_neon_vsqshlu; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n"); - case NEON::BI__builtin_neon_vsri_n_v: - case NEON::BI__builtin_neon_vsriq_n_v: - Int = Intrinsic::aarch64_neon_vsri; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsri_n"); - case NEON::BI__builtin_neon_vsli_n_v: - case NEON::BI__builtin_neon_vsliq_n_v: - Int = Intrinsic::aarch64_neon_vsli; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsli_n"); - case NEON::BI__builtin_neon_vqshrun_n_v: - Int = Intrinsic::aarch64_neon_vsqshrun; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); - case NEON::BI__builtin_neon_vrshrn_n_v: - Int = Intrinsic::aarch64_neon_vrshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); - case NEON::BI__builtin_neon_vqrshrun_n_v: - Int = Intrinsic::aarch64_neon_vsqrshrun; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); - case NEON::BI__builtin_neon_vqshrn_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vuqshrn - : Intrinsic::aarch64_neon_vsqshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); - case NEON::BI__builtin_neon_vqrshrn_n_v: - Int = usgn ? Intrinsic::aarch64_neon_vuqrshrn - : Intrinsic::aarch64_neon_vsqrshrn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); - - // Convert - case NEON::BI__builtin_neon_vcvt_n_f64_v: - case NEON::BI__builtin_neon_vcvtq_n_f64_v: { - llvm::Type *FloatTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); - llvm::Type *Tys[2] = { FloatTy, Ty }; - Int = usgn ? 
Intrinsic::arm_neon_vcvtfxu2fp
-              : Intrinsic::arm_neon_vcvtfxs2fp;
-    Function *F = CGM.getIntrinsic(Int, Tys);
-    return EmitNeonCall(F, Ops, "vcvt_n");
-  }
-
-  // Load/Store
-  case NEON::BI__builtin_neon_vld1_x2_v:
-  case NEON::BI__builtin_neon_vld1q_x2_v:
-  case NEON::BI__builtin_neon_vld1_x3_v:
-  case NEON::BI__builtin_neon_vld1q_x3_v:
-  case NEON::BI__builtin_neon_vld1_x4_v:
-  case NEON::BI__builtin_neon_vld1q_x4_v: {
-    unsigned Int;
-    switch (BuiltinID) {
-    case NEON::BI__builtin_neon_vld1_x2_v:
-    case NEON::BI__builtin_neon_vld1q_x2_v:
-      Int = Intrinsic::aarch64_neon_vld1x2;
-      break;
-    case NEON::BI__builtin_neon_vld1_x3_v:
-    case NEON::BI__builtin_neon_vld1q_x3_v:
-      Int = Intrinsic::aarch64_neon_vld1x3;
-      break;
-    case NEON::BI__builtin_neon_vld1_x4_v:
-    case NEON::BI__builtin_neon_vld1q_x4_v:
-      Int = Intrinsic::aarch64_neon_vld1x4;
-      break;
-    }
-    Function *F = CGM.getIntrinsic(Int, Ty);
-    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld1xN");
-    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    return Builder.CreateStore(Ops[1], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vst1_x2_v:
-  case NEON::BI__builtin_neon_vst1q_x2_v:
-  case NEON::BI__builtin_neon_vst1_x3_v:
-  case NEON::BI__builtin_neon_vst1q_x3_v:
-  case NEON::BI__builtin_neon_vst1_x4_v:
-  case NEON::BI__builtin_neon_vst1q_x4_v: {
-    Ops.push_back(Align);
-    unsigned Int;
-    switch (BuiltinID) {
-    case NEON::BI__builtin_neon_vst1_x2_v:
-    case NEON::BI__builtin_neon_vst1q_x2_v:
-      Int = Intrinsic::aarch64_neon_vst1x2;
-      break;
-    case NEON::BI__builtin_neon_vst1_x3_v:
-    case NEON::BI__builtin_neon_vst1q_x3_v:
-      Int = Intrinsic::aarch64_neon_vst1x3;
-      break;
-    case NEON::BI__builtin_neon_vst1_x4_v:
-    case NEON::BI__builtin_neon_vst1q_x4_v:
-      Int = Intrinsic::aarch64_neon_vst1x4;
-      break;
-    }
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
-  }
-  case NEON::BI__builtin_neon_vld1_lane_v:
-  case NEON::BI__builtin_neon_vld1q_lane_v: {
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
-    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
-    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
-  }
-  case NEON::BI__builtin_neon_vst1_lane_v:
-  case NEON::BI__builtin_neon_vst1q_lane_v: {
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
-    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
-    StoreInst *St =
-        Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
-    St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
-    return St;
-  }
-  case NEON::BI__builtin_neon_vld2_dup_v:
-  case NEON::BI__builtin_neon_vld2q_dup_v:
-  case NEON::BI__builtin_neon_vld3_dup_v:
-  case NEON::BI__builtin_neon_vld3q_dup_v:
-  case NEON::BI__builtin_neon_vld4_dup_v:
-  case NEON::BI__builtin_neon_vld4q_dup_v: {
-    // Handle 64-bit x 1 elements as a special-case. There is no "dup" needed.
-    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 &&
-        VTy->getNumElements() == 1) {
-      switch (BuiltinID) {
-      case NEON::BI__builtin_neon_vld2_dup_v:
-        Int = Intrinsic::arm_neon_vld2;
-        break;
-      case NEON::BI__builtin_neon_vld3_dup_v:
-        Int = Intrinsic::arm_neon_vld3;
-        break;
-      case NEON::BI__builtin_neon_vld4_dup_v:
-        Int = Intrinsic::arm_neon_vld4;
-        break;
-      default:
-        llvm_unreachable("unknown vld_dup intrinsic?");
-      }
-      Function *F = CGM.getIntrinsic(Int, Ty);
-      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
-      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
-      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-      return Builder.CreateStore(Ops[1], Ops[0]);
-    }
-    switch (BuiltinID) {
-    case NEON::BI__builtin_neon_vld2_dup_v:
-    case NEON::BI__builtin_neon_vld2q_dup_v:
-      Int = Intrinsic::arm_neon_vld2lane;
-      break;
-    case NEON::BI__builtin_neon_vld3_dup_v:
-    case NEON::BI__builtin_neon_vld3q_dup_v:
-      Int = Intrinsic::arm_neon_vld3lane;
-      break;
-    case NEON::BI__builtin_neon_vld4_dup_v:
-    case NEON::BI__builtin_neon_vld4q_dup_v:
-      Int = Intrinsic::arm_neon_vld4lane;
-      break;
-    }
-    Function *F = CGM.getIntrinsic(Int, Ty);
-    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
-
-    SmallVector<Value *, 6> Args;
-    Args.push_back(Ops[1]);
-    Args.append(STy->getNumElements(), UndefValue::get(Ty));
-
-    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
-    Args.push_back(CI);
-    Args.push_back(Align);
-
-    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
-    // splat lane 0 to all elts in each vector of the result.
-    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-      Value *Val = Builder.CreateExtractValue(Ops[1], i);
-      Value *Elt = Builder.CreateBitCast(Val, Ty);
-      Elt = EmitNeonSplat(Elt, CI);
-      Elt = Builder.CreateBitCast(Elt, Val->getType());
-      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
-    }
-    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    return Builder.CreateStore(Ops[1], Ops[0]);
-  }
-
-  case NEON::BI__builtin_neon_vmul_lane_v:
-  case NEON::BI__builtin_neon_vmul_laneq_v: {
-    // v1f64 vmul_lane should be mapped to Neon scalar mul lane
-    bool Quad = false;
-    if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
-      Quad = true;
-    Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
-    llvm::Type *VTy = GetNeonType(this,
-        NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
-    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
-    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
-    Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
-    return Builder.CreateBitCast(Result, Ty);
-  }
-
-  // AArch64-only builtins
-  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vfmaq_lane_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
-    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
-    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
-                                            VTy->getNumElements() / 2);
-    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
-    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
-                                               cast<ConstantInt>(Ops[3]));
-    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
-
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vfma_lane_v: {
-    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
-    // v1f64 fma should be mapped to Neon scalar f64 fma
-    if (VTy && VTy->getElementType() == DoubleTy) {
-      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
-      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
-      llvm::Type *VTy = GetNeonType(this,
-          NeonTypeFlags(NeonTypeFlags::Float64, false, false));
-      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
-      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
-      Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
-      Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
-      return Builder.CreateBitCast(Result, Ty);
-    }
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vfma_laneq_v: {
-    llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
-    // v1f64 fma should be mapped to Neon scalar f64 fma
-    if (VTy && VTy->getElementType() == DoubleTy) {
-      Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
-      Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
-      llvm::Type *VTy = GetNeonType(this,
-          NeonTypeFlags(NeonTypeFlags::Float64, false, true));
-      Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
-      Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
-      Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
-      Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
-      return Builder.CreateBitCast(Result, Ty);
-    }
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-
-    llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
-                                            VTy->getNumElements() * 2);
-    Ops[2] = Builder.CreateBitCast(Ops[2], STy);
-    Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
-                                               cast<ConstantInt>(Ops[3]));
-    Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
-
-    return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vfms_v:
-  case NEON::BI__builtin_neon_vfmsq_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
-    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[1] = Builder.CreateFNeg(Ops[1]);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-
-    // LLVM's fma intrinsic puts the accumulator in the last position, but the
-    // AArch64 intrinsic has it first.
-    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
-  }
-  case NEON::BI__builtin_neon_vmaxnm_v:
-  case NEON::BI__builtin_neon_vmaxnmq_v: {
-    Int = Intrinsic::aarch64_neon_vmaxnm;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
-  }
-  case NEON::BI__builtin_neon_vminnm_v:
-  case NEON::BI__builtin_neon_vminnmq_v: {
-    Int = Intrinsic::aarch64_neon_vminnm;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
-  }
-  case NEON::BI__builtin_neon_vpmaxnm_v:
-  case NEON::BI__builtin_neon_vpmaxnmq_v: {
-    Int = Intrinsic::aarch64_neon_vpmaxnm;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
-  }
-  case NEON::BI__builtin_neon_vpminnm_v:
-  case NEON::BI__builtin_neon_vpminnmq_v: {
-    Int = Intrinsic::aarch64_neon_vpminnm;
-    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
-  }
-  case NEON::BI__builtin_neon_vpmaxq_v: {
-    Int = usgn ?
Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); - } - case NEON::BI__builtin_neon_vpminq_v: { - Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); - } - case NEON::BI__builtin_neon_vmulx_v: - case NEON::BI__builtin_neon_vmulxq_v: { - Int = Intrinsic::aarch64_neon_vmulx; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); - } - case NEON::BI__builtin_neon_vsqadd_v: - case NEON::BI__builtin_neon_vsqaddq_v: { - Int = Intrinsic::aarch64_neon_usqadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); - } - case NEON::BI__builtin_neon_vuqadd_v: - case NEON::BI__builtin_neon_vuqaddq_v: { - Int = Intrinsic::aarch64_neon_suqadd; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); - } - case NEON::BI__builtin_neon_vrbit_v: - case NEON::BI__builtin_neon_vrbitq_v: - Int = Intrinsic::aarch64_neon_rbit; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); - case NEON::BI__builtin_neon_vcvt_f32_f64: { - NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); - Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); - return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vcvtx_f32_v: { - llvm::Type *EltTy = FloatTy; - llvm::Type *ResTy = llvm::VectorType::get(EltTy, 2); - llvm::Type *Tys[2] = { ResTy, Ty }; - Int = Intrinsic::aarch64_neon_vcvtxn; - return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtx_f32_f64"); - } - case NEON::BI__builtin_neon_vcvt_f64_f32: { - llvm::Type *OpTy = - GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, false)); - Ops[0] = Builder.CreateBitCast(Ops[0], OpTy); - return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vcvt_f64_v: - case NEON::BI__builtin_neon_vcvtq_f64_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], Ty); - Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); - return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") - : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); - } - case NEON::BI__builtin_neon_vrndn_v: - case NEON::BI__builtin_neon_vrndnq_v: { - Int = Intrinsic::aarch64_neon_frintn; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); - } - case NEON::BI__builtin_neon_vrnda_v: - case NEON::BI__builtin_neon_vrndaq_v: { - Int = Intrinsic::round; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); - } - case NEON::BI__builtin_neon_vrndp_v: - case NEON::BI__builtin_neon_vrndpq_v: { - Int = Intrinsic::ceil; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); - } - case NEON::BI__builtin_neon_vrndm_v: - case NEON::BI__builtin_neon_vrndmq_v: { - Int = Intrinsic::floor; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); - } - case NEON::BI__builtin_neon_vrndx_v: - case NEON::BI__builtin_neon_vrndxq_v: { - Int = Intrinsic::rint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); - } - case NEON::BI__builtin_neon_vrnd_v: - case NEON::BI__builtin_neon_vrndq_v: { - Int = Intrinsic::trunc; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd"); - } - case NEON::BI__builtin_neon_vrndi_v: - case NEON::BI__builtin_neon_vrndiq_v: { - Int = Intrinsic::nearbyint; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); - } - case NEON::BI__builtin_neon_vsqrt_v: - case NEON::BI__builtin_neon_vsqrtq_v: { - Int = Intrinsic::sqrt; - return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); - } - case NEON::BI__builtin_neon_vceqz_v: - case NEON::BI__builtin_neon_vceqzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, - ICmpInst::ICMP_EQ, "vceqz"); - case NEON::BI__builtin_neon_vcgez_v: - case NEON::BI__builtin_neon_vcgezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, - ICmpInst::ICMP_SGE, "vcgez"); - case NEON::BI__builtin_neon_vclez_v: - case NEON::BI__builtin_neon_vclezq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, - ICmpInst::ICMP_SLE, "vclez"); - case NEON::BI__builtin_neon_vcgtz_v: - case NEON::BI__builtin_neon_vcgtzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, - ICmpInst::ICMP_SGT, "vcgtz"); - case NEON::BI__builtin_neon_vcltz_v: - case NEON::BI__builtin_neon_vcltzq_v: - return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, - ICmpInst::ICMP_SLT, "vcltz"); - } -} - Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { unsigned HintID = static_cast(-1); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 944a0cf..7443339 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2201,8 +2201,6 @@ public: const llvm::CmpInst::Predicate Fp, const llvm::CmpInst::Predicate Ip, const llvm::Twine &Name = ""); - llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty); - llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 52e4132..88c4d96 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -4512,221 +4512,6 @@ llvm::Value *NaClARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, } //===----------------------------------------------------------------------===// -// AArch64 ABI 
Implementation
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-class AArch64ABIInfo : public ABIInfo {
-public:
-  AArch64ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
-
-private:
-  // The AArch64 PCS is explicit about return types and argument types being
-  // handled identically, so we don't need to draw a distinction between
-  // Argument and Return classification.
-  ABIArgInfo classifyGenericType(QualType Ty, int &FreeIntRegs,
-                                 int &FreeVFPRegs) const;
-
-  ABIArgInfo tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded, bool IsInt,
-                        llvm::Type *DirectTy = nullptr) const;
-
-  void computeInfo(CGFunctionInfo &FI) const override;
-
-  llvm::Value *EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
-                         CodeGenFunction &CGF) const override;
-};
-
-class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
-public:
-  AArch64TargetCodeGenInfo(CodeGenTypes &CGT)
-    :TargetCodeGenInfo(new AArch64ABIInfo(CGT)) {}
-
-  const AArch64ABIInfo &getABIInfo() const {
-    return static_cast<const AArch64ABIInfo &>(TargetCodeGenInfo::getABIInfo());
-  }
-
-  int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
-    return 31;
-  }
-
-  bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
-                               llvm::Value *Address) const override {
-    // 0-31 are x0-x30 and sp: 8 bytes each
-    llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
-    AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 31);
-
-    // 64-95 are v0-v31: 16 bytes each
-    llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
-    AssignToArrayRange(CGF.Builder, Address, Sixteen8, 64, 95);
-
-    return false;
-  }
-
-};
-
-}
-
-void AArch64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
-  int FreeIntRegs = 8, FreeVFPRegs = 8;
-
-  FI.getReturnInfo() = classifyGenericType(FI.getReturnType(),
-                                           FreeIntRegs, FreeVFPRegs);
-
-  FreeIntRegs = FreeVFPRegs = 8;
-  for (auto &I : FI.arguments()) {
-    I.info = classifyGenericType(I.type, FreeIntRegs, FreeVFPRegs);
-  }
-}
-
-ABIArgInfo
-AArch64ABIInfo::tryUseRegs(QualType Ty, int &FreeRegs, int RegsNeeded,
-                           bool IsInt, llvm::Type *DirectTy) const {
-  if (FreeRegs >= RegsNeeded) {
-    FreeRegs -= RegsNeeded;
-    return ABIArgInfo::getDirect(DirectTy);
-  }
-
-  llvm::Type *Padding = nullptr;
-
-  // We need padding so that later arguments don't get filled in anyway. That
-  // wouldn't happen if only ByVal arguments followed in the same category, but
-  // a large structure will simply seem to be a pointer as far as LLVM is
-  // concerned.
-  if (FreeRegs > 0) {
-    if (IsInt)
-      Padding = llvm::Type::getInt64Ty(getVMContext());
-    else
-      Padding = llvm::Type::getFloatTy(getVMContext());
-
-    // Either [N x i64] or [N x float].
-    Padding = llvm::ArrayType::get(Padding, FreeRegs);
-    FreeRegs = 0;
-  }
-
-  return ABIArgInfo::getIndirect(getContext().getTypeAlign(Ty) / 8,
-                                 /*IsByVal=*/ true, /*Realign=*/ false,
-                                 Padding);
-}
-
-
-ABIArgInfo AArch64ABIInfo::classifyGenericType(QualType Ty,
-                                               int &FreeIntRegs,
-                                               int &FreeVFPRegs) const {
-  // Can only occur for return, but harmless otherwise.
-  if (Ty->isVoidType())
-    return ABIArgInfo::getIgnore();
-
-  // Large vector types should be returned via memory. There's no such concept
-  // in the ABI, but they'd be over 16 bytes anyway so no matter how they're
-  // classified they'd go into memory (see B.3).
-  if (Ty->isVectorType() && getContext().getTypeSize(Ty) > 128) {
-    if (FreeIntRegs > 0)
-      --FreeIntRegs;
-    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
-  }
-
-  // All non-aggregate LLVM types have a concrete ABI representation so they can
-  // be passed directly. After this block we're guaranteed to be in a
-  // complicated case.
-  if (!isAggregateTypeForABI(Ty)) {
-    // Treat an enum type as its underlying type.
-    if (const EnumType *EnumTy = Ty->getAs<EnumType>())
-      Ty = EnumTy->getDecl()->getIntegerType();
-
-    if (Ty->isFloatingType() || Ty->isVectorType())
-      return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ false);
-
-    assert(getContext().getTypeSize(Ty) <= 128 &&
-           "unexpectedly large scalar type");
-
-    int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
-
-    // If the type may need padding registers to ensure "alignment", we must be
-    // careful when this is accounted for. Increasing the effective size covers
-    // all cases.
-    if (getContext().getTypeAlign(Ty) == 128)
-      RegsNeeded += FreeIntRegs % 2 != 0;
-
-    return tryUseRegs(Ty, FreeIntRegs, RegsNeeded, /*IsInt=*/ true);
-  }
-
-  if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
-    if (FreeIntRegs > 0 && RAA == CGCXXABI::RAA_Indirect)
-      --FreeIntRegs;
-    return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
-  }
-
-  if (isEmptyRecord(getContext(), Ty, true)) {
-    if (!getContext().getLangOpts().CPlusPlus) {
-      // Empty structs outside C++ mode are a GNU extension, so no ABI can
-      // possibly tell us what to do. It turns out (I believe) that GCC ignores
-      // the object for parameter-passing purposes.
-      return ABIArgInfo::getIgnore();
-    }
-
-    // The combination of C++98 9p5 (sizeof(struct) != 0) and the pseudocode
-    // description of va_arg in the PCS require that an empty struct does
-    // actually occupy space for parameter-passing. I'm hoping for a
-    // clarification giving an explicit paragraph to point to in future.
-    return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ 1, /*IsInt=*/ true,
-                      llvm::Type::getInt8Ty(getVMContext()));
-  }
-
-  // Homogeneous vector aggregates get passed in registers or on the stack.
-  const Type *Base = nullptr;
-  uint64_t NumMembers = 0;
-  if (isHomogeneousAggregate(Ty, Base, getContext(), &NumMembers)) {
-    assert(Base && "Base class should be set for homogeneous aggregate");
-    // Homogeneous aggregates are passed and returned directly.
-    return tryUseRegs(Ty, FreeVFPRegs, /*RegsNeeded=*/ NumMembers,
-                      /*IsInt=*/ false);
-  }
-
-  uint64_t Size = getContext().getTypeSize(Ty);
-  if (Size <= 128) {
-    // Small structs can use the same direct type whether they're in registers
-    // or on the stack.
-    llvm::Type *BaseTy;
-    unsigned NumBases;
-    int SizeInRegs = (Size + 63) / 64;
-
-    if (getContext().getTypeAlign(Ty) == 128) {
-      BaseTy = llvm::Type::getIntNTy(getVMContext(), 128);
-      NumBases = 1;
-
-      // If the type may need padding registers to ensure "alignment", we must
-      // be careful when this is accounted for. Increasing the effective size
-      // covers all cases.
-      SizeInRegs += FreeIntRegs % 2 != 0;
-    } else {
-      BaseTy = llvm::Type::getInt64Ty(getVMContext());
-      NumBases = SizeInRegs;
-    }
-    llvm::Type *DirectTy = llvm::ArrayType::get(BaseTy, NumBases);
-
-    return tryUseRegs(Ty, FreeIntRegs, /*RegsNeeded=*/ SizeInRegs,
-                      /*IsInt=*/ true, DirectTy);
-  }
-
-  // If the aggregate is > 16 bytes, it's passed and returned indirectly. In
-  // LLVM terms the return uses an "sret" pointer, but that's handled elsewhere.
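The 16-byte cut-off that comment describes is exactly what the deleted aarch64-arguments.c tests further down pin; the two deleted statements that follow implement the indirect fallback. A minimal sketch of the rule, assuming an aarch64-none-linux-gnu target (struct names invented for illustration):

    /* 16 bytes: within the limit, coerced to [2 x i64] and passed in x-registers. */
    struct two_words { long a, b; };
    struct two_words id_direct(struct two_words t) { return t; }

    /* 24 bytes: over the limit, passed and returned via a pointer
       (an "sret" pointer for the return value), per the fallback above. */
    struct three_words { long a, b, c; };
    struct three_words id_indirect(struct three_words t) { return t; }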
- --FreeIntRegs; - return ABIArgInfo::getIndirect(0, /* byVal = */ false); -} - -llvm::Value *AArch64ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, - CodeGenFunction &CGF) const { - int FreeIntRegs = 8, FreeVFPRegs = 8; - Ty = CGF.getContext().getCanonicalType(Ty); - ABIArgInfo AI = classifyGenericType(Ty, FreeIntRegs, FreeVFPRegs); - - return EmitAArch64VAArg(VAListAddr, Ty, 8 - FreeIntRegs, 8 - FreeVFPRegs, - AI.isIndirect(), CGF); -} - -//===----------------------------------------------------------------------===// // NVPTX ABI Implementation //===----------------------------------------------------------------------===// @@ -6684,6 +6469,8 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { case llvm::Triple::mips64el: return *(TheTargetCodeGenInfo = new MIPSTargetCodeGenInfo(Types, false)); + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: case llvm::Triple::arm64: case llvm::Triple::arm64_be: { ARM64ABIInfo::ABIKind Kind = ARM64ABIInfo::AAPCS; @@ -6693,10 +6480,6 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return *(TheTargetCodeGenInfo = new ARM64TargetCodeGenInfo(Types, Kind)); } - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - return *(TheTargetCodeGenInfo = new AArch64TargetCodeGenInfo(Types)); - case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp index 7670daa..4c097c5 100644 --- a/clang/lib/Driver/Tools.cpp +++ b/clang/lib/Driver/Tools.cpp @@ -444,26 +444,6 @@ void Clang::AddPreprocessingOptions(Compilation &C, getToolChain().AddClangSystemIncludeArgs(Args, CmdArgs); } -/// getAArch64TargetCPU - Get the (LLVM) name of the AArch64 cpu we are targeting. -// -// FIXME: tblgen this. -static std::string getAArch64TargetCPU(const ArgList &Args, - const llvm::Triple &Triple) { - // FIXME: Warn on inconsistent use of -mcpu and -march. - - // If we have -mcpu=, use that. - if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { - StringRef MCPU = A->getValue(); - // Handle -mcpu=native. - if (MCPU == "native") - return llvm::sys::getHostCPUName(); - else - return MCPU; - } - - return "generic"; -} - // FIXME: Move to target hook. 
static bool isSignedCharDefault(const llvm::Triple &Triple) { switch (Triple.getArch()) { @@ -1345,8 +1325,6 @@ static std::string getCPUName(const ArgList &Args, const llvm::Triple &T) { case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: - return getAArch64TargetCPU(Args, T); - case llvm::Triple::arm64: case llvm::Triple::arm64_be: return getARM64TargetCPU(Args); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 7974b6f..78ba66b 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -309,16 +309,13 @@ Sema::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (CheckARMBuiltinFunctionCall(BuiltinID, TheCall)) return ExprError(); break; + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: case llvm::Triple::arm64: case llvm::Triple::arm64_be: if (CheckARM64BuiltinFunctionCall(BuiltinID, TheCall)) return ExprError(); break; - case llvm::Triple::aarch64: - case llvm::Triple::aarch64_be: - if (CheckAArch64BuiltinFunctionCall(BuiltinID, TheCall)) - return ExprError(); - break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: @@ -472,14 +469,6 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { return SemaBuiltinConstantArgRange(TheCall, i, l, u + l); } -bool Sema::CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, - CallExpr *TheCall) { - if (CheckNeonBuiltinFunctionCall(BuiltinID, TheCall)) - return true; - - return false; -} - bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall, unsigned MaxWidth) { assert((BuiltinID == ARM::BI__builtin_arm_ldrex || diff --git a/clang/test/CodeGen/aarch64-arguments.c b/clang/test/CodeGen/aarch64-arguments.c deleted file mode 100644 index f875886..0000000 --- a/clang/test/CodeGen/aarch64-arguments.c +++ /dev/null @@ -1,199 +0,0 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck -check-prefix=PCS %s - -// Sign extension is performed by the callee on AArch64, which means -// that we *shouldn't* tag arguments and returns with their extension. 
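That header comment is the crux of the deleted test file: because the AArch64 PCS makes the callee perform narrowing and extension, Clang must not add signext/zeroext attributes, which f0 below checks. For contrast, a caller-extends target would tag the same function, roughly as follows (an illustrative sketch, not output from this commit):

    /* AArch64 leaves the parameter bare:
         define i8 @f0(i16 %a)
       while a caller-extends target such as x86-64 would emit:
         define signext i8 @f0(i16 signext %a) */
    char f0_contrast(short a) { return a; }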
- -// PCS-LABEL: define i8 @f0(i16 %a) -char f0(short a) { - return a; -} - -// PCS: define [1 x i64] @f1() -struct s1 { char f0; }; -struct s1 f1(void) {} - -// PCS: define [1 x i64] @f2() -struct s2 { short f0; }; -struct s2 f2(void) {} - -// PCS: define [1 x i64] @f3() -struct s3 { int f0; }; -struct s3 f3(void) {} - -// PCS: define [1 x i64] @f4() -struct s4 { struct s4_0 { int f0; } f0; }; -struct s4 f4(void) {} - -// PCS: define [1 x i64] @f5() -struct s5 { struct { } f0; int f1; }; -struct s5 f5(void) {} - -// PCS: define [1 x i64] @f6() -struct s6 { int f0[1]; }; -struct s6 f6(void) {} - -// PCS-LABEL: define void @f7() -struct s7 { struct { int : 0; } f0; }; -struct s7 f7(void) {} - -// PCS-LABEL: define void @f8() -struct s8 { struct { int : 0; } f0[1]; }; -struct s8 f8(void) {} - -// PCS: define [1 x i64] @f9() -struct s9 { long f0; int : 0; }; -struct s9 f9(void) {} - -// PCS: define [1 x i64] @f10() -struct s10 { long f0; int : 0; int : 0; }; -struct s10 f10(void) {} - -// PCS: define [1 x i64] @f11() -struct s11 { int : 0; long f0; }; -struct s11 f11(void) {} - -// PCS: define [1 x i64] @f12() -union u12 { char f0; short f1; int f2; long f3; }; -union u12 f12(void) {} - -// PCS-LABEL: define %struct.s13 @f13() -struct s13 { float f0; }; -struct s13 f13(void) {} - -// PCS-LABEL: define %union.u14 @f14() -union u14 { float f0; }; -union u14 f14(void) {} - -// PCS-LABEL: define void @f15() -void f15(struct s7 a0) {} - -// PCS-LABEL: define void @f16() -void f16(struct s8 a0) {} - -// PCS: define [1 x i64] @f17() -struct s17 { short f0 : 13; char f1 : 4; }; -struct s17 f17(void) {} - -// PCS: define [1 x i64] @f18() -struct s18 { short f0; char f1 : 4; }; -struct s18 f18(void) {} - -// PCS: define [1 x i64] @f19() -struct s19 { long f0; struct s8 f1; }; -struct s19 f19(void) {} - -// PCS: define [1 x i64] @f20() -struct s20 { struct s8 f1; long f0; }; -struct s20 f20(void) {} - -// PCS: define [1 x i64] @f21() -struct s21 { struct {} f1; long f0 : 4; }; -struct s21 f21(void) {} - -// PCS: define { float, float } @f22() -// PCS: define { double, double } @f23( -_Complex float f22(void) {} -_Complex double f23(void) {} - -// PCS: define [1 x i64] @f24() -struct s24 { _Complex char f0; }; -struct s24 f24() {} - -// PCS: define [1 x i64] @f25() -struct s25 { _Complex short f0; }; -struct s25 f25() {} - -// PCS: define [1 x i64] @f26() -struct s26 { _Complex int f0; }; -struct s26 f26() {} - -// PCS: define [2 x i64] @f27() -struct s27 { _Complex long f0; }; -struct s27 f27() {} - -// PCS-LABEL: define void @f28(i8 %a, i16 %b, i32 %c, i64 %d, float %e, double %f) -void f28(char a, short b, int c, long d, float e, double f) {} - -// PCS: define void @f29([2 x i64] %a -struct s29 { int arr[4]; }; -void f29(struct s29 a) {} - -// PCS-LABEL: define void @f30(%struct.s30* %a) -struct s30 { int arr[4]; char c;}; -void f30(struct s30 a) {} - -// PCS: define void @f31([4 x double] %a -struct s31 { double arr[4]; }; -void f31(struct s31 a) {} - -// PCS-LABEL: define void @f32(%struct.s32* %a) -struct s32 { float arr[5]; }; -void f32(struct s32 a) {} - -// Not the only solution, but it *is* an HFA. 
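The "it *is* an HFA" remark refers to f33 just below: a homogeneous floating-point aggregate is an aggregate whose flattened members are all the same floating-point type, with at most four of them, and it travels in consecutive SIMD registers. A hedged pair of examples (hypothetical structs, not from the test):

    /* Four floats in total: an HFA, passed in s0-s3 like s33 in f33 below. */
    struct hfa4 { float arr[3]; float a; };

    /* Five floats: one member too many, so not an HFA; like s32 in f32 above
       it is passed indirectly via a pointer. */
    struct not_hfa { float arr[5]; };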
-// PCS: define void @f33([3 x float] %a.coerce0, float %a.coerce1) -struct s33 { float arr[3]; float a; }; -void f33(struct s33 a) {} - -// PCS-LABEL: define void @f34(%struct.s34* noalias sret -struct s34 { int a[4]; char b }; -struct s34 f34(void) {} - -// PCS-LABEL: define void @f35() -struct s35 {}; -void f35(struct s35 a) {} - -// Check padding is added: -// PCS: @f36(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s36* byval align 8 %stacked) -struct s36 { long a, b; }; -void f36(int x0, int x1, int x2, int x3, int x4, int x5, int x6, struct s36 stacked) {} - -// But only once: -// PCS: @f37(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s37* byval align 8 %stacked, %struct.s37* byval align 8 %stacked2) -struct s37 { long a, b; }; -void f37(int x0, int x1, int x2, int x3, int x4, int x5, int x6, struct s37 stacked, struct s37 stacked2) {} - -// Check for HFA padding args. Also, they should not end up on the stack in a -// way which will have holes in when lowered further by LLVM. In particular [3 x -// float] would be unacceptable. - -// PCS: @f38(float %s0, double %d1, float %s2, float %s3, float %s4, float %s5, [2 x float], %struct.s38* byval align 4 %stacked) -struct s38 { float a, b, c; }; -void f38(float s0, double d1, float s2, float s3, float s4, float s5, struct s38 stacked) {} - -// Check both VFP and integer arguments are padded (also that pointers and enums -// get counted as integer types correctly). -struct s39_int { long a, b; }; -struct s39_float { float a, b, c, d; }; -enum s39_enum { Val1, Val2 }; -// PCS: @f39(float %s0, i32 %x0, float %s1, i32* %x1, float %s2, i32 %x2, float %s3, float %s4, i32 %x3, [3 x float], %struct.s39_float* byval align 4 %stacked, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s39_int* byval align 8 %stacked2) -void f39(float s0, int x0, float s1, int *x1, float s2, enum s39_enum x2, float s3, float s4, - int x3, struct s39_float stacked, int x4, int x5, int x6, - struct s39_int stacked2) {} - -struct s40 { __int128 a; }; -// PCS: @f40(i32 %x0, [1 x i128] %x2_3.coerce, i32 %x4, i32 %x5, i32 %x6, [1 x i64], %struct.s40* byval align 16 %stacked) -void f40(int x0, struct s40 x2_3, int x4, int x5, int x6, struct s40 stacked) {} - -// Checking: __int128 will get properly aligned type, with padding so big struct doesn't use x7. -struct s41 { int arr[5]; }; -// PCS: @f41(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, [1 x i64], i128* byval align 16, %struct.s41* %stacked2) -int f41(int x0, int x1, int x2, int x3, int x4, int x5, int x6, __int128 stacked, struct s41 stacked2) {} - -// Checking: __int128 needing to be aligned in registers will consume correct -// number. Previously padding was inserted before "stacked" because x6_7 was -// "allocated" to x5 and x6 by clang. 
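In other words, for f42 below a 16-byte-aligned __int128 must start in an even-numbered register: with x0-x4 already taken, x5 is skipped so the value lands in the x6/x7 pair, and only the second __int128 overflows to the stack. A schematic restatement (illustrative only):

    /* Register assignment checked by f42's PCS line:
         x0-x4 : the five int parameters
         x5    : skipped, so the 128-bit value starts at an even register
         x6,x7 : __int128 x6_7
         stack : __int128 stacked, in a 16-byte-aligned slot */
    void f42_shape(int x0, int x1, int x2, int x3, int x4,
                   __int128 x6_7, __int128 stacked);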
-// PCS: @f42(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i128 %x6_7, i128* byval align 16)
-void f42(int x0, int x1, int x2, int x3, int x4, __int128 x6_7, __int128 stacked) {}
-
-// Checking: __fp16 is extended to double when calling variadic functions
-void variadic(int a, ...);
-void f43(__fp16 *in) {
-  variadic(42, *in);
-// PCS: call void (i32, ...)* @variadic(i32 42, double
-}
-
-// Checking: `double' and `long double' have different machine types, so cannot both be in an HFA
-struct s44 { long double a; double b; };
-// PCS: define void @f44(%struct.s44*
-struct s44 f44() {}
diff --git a/clang/test/CodeGen/aarch64-inline-asm.c b/clang/test/CodeGen/aarch64-inline-asm.c
index 8dd2e34..c7ce375 100644
--- a/clang/test/CodeGen/aarch64-inline-asm.c
+++ b/clang/test/CodeGen/aarch64-inline-asm.c
@@ -1,4 +1,3 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -emit-llvm -o - %s | FileCheck %s
 
 // The only part clang really deals with is the lvalue/rvalue
diff --git a/clang/test/CodeGen/aarch64-neon-2velem.c b/clang/test/CodeGen/aarch64-neon-2velem.c
index c00c8bb..d292b85 100644
--- a/clang/test/CodeGen/aarch64-neon-2velem.c
+++ b/clang/test/CodeGen/aarch64-neon-2velem.c
@@ -1,9 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN:   -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -O3 -o - %s | FileCheck %s
 
 // Test new aarch64 intrinsics and types
diff --git a/clang/test/CodeGen/aarch64-neon-3v.c b/clang/test/CodeGen/aarch64-neon-3v.c
index 5c51c09..866f8f5 100644
--- a/clang/test/CodeGen/aarch64-neon-3v.c
+++ b/clang/test/CodeGen/aarch64-neon-3v.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN:   -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -O3 -o - %s | FileCheck %s
 
 // Test new aarch64 intrinsics and types
diff --git a/clang/test/CodeGen/aarch64-neon-across.c b/clang/test/CodeGen/aarch64-neon-across.c
index 330869e..986574a 100644
--- a/clang/test/CodeGen/aarch64-neon-across.c
+++ b/clang/test/CodeGen/aarch64-neon-across.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/aarch64-neon-copy.c b/clang/test/CodeGen/aarch64-neon-copy.c
deleted file mode 100644
index f8b3d90..0000000
--- a/clang/test/CodeGen/aarch64-neon-copy.c
+++ /dev/null
@@ -1,1416 +0,0 @@
-// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN:   -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
-
-// Test new aarch64 intrinsics and types
-
-#include <arm_neon.h>
-
-uint8x8_t test_vset_lane_u8(uint8_t v1, uint8x8_t v2) {
-  // CHECK-LABEL: test_vset_lane_u8
-  return vset_lane_u8(v1, v2, 6);
-  // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}}
-}
-
-uint16x4_t test_vset_lane_u16(uint16_t
v1, uint16x4_t v2) { - // CHECK-LABEL: test_vset_lane_u16 - return vset_lane_u16(v1, v2, 2); - // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} -} - -uint32x2_t test_vset_lane_u32(uint32_t v1, uint32x2_t v2) { - // CHECK-LABEL: test_vset_lane_u32 - return vset_lane_u32(v1, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} -} -uint64x1_t test_vset_lane_u64(uint64_t v1, uint64x1_t v2) { - // CHECK-LABEL: test_vset_lane_u64 - return vset_lane_u64(v1, v2, 0); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -int8x8_t test_vset_lane_s8(int8_t v1, int8x8_t v2) { - // CHECK-LABEL: test_vset_lane_s8 - return vset_lane_s8(v1, v2, 6); - // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} -} - -int16x4_t test_vset_lane_s16(int16_t v1, int16x4_t v2) { - // CHECK-LABEL: test_vset_lane_s16 - return vset_lane_s16(v1, v2, 2); - // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} -} - -int32x2_t test_vset_lane_s32(int32_t v1, int32x2_t v2) { - // CHECK-LABEL: test_vset_lane_s32 - return vset_lane_s32(v1, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} -} - - int64x1_t test_vset_lane_s64(int64_t v1, int64x1_t v2) { - // CHECK-LABEL: test_vset_lane_s64 - return vset_lane_s64(v1, v2, 0); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -uint8x16_t test_vsetq_lane_u8(uint8_t v1, uint8x16_t v2) { - // CHECK-LABEL: test_vsetq_lane_u8 - return vsetq_lane_u8(v1, v2, 6); - // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} -} - -uint16x8_t test_vsetq_lane_u16(uint16_t v1, uint16x8_t v2) { - // CHECK-LABEL: test_vsetq_lane_u16 - return vsetq_lane_u16(v1, v2, 2); - // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} -} - -uint32x4_t test_vsetq_lane_u32(uint32_t v1, uint32x4_t v2) { - // CHECK-LABEL: test_vsetq_lane_u32 - return vsetq_lane_u32(v1, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} -} - - uint64x2_t test_vsetq_lane_u64(uint64_t v1, uint64x2_t v2) { - // CHECK-LABEL: test_vsetq_lane_u64 - return vsetq_lane_u64(v1, v2, 1); - // CHECK: ins {{v[0-9]+}}.d[1], {{x[0-9]+}} -} - -int8x16_t test_vsetq_lane_s8(int8_t v1, int8x16_t v2) { - // CHECK-LABEL: test_vsetq_lane_s8 - return vsetq_lane_s8(v1, v2, 6); - // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} -} - -int16x8_t test_vsetq_lane_s16(int16_t v1, int16x8_t v2) { - // CHECK-LABEL: test_vsetq_lane_s16 - return vsetq_lane_s16(v1, v2, 2); - // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} -} - -int32x4_t test_vsetq_lane_s32(int32_t v1, int32x4_t v2) { - // CHECK-LABEL: test_vsetq_lane_s32 - return vsetq_lane_s32(v1, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}} -} - -int64x2_t test_vsetq_lane_s64(int64_t v1, int64x2_t v2) { - // CHECK-LABEL: test_vsetq_lane_s64 - return vsetq_lane_s64(v1, v2, 0); - // CHECK: ins {{v[0-9]+}}.d[0], {{x[0-9]+}} -} - -poly8x8_t test_vset_lane_p8(poly8_t v1, poly8x8_t v2) { - // CHECK-LABEL: test_vset_lane_p8 - return vset_lane_p8(v1, v2, 6); - // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} -} - -poly16x4_t test_vset_lane_p16(poly16_t v1, poly16x4_t v2) { - // CHECK-LABEL: test_vset_lane_p16 - return vset_lane_p16(v1, v2, 2); - // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} -} - -poly8x16_t test_vsetq_lane_p8(poly8_t v1, poly8x16_t v2) { - // CHECK-LABEL: test_vsetq_lane_p8 - return vsetq_lane_p8(v1, v2, 6); - // CHECK: ins {{v[0-9]+}}.b[6], {{w[0-9]+}} -} - -poly16x8_t test_vsetq_lane_p16(poly16_t v1, poly16x8_t v2) { - // CHECK-LABEL: test_vsetq_lane_p16 - return vsetq_lane_p16(v1, v2, 2); - // CHECK: ins {{v[0-9]+}}.h[2], {{w[0-9]+}} -} - -float32x2_t test_vset_lane_f32(float32_t v1, float32x2_t v2) { - // CHECK-LABEL: test_vset_lane_f32 - return 
vset_lane_f32(v1, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -} - -float32x4_t test_vsetq_lane_f32(float32_t v1, float32x4_t v2) { - // CHECK-LABEL: test_vsetq_lane_f32 - return vsetq_lane_f32(v1, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[1], {{v[0-9]+}}.s[0] -} - -float64x1_t test_vset_lane_f64(float64_t v1, float64x1_t v2) { - // CHECK-LABEL: test_vset_lane_f64 - return vset_lane_f64(v1, v2, 0); - // CHECK: ret -} - -float64x2_t test_vsetq_lane_f64(float64_t v1, float64x2_t v2) { - // CHECK-LABEL: test_vsetq_lane_f64 - return vsetq_lane_f64(v1, v2, 0); - // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[0] -} - -uint8_t test_vget_lane_u8(uint8x8_t v1) { - // CHECK-LABEL: test_vget_lane_u8 - return vget_lane_u8(v1, 7); - // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] -} - -uint16_t test_vget_lane_u16(uint16x4_t v1) { - // CHECK-LABEL: test_vget_lane_u16 - return vget_lane_u16(v1, 3); - // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3] -} - -uint32_t test_vget_lane_u32(uint32x2_t v1) { - // CHECK-LABEL: test_vget_lane_u32 - return vget_lane_u32(v1, 1); - // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[1] -} - -uint64_t test_vget_lane_u64(uint64x1_t v1) { - // CHECK-LABEL: test_vget_lane_u64 - return vget_lane_u64(v1, 0); - // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} -} - -uint8_t test_vgetq_lane_u8(uint8x16_t v1) { - // CHECK-LABEL: test_vgetq_lane_u8 - return vgetq_lane_u8(v1, 15); - // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[15] -} - -uint16_t test_vgetq_lane_u16(uint16x8_t v1) { - // CHECK-LABEL: test_vgetq_lane_u16 - return vgetq_lane_u16(v1, 6); - // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[6] -} - -uint32_t test_vgetq_lane_u32(uint32x4_t v1) { - // CHECK-LABEL: test_vgetq_lane_u32 - return vgetq_lane_u32(v1, 2); - // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.s[2] -} - -uint64_t test_vgetq_lane_u64(uint64x2_t v1) { - // CHECK-LABEL: test_vgetq_lane_u64 - return vgetq_lane_u64(v1, 1); - // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1] -} - -poly8_t test_vget_lane_p8(poly8x8_t v1) { - // CHECK-LABEL: test_vget_lane_p8 - return vget_lane_p8(v1, 7); - // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[7] -} - -poly16_t test_vget_lane_p16(poly16x4_t v1) { - // CHECK-LABEL: test_vget_lane_p16 - return vget_lane_p16(v1, 3); - // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[3] -} - -poly8_t test_vgetq_lane_p8(poly8x16_t v1) { - // CHECK-LABEL: test_vgetq_lane_p8 - return vgetq_lane_p8(v1, 14); - // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.b[14] -} - -poly16_t test_vgetq_lane_p16(poly16x8_t v1) { - // CHECK-LABEL: test_vgetq_lane_p16 - return vgetq_lane_p16(v1, 6); - // CHECK: umov {{w[0-9]+}}, {{v[0-9]+}}.h[6] -} - -int32_t test_vget_lane_s8(int8x8_t v1) { - // CHECK-LABEL: test_vget_lane_s8 - return vget_lane_s8(v1, 7)+1; - // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[7] -} - -int32_t test_vget_lane_s16(int16x4_t v1) { - // CHECK-LABEL: test_vget_lane_s16 - return vget_lane_s16(v1, 3)+1; - // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[3] -} - -int64_t test_vget_lane_s32(int32x2_t v1) { - // CHECK-LABEL: test_vget_lane_s32 - return vget_lane_s32(v1, 1); - // CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[1] -} - -int64_t test_vget_lane_s64(int64x1_t v1) { - // CHECK-LABEL: test_vget_lane_s64 - return vget_lane_s64(v1, 0); - // CHECK: fmov {{x[0-9]+}}, {{d[0-9]+}} -} - -int32_t test_vgetq_lane_s8(int8x16_t v1) { - // CHECK-LABEL: test_vgetq_lane_s8 - return vgetq_lane_s8(v1, 15)+1; - // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.b[15] -} - -int32_t test_vgetq_lane_s16(int16x8_t v1) { - // CHECK-LABEL: test_vgetq_lane_s16 - return vgetq_lane_s16(v1, 
6)+1; - // CHECK: smov {{w[0-9]+}}, {{v[0-9]+}}.h[6] -} - -int64_t test_vgetq_lane_s32(int32x4_t v1) { - // CHECK-LABEL: test_vgetq_lane_s32 - return vgetq_lane_s32(v1, 2); - // CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.s[2] -} - -int64_t test_vgetq_lane_s64(int64x2_t v1) { - // CHECK-LABEL: test_vgetq_lane_s64 - return vgetq_lane_s64(v1, 1); - // CHECK: umov {{x[0-9]+}}, {{v[0-9]+}}.d[1] -} - -int8x8_t test_vcopy_lane_s8(int8x8_t v1, int8x8_t v2) { - // CHECK-LABEL: test_vcopy_lane_s8 - return vcopy_lane_s8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -int16x4_t test_vcopy_lane_s16(int16x4_t v1, int16x4_t v2) { - // CHECK-LABEL: test_vcopy_lane_s16 - return vcopy_lane_s16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -poly8x8_t test_vcopy_lane_p8(poly8x8_t v1, poly8x8_t v2) { - // CHECK-LABEL: test_vcopy_lane_p8 - return vcopy_lane_p8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -poly16x4_t test_vcopy_lane_p16(poly16x4_t v1, poly16x4_t v2) { - // CHECK-LABEL: test_vcopy_lane_p16 - return vcopy_lane_p16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -int32x2_t test_vcopy_lane_s32(int32x2_t v1, int32x2_t v2) { - // CHECK-LABEL: test_vcopy_lane_s32 - return vcopy_lane_s32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -float32x2_t test_vcopy_lane_f32(float32x2_t v1, float32x2_t v2) { - // CHECK-LABEL: test_vcopy_lane_f32 - return vcopy_lane_f32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -uint8x8_t test_vcopy_lane_u8(uint8x8_t v1, uint8x8_t v2) { - // CHECK-LABEL: test_vcopy_lane_u8 - return vcopy_lane_u8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -uint16x4_t test_vcopy_lane_u16(uint16x4_t v1, uint16x4_t v2) { - // CHECK-LABEL: test_vcopy_lane_u16 - return vcopy_lane_u16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -uint32x2_t test_vcopy_lane_u32(uint32x2_t v1, uint32x2_t v2) { - // CHECK-LABEL: test_vcopy_lane_u32 - return vcopy_lane_u32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -int8x8_t test_vcopy_laneq_s8(int8x8_t v1, int8x16_t v2) { - // CHECK-LABEL: test_vcopy_laneq_s8 - return vcopy_laneq_s8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -int16x4_t test_vcopy_laneq_s16(int16x4_t v1, int16x8_t v2) { - // CHECK-LABEL: test_vcopy_laneq_s16 - return vcopy_laneq_s16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -poly8x8_t test_vcopy_laneq_p8(poly8x8_t v1, poly8x16_t v2) { - // CHECK-LABEL: test_vcopy_laneq_p8 - return vcopy_laneq_p8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -poly16x4_t test_vcopy_laneq_p16(poly16x4_t v1, poly16x8_t v2) { - // CHECK-LABEL: test_vcopy_laneq_p16 - return vcopy_laneq_p16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -int32x2_t test_vcopy_laneq_s32(int32x2_t v1, int32x4_t v2) { - // CHECK-LABEL: test_vcopy_laneq_s32 - return vcopy_laneq_s32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -float32x2_t test_vcopy_laneq_f32(float32x2_t v1, float32x4_t v2) { - // CHECK-LABEL: test_vcopy_laneq_f32 - return vcopy_laneq_f32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -uint8x8_t test_vcopy_laneq_u8(uint8x8_t v1, uint8x16_t v2) { - // CHECK-LABEL: test_vcopy_laneq_u8 - return vcopy_laneq_u8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -uint16x4_t 
test_vcopy_laneq_u16(uint16x4_t v1, uint16x8_t v2) { - // CHECK-LABEL: test_vcopy_laneq_u16 - return vcopy_laneq_u16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -uint32x2_t test_vcopy_laneq_u32(uint32x2_t v1, uint32x4_t v2) { - // CHECK-LABEL: test_vcopy_laneq_u32 - return vcopy_laneq_u32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -int8x16_t test_vcopyq_lane_s8(int8x16_t v1, int8x8_t v2) { - // CHECK-LABEL: test_vcopyq_lane_s8 - return vcopyq_lane_s8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -int16x8_t test_vcopyq_lane_s16(int16x8_t v1, int16x4_t v2) { - // CHECK-LABEL: test_vcopyq_lane_s16 - return vcopyq_lane_s16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -poly8x16_t test_vcopyq_lane_p8(poly8x16_t v1, poly8x8_t v2) { - // CHECK-LABEL: test_vcopyq_lane_p8 - return vcopyq_lane_p8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -poly16x8_t test_vcopyq_lane_p16(poly16x8_t v1, poly16x4_t v2) { - // CHECK-LABEL: test_vcopyq_lane_p16 - return vcopyq_lane_p16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -int32x4_t test_vcopyq_lane_s32(int32x4_t v1, int32x2_t v2) { - // CHECK-LABEL: test_vcopyq_lane_s32 - return vcopyq_lane_s32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -int64x2_t test_vcopyq_lane_s64(int64x2_t v1, int64x1_t v2) { - // CHECK-LABEL: test_vcopyq_lane_s64 - return vcopyq_lane_s64(v1, 1, v2, 0); - // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -} - -float32x4_t test_vcopyq_lane_f32(float32x4_t v1, float32x2_t v2) { - // CHECK-LABEL: test_vcopyq_lane_f32 - return vcopyq_lane_f32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -float64x2_t test_vcopyq_lane_f64(float64x2_t v1, float64x1_t v2) { - // CHECK-LABEL: test_vcopyq_lane_f64 - return vcopyq_lane_f64(v1, 1, v2, 0); - // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -} - -uint8x16_t test_vcopyq_lane_u8(uint8x16_t v1, uint8x8_t v2) { - // CHECK-LABEL: test_vcopyq_lane_u8 - return vcopyq_lane_u8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -uint16x8_t test_vcopyq_lane_u16(uint16x8_t v1, uint16x4_t v2) { - // CHECK-LABEL: test_vcopyq_lane_u16 - return vcopyq_lane_u16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -uint32x4_t test_vcopyq_lane_u32(uint32x4_t v1, uint32x2_t v2) { - // CHECK-LABEL: test_vcopyq_lane_u32 - return vcopyq_lane_u32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -uint64x2_t test_vcopyq_lane_u64(uint64x2_t v1, uint64x1_t v2) { - // CHECK-LABEL: test_vcopyq_lane_u64 - return vcopyq_lane_u64(v1, 1, v2, 0); - // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0] -} - -int8x16_t test_vcopyq_laneq_s8(int8x16_t v1, int8x16_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_s8 - return vcopyq_laneq_s8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -int16x8_t test_vcopyq_laneq_s16(int16x8_t v1, int16x8_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_s16 - return vcopyq_laneq_s16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -poly8x16_t test_vcopyq_laneq_p8(poly8x16_t v1, poly8x16_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_p8 - return vcopyq_laneq_p8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -poly16x8_t test_vcopyq_laneq_p16(poly16x8_t v1, poly16x8_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_p16 - return vcopyq_laneq_p16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], 
{{v[0-9]+}}.h[3] -} - -int32x4_t test_vcopyq_laneq_s32(int32x4_t v1, int32x4_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_s32 - return vcopyq_laneq_s32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -float32x4_t test_vcopyq_laneq_f32(float32x4_t v1, float32x4_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_f32 - return vcopyq_laneq_f32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -int64x2_t test_vcopyq_laneq_s64(int64x2_t v1, int64x2_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_s64 - return vcopyq_laneq_s64(v1, 1, v2, 1); - // CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1] -} - -uint8x16_t test_vcopyq_laneq_u8(uint8x16_t v1, uint8x16_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_u8 - return vcopyq_laneq_u8(v1, 5, v2, 3); - // CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3] -} - -uint16x8_t test_vcopyq_laneq_u16(uint16x8_t v1, uint16x8_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_u16 - return vcopyq_laneq_u16(v1, 2, v2, 3); - // CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3] -} - -uint32x4_t test_vcopyq_laneq_u32(uint32x4_t v1, uint32x4_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_u32 - return vcopyq_laneq_u32(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1] -} - -uint64x2_t test_vcopyq_laneq_u64(uint64x2_t v1, uint64x2_t v2) { - // CHECK-LABEL: test_vcopyq_laneq_u64 - return vcopyq_laneq_u64(v1, 0, v2, 1); - // CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1] -} - -int8x8_t test_vcreate_s8(uint64_t v1) { - // CHECK-LABEL: test_vcreate_s8 - return vcreate_s8(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -int16x4_t test_vcreate_s16(uint64_t v1) { - // CHECK-LABEL: test_vcreate_s16 - return vcreate_s16(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -int32x2_t test_vcreate_s32(uint64_t v1) { - // CHECK-LABEL: test_vcreate_s32 - return vcreate_s32(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -int64x1_t test_vcreate_s64(uint64_t v1) { - // CHECK-LABEL: test_vcreate_s64 - return vcreate_s64(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -uint8x8_t test_vcreate_u8(uint64_t v1) { - // CHECK-LABEL: test_vcreate_u8 - return vcreate_u8(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -uint16x4_t test_vcreate_u16(uint64_t v1) { - // CHECK-LABEL: test_vcreate_u16 - return vcreate_u16(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -uint32x2_t test_vcreate_u32(uint64_t v1) { - // CHECK-LABEL: test_vcreate_u32 - return vcreate_u32(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -uint64x1_t test_vcreate_u64(uint64_t v1) { - // CHECK-LABEL: test_vcreate_u64 - return vcreate_u64(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -poly8x8_t test_vcreate_p8(uint64_t v1) { - // CHECK-LABEL: test_vcreate_p8 - return vcreate_p8(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -poly16x4_t test_vcreate_p16(uint64_t v1) { - // CHECK-LABEL: test_vcreate_p16 - return vcreate_p16(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -float16x4_t test_vcreate_f16(uint64_t v1) { - // CHECK-LABEL: test_vcreate_f16 - return vcreate_f16(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -float32x2_t test_vcreate_f32(uint64_t v1) { - // CHECK-LABEL: test_vcreate_f32 - return vcreate_f32(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -float64x1_t test_vcreate_f64(uint64_t v1) { - // CHECK-LABEL: test_vcreate_f64 - return vcreate_f64(v1); - // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}} -} - -uint8x8_t test_vdup_n_u8(uint8_t v1) { - // CHECK-LABEL: test_vdup_n_u8 - return vdup_n_u8(v1); - // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}} -} - 
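The vdup_n tests that follow all pin the same pattern: a scalar is broadcast into every lane with a single dup (or an fmov for the one-lane 64-bit vectors). A small usage sketch, with an invented helper name:

    #include <arm_neon.h>

    /* Adds the same byte to all eight lanes; vdup_n_u8 compiles to one
       "dup v1.8b, w0" rather than eight separate lane inserts. */
    uint8x8_t add_bias(uint8x8_t v, uint8_t bias) {
      return vadd_u8(v, vdup_n_u8(bias));
    }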
-uint16x4_t test_vdup_n_u16(uint16_t v1) {
-  // CHECK-LABEL: test_vdup_n_u16
-  return vdup_n_u16(v1);
-  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
-}
-
-uint32x2_t test_vdup_n_u32(uint32_t v1) {
-  // CHECK-LABEL: test_vdup_n_u32
-  return vdup_n_u32(v1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
-}
-
-uint64x1_t test_vdup_n_u64(uint64_t v1) {
-  // CHECK-LABEL: test_vdup_n_u64
-  return vdup_n_u64(v1);
-  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-}
-
-uint8x16_t test_vdupq_n_u8(uint8_t v1) {
-  // CHECK-LABEL: test_vdupq_n_u8
-  return vdupq_n_u8(v1);
-  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
-}
-
-uint16x8_t test_vdupq_n_u16(uint16_t v1) {
-  // CHECK-LABEL: test_vdupq_n_u16
-  return vdupq_n_u16(v1);
-  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
-}
-
-uint32x4_t test_vdupq_n_u32(uint32_t v1) {
-  // CHECK-LABEL: test_vdupq_n_u32
-  return vdupq_n_u32(v1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
-}
-
-uint64x2_t test_vdupq_n_u64(uint64_t v1) {
-  // CHECK-LABEL: test_vdupq_n_u64
-  return vdupq_n_u64(v1);
-  // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
-}
-
-int8x8_t test_vdup_n_s8(int8_t v1) {
-  // CHECK-LABEL: test_vdup_n_s8
-  return vdup_n_s8(v1);
-  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
-}
-
-int16x4_t test_vdup_n_s16(int16_t v1) {
-  // CHECK-LABEL: test_vdup_n_s16
-  return vdup_n_s16(v1);
-  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
-}
-
-int32x2_t test_vdup_n_s32(int32_t v1) {
-  // CHECK-LABEL: test_vdup_n_s32
-  return vdup_n_s32(v1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
-}
-
-int64x1_t test_vdup_n_s64(int64_t v1) {
-  // CHECK-LABEL: test_vdup_n_s64
-  return vdup_n_s64(v1);
-  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-}
-
-int8x16_t test_vdupq_n_s8(int8_t v1) {
-  // CHECK-LABEL: test_vdupq_n_s8
-  return vdupq_n_s8(v1);
-  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
-}
-
-int16x8_t test_vdupq_n_s16(int16_t v1) {
-  // CHECK-LABEL: test_vdupq_n_s16
-  return vdupq_n_s16(v1);
-  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
-}
-
-int32x4_t test_vdupq_n_s32(int32_t v1) {
-  // CHECK-LABEL: test_vdupq_n_s32
-  return vdupq_n_s32(v1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
-}
-
-int64x2_t test_vdupq_n_s64(int64_t v1) {
-  // CHECK-LABEL: test_vdupq_n_s64
-  return vdupq_n_s64(v1);
-  // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
-}
-
-poly8x8_t test_vdup_n_p8(poly8_t v1) {
-  // CHECK-LABEL: test_vdup_n_p8
-  return vdup_n_p8(v1);
-  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
-}
-
-poly16x4_t test_vdup_n_p16(poly16_t v1) {
-  // CHECK-LABEL: test_vdup_n_p16
-  return vdup_n_p16(v1);
-  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
-}
-
-poly8x16_t test_vdupq_n_p8(poly8_t v1) {
-  // CHECK-LABEL: test_vdupq_n_p8
-  return vdupq_n_p8(v1);
-  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
-}
-
-poly16x8_t test_vdupq_n_p16(poly16_t v1) {
-  // CHECK-LABEL: test_vdupq_n_p16
-  return vdupq_n_p16(v1);
-  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
-}
-
-float32x2_t test_vdup_n_f32(float32_t v1) {
-  // CHECK-LABEL: test_vdup_n_f32
-  return vdup_n_f32(v1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-}
-
-float64x1_t test_vdup_n_f64(float64_t v1) {
-  // CHECK-LABEL: test_vdup_n_f64
-  return vdup_n_f64(v1);
-  // CHECK: ret
-}
-
-float32x4_t test_vdupq_n_f32(float32_t v1) {
-  // CHECK-LABEL: test_vdupq_n_f32
-  return vdupq_n_f32(v1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-}
-
-float64x2_t test_vdupq_n_f64(float64_t v1) {
-  // CHECK-LABEL: test_vdupq_n_f64
-  return vdupq_n_f64(v1);
-  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-}
-
-int8x8_t test_vdup_lane_s8(int8x8_t v1) {
-  // CHECK-LABEL: test_vdup_lane_s8
-  return vdup_lane_s8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
-}
-
-int16x4_t test_vdup_lane_s16(int16x4_t v1) {
-  // CHECK-LABEL: test_vdup_lane_s16
-  return vdup_lane_s16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-}
-
-int32x2_t test_vdup_lane_s32(int32x2_t v1) {
-  // CHECK-LABEL: test_vdup_lane_s32
-  return vdup_lane_s32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-}
-
-int64x1_t test_vdup_lane_s64(int64x1_t v1) {
-  // CHECK-LABEL: test_vdup_lane_s64
-  return vdup_lane_s64(v1, 0);
-  // CHECK: ret
-}
-
-int8x16_t test_vdupq_lane_s8(int8x8_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_s8
-  return vdupq_lane_s8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
-}
-
-int16x8_t test_vdupq_lane_s16(int16x4_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_s16
-  return vdupq_lane_s16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-}
-
-int32x4_t test_vdupq_lane_s32(int32x2_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_s32
-  return vdupq_lane_s32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-}
-
-int64x2_t test_vdupq_lane_s64(int64x1_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_s64
-  return vdupq_lane_s64(v1, 0);
-  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-}
-
-uint8x8_t test_vdup_lane_u8(uint8x8_t v1) {
-  // CHECK-LABEL: test_vdup_lane_u8
-  return vdup_lane_u8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
-}
-
-uint16x4_t test_vdup_lane_u16(uint16x4_t v1) {
-  // CHECK-LABEL: test_vdup_lane_u16
-  return vdup_lane_u16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-}
-
-uint32x2_t test_vdup_lane_u32(uint32x2_t v1) {
-  // CHECK-LABEL: test_vdup_lane_u32
-  return vdup_lane_u32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-}
-
-uint64x1_t test_vdup_lane_u64(uint64x1_t v1) {
-  // CHECK-LABEL: test_vdup_lane_u64
-  return vdup_lane_u64(v1, 0);
-  // CHECK: ret
-}
-
-uint8x16_t test_vdupq_lane_u8(uint8x8_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_u8
-  return vdupq_lane_u8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
-}
-
-uint16x8_t test_vdupq_lane_u16(uint16x4_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_u16
-  return vdupq_lane_u16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-}
-
-uint32x4_t test_vdupq_lane_u32(uint32x2_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_u32
-  return vdupq_lane_u32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-}
-
-uint64x2_t test_vdupq_lane_u64(uint64x1_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_u64
-  return vdupq_lane_u64(v1, 0);
-  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-}
-
-int8x8_t test_vdup_laneq_s8(int8x16_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_s8
-  return vdup_laneq_s8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
-}
-
-int16x4_t test_vdup_laneq_s16(int16x8_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_s16
-  return vdup_laneq_s16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-}
-
-int32x2_t test_vdup_laneq_s32(int32x4_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_s32
-  return vdup_laneq_s32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-}
-
-int64x1_t test_vdup_laneq_s64(int64x2_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_s64
-  return vdup_laneq_s64(v1, 0);
-  // CHECK: ret
-}
-
-int8x16_t test_vdupq_laneq_s8(int8x16_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_s8
-  return vdupq_laneq_s8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
-}
-
-int16x8_t test_vdupq_laneq_s16(int16x8_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_s16
-  return vdupq_laneq_s16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-}
-
-int32x4_t test_vdupq_laneq_s32(int32x4_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_s32
-  return vdupq_laneq_s32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-}
-
-int64x2_t test_vdupq_laneq_s64(int64x2_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_s64
-  return vdupq_laneq_s64(v1, 0);
-  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-}
-
-uint8x8_t test_vdup_laneq_u8(uint8x16_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_u8
-  return vdup_laneq_u8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
-}
-
-uint16x4_t test_vdup_laneq_u16(uint16x8_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_u16
-  return vdup_laneq_u16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-}
-
-uint32x2_t test_vdup_laneq_u32(uint32x4_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_u32
-  return vdup_laneq_u32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-}
-
-uint64x1_t test_vdup_laneq_u64(uint64x2_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_u64
-  return vdup_laneq_u64(v1, 0);
-  // CHECK: ret
-}
-
-uint8x16_t test_vdupq_laneq_u8(uint8x16_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_u8
-  return vdupq_laneq_u8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
-}
-
-uint16x8_t test_vdupq_laneq_u16(uint16x8_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_u16
-  return vdupq_laneq_u16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-}
-
-uint32x4_t test_vdupq_laneq_u32(uint32x4_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_u32
-  return vdupq_laneq_u32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-}
-
-uint64x2_t test_vdupq_laneq_u64(uint64x2_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_u64
-  return vdupq_laneq_u64(v1, 0);
-  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-}
-
-poly8x8_t test_vdup_lane_p8(poly8x8_t v1) {
-  // CHECK-LABEL: test_vdup_lane_p8
-  return vdup_lane_p8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
-}
-
-poly16x4_t test_vdup_lane_p16(poly16x4_t v1) {
-  // CHECK-LABEL: test_vdup_lane_p16
-  return vdup_lane_p16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-}
-
-poly8x16_t test_vdupq_lane_p8(poly8x8_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_p8
-  return vdupq_lane_p8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
-}
-
-poly16x8_t test_vdupq_lane_p16(poly16x4_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_p16
-  return vdupq_lane_p16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-}
-
-poly8x8_t test_vdup_laneq_p8(poly8x16_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_p8
-  return vdup_laneq_p8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.8b, {{v[0-9]+}}.b[5]
-}
-
-poly16x4_t test_vdup_laneq_p16(poly16x8_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_p16
-  return vdup_laneq_p16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-}
-
-poly8x16_t test_vdupq_laneq_p8(poly8x16_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_p8
-  return vdupq_laneq_p8(v1, 5);
-  // CHECK: dup {{v[0-9]+}}.16b, {{v[0-9]+}}.b[5]
-}
-
-poly16x8_t test_vdupq_laneq_p16(poly16x8_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_p16
-  return vdupq_laneq_p16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-}
-
-float16x4_t test_vdup_lane_f16(float16x4_t v1) {
-  // CHECK-LABEL: test_vdup_lane_f16
-  return vdup_lane_f16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-}
-
-float32x2_t test_vdup_lane_f32(float32x2_t v1) {
-  // CHECK-LABEL: test_vdup_lane_f32
-  return vdup_lane_f32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-}
-
-float64x1_t test_vdup_lane_f64(float64x1_t v1) {
-  // CHECK-LABEL: test_vdup_lane_f64
-  return vdup_lane_f64(v1, 0);
-  // CHECK: ret
-}
-
-float16x4_t test_vdup_laneq_f16(float16x8_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_f16
-  return vdup_laneq_f16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.4h, {{v[0-9]+}}.h[2]
-}
-
-float32x2_t test_vdup_laneq_f32(float32x4_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_f32
-  return vdup_laneq_f32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
-}
-
-float64x1_t test_vdup_laneq_f64(float64x2_t v1) {
-  // CHECK-LABEL: test_vdup_laneq_f64
-  return vdup_laneq_f64(v1, 0);
-  // CHECK: ret
-}
-
-float16x8_t test_vdupq_lane_f16(float16x4_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_f16
-  return vdupq_lane_f16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-}
-
-float32x4_t test_vdupq_lane_f32(float32x2_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_f32
-  return vdupq_lane_f32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-}
-
-float64x2_t test_vdupq_lane_f64(float64x1_t v1) {
-  // CHECK-LABEL: test_vdupq_lane_f64
-  return vdupq_lane_f64(v1, 0);
-  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-}
-
-float16x8_t test_vdupq_laneq_f16(float16x8_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_f16
-  return vdupq_laneq_f16(v1, 2);
-  // CHECK: dup {{v[0-9]+}}.8h, {{v[0-9]+}}.h[2]
-}
-
-float32x4_t test_vdupq_laneq_f32(float32x4_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_f32
-  return vdupq_laneq_f32(v1, 1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
-}
-
-float64x2_t test_vdupq_laneq_f64(float64x2_t v1) {
-  // CHECK-LABEL: test_vdupq_laneq_f64
-  return vdupq_laneq_f64(v1, 0);
-  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-}
-
-int8x8_t test_vmov_n_s8(int8_t v1) {
-  // CHECK-LABEL: test_vmov_n_s8
-  return vmov_n_s8(v1);
-  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
-}
-
-int16x4_t test_vmov_n_s16(int16_t v1) {
-  // CHECK-LABEL: test_vmov_n_s16
-  return vmov_n_s16(v1);
-  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
-}
-
-int32x2_t test_vmov_n_s32(int32_t v1) {
-  // CHECK-LABEL: test_vmov_n_s32
-  return vmov_n_s32(v1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
-}
-
-int64x1_t test_vmov_n_s64(int64_t v1) {
-  // CHECK-LABEL: test_vmov_n_s64
-  return vmov_n_s64(v1);
-  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-}
-
-int8x16_t test_vmovq_n_s8(int8_t v1) {
-  // CHECK-LABEL: test_vmovq_n_s8
-  return vmovq_n_s8(v1);
-  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
-}
-
-int16x8_t test_vmovq_n_s16(int16_t v1) {
-  // CHECK-LABEL: test_vmovq_n_s16
-  return vmovq_n_s16(v1);
-  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
-}
-
-int32x4_t test_vmovq_n_s32(int32_t v1) {
-  // CHECK-LABEL: test_vmovq_n_s32
-  return vmovq_n_s32(v1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
-}
-
-int64x2_t test_vmovq_n_s64(int64_t v1) {
-  // CHECK-LABEL: test_vmovq_n_s64
-  return vmovq_n_s64(v1);
-  // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
-}
-
-uint8x8_t test_vmov_n_u8(uint8_t v1) {
-  // CHECK-LABEL: test_vmov_n_u8
-  return vmov_n_u8(v1);
-  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
-}
-
-uint16x4_t test_vmov_n_u16(uint16_t v1) {
-  // CHECK-LABEL: test_vmov_n_u16
-  return vmov_n_u16(v1);
-  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
-}
-
-uint32x2_t test_vmov_n_u32(uint32_t v1) {
-  // CHECK-LABEL: test_vmov_n_u32
-  return vmov_n_u32(v1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{w[0-9]+}}
-}
-
-uint64x1_t test_vmov_n_u64(uint64_t v1) {
-  // CHECK-LABEL: test_vmov_n_u64
-  return vmov_n_u64(v1);
-  // CHECK: fmov {{d[0-9]+}}, {{x[0-9]+}}
-}
-
-uint8x16_t test_vmovq_n_u8(uint8_t v1) {
-  // CHECK-LABEL: test_vmovq_n_u8
-  return vmovq_n_u8(v1);
-  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
-}
-
-uint16x8_t test_vmovq_n_u16(uint16_t v1) {
-  // CHECK-LABEL: test_vmovq_n_u16
-  return vmovq_n_u16(v1);
-  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
-}
-
-uint32x4_t test_vmovq_n_u32(uint32_t v1) {
-  // CHECK-LABEL: test_vmovq_n_u32
-  return vmovq_n_u32(v1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{w[0-9]+}}
-}
-
-uint64x2_t test_vmovq_n_u64(uint64_t v1) {
-  // CHECK-LABEL: test_vmovq_n_u64
-  return vmovq_n_u64(v1);
-  // CHECK: dup {{v[0-9]+}}.2d, {{x[0-9]+}}
-}
-
-poly8x8_t test_vmov_n_p8(poly8_t v1) {
-  // CHECK-LABEL: test_vmov_n_p8
-  return vmov_n_p8(v1);
-  // CHECK: dup {{v[0-9]+}}.8b, {{w[0-9]+}}
-}
-
-poly16x4_t test_vmov_n_p16(poly16_t v1) {
-  // CHECK-LABEL: test_vmov_n_p16
-  return vmov_n_p16(v1);
-  // CHECK: dup {{v[0-9]+}}.4h, {{w[0-9]+}}
-}
-
-poly8x16_t test_vmovq_n_p8(poly8_t v1) {
-  // CHECK-LABEL: test_vmovq_n_p8
-  return vmovq_n_p8(v1);
-  // CHECK: dup {{v[0-9]+}}.16b, {{w[0-9]+}}
-}
-
-poly16x8_t test_vmovq_n_p16(poly16_t v1) {
-  // CHECK-LABEL: test_vmovq_n_p16
-  return vmovq_n_p16(v1);
-  // CHECK: dup {{v[0-9]+}}.8h, {{w[0-9]+}}
-}
-
-float32x2_t test_vmov_n_f32(float32_t v1) {
-  // CHECK-LABEL: test_vmov_n_f32
-  return vmov_n_f32(v1);
-  // CHECK: dup {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0]
-}
-
-float64x1_t test_vmov_n_f64(float64_t v1) {
-  // CHECK-LABEL: test_vmov_n_f64
-  return vmov_n_f64(v1);
-  // CHECK: ret
-}
-
-float32x4_t test_vmovq_n_f32(float32_t v1) {
-  // CHECK-LABEL: test_vmovq_n_f32
-  return vmovq_n_f32(v1);
-  // CHECK: dup {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0]
-}
-
-float64x2_t test_vmovq_n_f64(float64_t v1) {
-  // CHECK-LABEL: test_vmovq_n_f64
-  return vmovq_n_f64(v1);
-  // CHECK: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0]
-}
-
-// CHECK-LABEL: test_vcopy_lane_s64
-int64x1_t test_vcopy_lane_s64(int64x1_t a, int64x1_t c) {
-  return vcopy_lane_s64(a, 0, c, 0);
-// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
-// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-}
-
-// CHECK-LABEL: test_vcopy_lane_u64
-uint64x1_t test_vcopy_lane_u64(uint64x1_t a, uint64x1_t c) {
-  return vcopy_lane_u64(a, 0, c, 0);
-// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
-// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-}
-
-// CHECK-LABEL: test_vcopy_lane_f64
-float64x1_t test_vcopy_lane_f64(float64x1_t a, float64x1_t c) {
-  return vcopy_lane_f64(a, 0, c, 0);
-// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
-// CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0]
-}
-
-// CHECK-LABEL: test_vcopy_laneq_s64
-int64x1_t test_vcopy_laneq_s64(int64x1_t a, int64x2_t c) {
-  return vcopy_laneq_s64(a, 0, c, 1);
-// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-}
-
-// CHECK-LABEL: test_vcopy_laneq_u64
-uint64x1_t test_vcopy_laneq_u64(uint64x1_t a, uint64x2_t c) {
-  return vcopy_laneq_u64(a, 0, c, 1);
-// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-}
-
-// CHECK-LABEL: test_vcopy_laneq_f64
-float64x1_t test_vcopy_laneq_f64(float64x1_t a, float64x2_t c) {
-  return vcopy_laneq_f64(a, 0, c, 1);
-// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-}
-
-// CHECK-LABEL: test_vcopy_laneq_p64
-poly64x1_t test_vcopy_laneq_p64(poly64x1_t a, poly64x2_t c) {
-  return vcopy_laneq_p64(a, 0, c, 1);
-// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-}
-
-// CHECK-LABEL: test_vcopyq_laneq_f64
-float64x2_t test_vcopyq_laneq_f64(float64x2_t a, float64x2_t c) {
-// CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1]
-  return vcopyq_laneq_f64(a, 1, c, 1);
-}
-
-// CHECK-LABEL: test_vget_lane_f16
-int test_vget_lane_f16(float16x4_t v1) {
-  float16_t a = vget_lane_f16(v1, 3);
-  return (int)a;
-// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
-}
-
-// CHECK-LABEL: test_vgetq_lane_f16
-int test_vgetq_lane_f16(float16x8_t v1) {
-  float16_t a = vgetq_lane_f16(v1, 7);
-  return (int)a;
-// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
-}
-
-// CHECK-LABEL: test2_vget_lane_f16
-float test2_vget_lane_f16(float16x4_t v1) {
-  float16_t a = vget_lane_f16(v1, 3);
-  return (float)a;
-// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[3]
-}
-
-// CHECK-LABEL: test2_vgetq_lane_f16
-float test2_vgetq_lane_f16(float16x8_t v1) {
-  float16_t a = vgetq_lane_f16(v1, 7);
-  return (float)a;
-// CHECK: dup {{h[0-9]+}}, {{v[0-9]+}}.h[7]
-}
-
-// CHECK-LABEL: test_vset_lane_f16
-float16x4_t test_vset_lane_f16(float16x4_t v1) {
-  float16_t a = 0.0;
-  return vset_lane_f16(a, v1, 3);
-// CHECK: ins {{v[0-9]+}}.h[3], wzr
-}
-
-// CHECK-LABEL: test_vsetq_lane_f16
-float16x8_t test_vsetq_lane_f16(float16x8_t v1) {
-  float16_t a = 0.0;
-  return vsetq_lane_f16(a, v1, 7);
-// CHECK: ins {{v[0-9]+}}.h[7], wzr
-}
-
-// CHECK-LABEL: test2_vset_lane_f16
-float16x4_t test2_vset_lane_f16(float16x4_t v1) {
-  float16_t a = 1.0;
-  return vset_lane_f16(a, v1, 3);
-// CHECK: movz {{w[0-9]+}}, #15360
-// CHECK-NEXT: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
-}
-
-// CHECK-LABEL: test2_vsetq_lane_f16
-float16x8_t test2_vsetq_lane_f16(float16x8_t v1) {
-  float16_t a = 1.0;
-  return vsetq_lane_f16(a, v1, 7);
-// CHECK: movz {{w[0-9]+}}, #15360
-// CHECK-NEXT: ins {{v[0-9]+}}.h[7], {{w[0-9]+}}
-}
-
-// CHECK-LABEL: test_vget_vset_lane_f16
-float16x4_t test_vget_vset_lane_f16(float16x4_t v1) {
-  float16_t a = vget_lane_f16(v1, 0);
-  return vset_lane_f16(a, v1, 3);
-// CHECK: ins {{v[0-9]+}}.h[3], {{v[0-9]+}}.h[0]
-}
-
-// CHECK-LABEL: test_vgetq_vsetq_lane_f16
-float16x8_t test_vgetq_vsetq_lane_f16(float16x8_t v1) {
-  float16_t a = vgetq_lane_f16(v1, 0);
-  return vsetq_lane_f16(a, v1, 7);
-// CHECK: ins {{v[0-9]+}}.h[7], {{v[0-9]+}}.h[0]
-}
-
-// CHECK-LABEL: test4_vset_lane_f16
-float16x4_t test4_vset_lane_f16(float16x4_t v1, float b, float c) {
-  float16_t a = (float16_t)b;
-  return vset_lane_f16(a, v1, 3);
-// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
-// CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
-}
-
-// CHECK-LABEL: test4_vsetq_lane_f16
-float16x8_t test4_vsetq_lane_f16(float16x8_t v1, float b, float c) {
-  float16_t a = (float16_t)b;
-  return vsetq_lane_f16(a, v1, 7);
-// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
-// CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}}
-}
-
-// CHECK-LABEL: test5_vset_lane_f16
-float16x4_t test5_vset_lane_f16(float16x4_t v1, float b, float c) {
-  float16_t a = (float16_t)b;
-  return vset_lane_f16(a, v1, 3);
-// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
-// CHECK: ins {{v[0-9]+}}.h[3], {{w[0-9]+}}
-}
-
-// CHECK-LABEL: test5_vsetq_lane_f16
-float16x8_t test5_vsetq_lane_f16(float16x8_t v1, float b, float c) {
-  float16_t a = (float16_t)b + 1.0;
-  return vsetq_lane_f16(a, v1, 7);
-// CHECK: fmov {{w[0-9]+}}, {{s[0-9]+}}
-// CHECK: ins {{v[0-9]+}}.h[7], {{w[0-9]+}}
-}
-
-// CHECK-LABEL: test_vset_vget_lane_f16
-int test_vset_vget_lane_f16(float16x4_t a) {
-  float16x4_t b;
-  b = vset_lane_f16(3.5, a, 3);
-  float16_t c = vget_lane_f16(b, 3);
-  return (int)c;
-// CHECK: movz x{{[0-9]+}}, #3
-}
-
-// CHECK-LABEL: test_vsetq_vgetq_lane_f16
-int test_vsetq_vgetq_lane_f16(float16x8_t a) {
-  float16x8_t b;
-  b = vsetq_lane_f16(3.5, a, 5);
-  float16_t c = vgetq_lane_f16(b, 5);
-  return (int)c;
-// CHECK: movz x{{[0-9]+}}, #3
-}
-
-// CHECK-LABEL: test_vdup_laneq_p64:
-poly64x1_t test_vdup_laneq_p64(poly64x2_t vec) {
-  return vdup_laneq_p64(vec, 0);
-// CHECK-NEXT: ret
-}
-
-// CHECK-LABEL: test_vdup_laneq_p64_1
-poly64x1_t test_vdup_laneq_p64_1(poly64x2_t vec) {
-  return vdup_laneq_p64(vec, 1);
-// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-}
-
-// CHECK-LABEL: test_vget_lane_f32
-float32_t test_vget_lane_f32_1(float32x2_t v) {
-  return vget_lane_f32(v, 1);
-// CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[1]
-}
-
-// CHECK-LABEL: test_vget_lane_f64:
-float64_t test_vget_lane_f64(float64x1_t v) {
-  return vget_lane_f64(v, 0);
-// CHECK-NEXT: ret
-}
-
-// CHECK-LABEL: test_vgetq_lane_f64_1
-float64_t test_vgetq_lane_f64_1(float64x2_t v) {
-  return vgetq_lane_f64(v, 1);
-// CHECK: dup {{d[0-9]+}}, {{v[0-9]+}}.d[1]
-}
-
-// CHECK-LABEL: test_vget_lane_f32:
-float32_t test_vget_lane_f32(float32x2_t v) {
-  return vget_lane_f32(v, 0);
-// CHECK-NEXT: ret
-}
-
-// CHECK-LABEL: test_vgetq_lane_f32:
-float32_t test_vgetq_lane_f32(float32x4_t v) {
-  return vgetq_lane_f32(v, 0);
-// CHECK-NEXT: ret
-}
-
-// CHECK-LABEL: test_vgetq_lane_f64:
-float64_t test_vgetq_lane_f64(float64x2_t v) {
-  return vgetq_lane_f64(v, 0);
-// CHECK-NEXT: ret
-}
-
diff --git a/clang/test/CodeGen/aarch64-neon-extract.c b/clang/test/CodeGen/aarch64-neon-extract.c
index b8da2b8..341fb9e 100644
--- a/clang/test/CodeGen/aarch64-neon-extract.c
+++ b/clang/test/CodeGen/aarch64-neon-extract.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c b/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c
index c63ce85..b4dfe14 100644
--- a/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/aarch64-neon-fma.c b/clang/test/CodeGen/aarch64-neon-fma.c
index 68f2fbc..753edfa 100644
--- a/clang/test/CodeGen/aarch64-neon-fma.c
+++ b/clang/test/CodeGen/aarch64-neon-fma.c
@@ -1,9 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck -check-prefix=CHECK-FMA %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -O3 -o - %s | FileCheck %s
 
 // Test new aarch64 intrinsics and types
diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c
index 83214f1..5c5209c 100644
--- a/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
@@ -8294,8 +8291,6 @@ uint64_t test_vsqaddd_u64(uint64_t a, uint64_t b) {
 }
 
 int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) {
-// CHECK-AARCH64-LABEL: test_vqdmlalh_s16
-// CHECK-AARCH64: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
 
 // CHECK-ARM64-LABEL: test_vqdmlalh_s16
 // CHECK-ARM64: sqdmull v[[PROD:[0-9]+]].4s, {{v[0-9]+.4h}}, {{v[0-9]+.4h}}
@@ -8310,8 +8305,6 @@ int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) {
 }
 
 int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) {
-// CHECK-AARCH64-LABEL: test_vqdmlslh_s16
-// CHECK-AARCH64: sqdmlsl {{s[0-9]+|v[0-9]+.4s}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
 
 // CHECK-ARM64-LABEL: test_vqdmlslh_s16
 // CHECK-ARM64: sqdmull v[[PROD:[0-9]+]].4s, {{v[0-9]+.4h}}, {{v[0-9]+.4h}}
@@ -8572,8 +8565,6 @@ int64x1_t test_vshr_n_s64(int64x1_t a) {
 }
 
 uint64_t test_vshrd_n_u64(uint64_t a) {
-// CHECK-AARCH64-LABEL: test_vshrd_n_u64
-// CHECK-AARCH64: {{ushr d[0-9]+, d[0-9]+, #64}}
 
 // CHECK-ARM64-LABEL: test_vshrd_n_u64
 // CHECK-ARM64: mov x0, xzr
@@ -8581,8 +8572,6 @@ uint64_t test_vshrd_n_u64(uint64_t a) {
 }
 
 uint64_t test_vshrd_n_u64_2() {
-// CHECK-AARCH64-LABEL: test_vshrd_n_u64_2
-// CHECK-AARCH64: {{ushr d[0-9]+, d[0-9]+, #64}}
 
 // CHECK-ARM64-LABEL: test_vshrd_n_u64_2
 // CHECK-ARM64: mov x0, xzr
@@ -8639,8 +8628,6 @@ uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) {
 }
 
 uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) {
-// CHECK-AARCH64-LABEL: test_vsrad_n_u64_2
-// CHECK-AARCH64: {{usra d[0-9]+, d[0-9]+, #64}}
 
 // CHECK-ARM64-LABEL: test_vsrad_n_u64_2
 // CHECK-ARM64-NOT: add
diff --git a/clang/test/CodeGen/aarch64-neon-ld-sideeffect.c b/clang/test/CodeGen/aarch64-neon-ld-sideeffect.c
deleted file mode 100644
index 1436dcd..0000000
--- a/clang/test/CodeGen/aarch64-neon-ld-sideeffect.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -emit-llvm -O0 -o - %s | FileCheck %s
-
-#include <arm_neon.h>
-
-void *foo(void);
-
-float32x2_t bar(void) {
-  // CHECK-LABEL: @bar
-  return vld1_f32(foo());
-  // CHECK: call i8* @foo
-  // CHECK-NOT: call i8* @foo
-  // CHECK: call <2 x float> @llvm.{{arm|arm64}}.neon.vld1
-}
diff --git a/clang/test/CodeGen/aarch64-neon-ldst-one.c b/clang/test/CodeGen/aarch64-neon-ldst-one.c
index 1f26f67..e163fe9 100644
--- a/clang/test/CodeGen/aarch64-neon-ldst-one.c
+++ b/clang/test/CodeGen/aarch64-neon-ldst-one.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c
index 75e6c5b..bab98ea 100644
--- a/clang/test/CodeGen/aarch64-neon-misc.c
+++ b/clang/test/CodeGen/aarch64-neon-misc.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/aarch64-neon-perm.c b/clang/test/CodeGen/aarch64-neon-perm.c
index bcff83d..1a42470 100644
--- a/clang/test/CodeGen/aarch64-neon-perm.c
+++ b/clang/test/CodeGen/aarch64-neon-perm.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/aarch64-neon-scalar-copy.c b/clang/test/CodeGen/aarch64-neon-scalar-copy.c
index 41542f3..e43a66e 100644
--- a/clang/test/CodeGen/aarch64-neon-scalar-copy.c
+++ b/clang/test/CodeGen/aarch64-neon-scalar-copy.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
index ac7a752..3bba353 100644
--- a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
+++ b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-cpu cyclone \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/aarch64-neon-shifts.c b/clang/test/CodeGen/aarch64-neon-shifts.c
index a6cb9be..c0b7e17 100644
--- a/clang/test/CodeGen/aarch64-neon-shifts.c
+++ b/clang/test/CodeGen/aarch64-neon-shifts.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -emit-llvm -O1 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -emit-llvm -O1 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/aarch64-neon-tbl.c b/clang/test/CodeGen/aarch64-neon-tbl.c
index 682fade..ed542f6 100644
--- a/clang/test/CodeGen/aarch64-neon-tbl.c
+++ b/clang/test/CodeGen/aarch64-neon-tbl.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/aarch64-neon-vcombine.c b/clang/test/CodeGen/aarch64-neon-vcombine.c
index 78f422e..3989f6b 100644
--- a/clang/test/CodeGen/aarch64-neon-vcombine.c
+++ b/clang/test/CodeGen/aarch64-neon-vcombine.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -S -O3 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -S -O3 -o - %s | FileCheck %s
 
 // Test new aarch64 intrinsics and types
diff --git a/clang/test/CodeGen/aarch64-neon-vget-hilo.c b/clang/test/CodeGen/aarch64-neon-vget-hilo.c
index 96317ff..6b11d20 100644
--- a/clang/test/CodeGen/aarch64-neon-vget-hilo.c
+++ b/clang/test/CodeGen/aarch64-neon-vget-hilo.c
@@ -1,7 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix CHECK-COMMON --check-prefix CHECK-AARCH64
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix CHECK-COMMON --check-prefix CHECK-ARM64
@@ -12,98 +9,84 @@
 int8x8_t test_vget_high_s8(int8x16_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_s8:
   return vget_high_s8(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 int16x4_t test_vget_high_s16(int16x8_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_s16:
   return vget_high_s16(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 int32x2_t test_vget_high_s32(int32x4_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_s32:
   return vget_high_s32(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 int64x1_t test_vget_high_s64(int64x2_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_s64:
   return vget_high_s64(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 uint8x8_t test_vget_high_u8(uint8x16_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_u8:
   return vget_high_u8(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 uint16x4_t test_vget_high_u16(uint16x8_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_u16:
   return vget_high_u16(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 uint32x2_t test_vget_high_u32(uint32x4_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_u32:
   return vget_high_u32(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 uint64x1_t test_vget_high_u64(uint64x2_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_u64:
   return vget_high_u64(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 poly64x1_t test_vget_high_p64(poly64x2_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_p64:
   return vget_high_p64(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 float16x4_t test_vget_high_f16(float16x8_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_f16:
   return vget_high_f16(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 float32x2_t test_vget_high_f32(float32x4_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_f32:
   return vget_high_f32(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 poly8x8_t test_vget_high_p8(poly8x16_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_p8:
   return vget_high_p8(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 poly16x4_t test_vget_high_p16(poly16x8_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_p16
   return vget_high_p16(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
 float64x1_t test_vget_high_f64(float64x2_t a) {
   // CHECK-COMMON-LABEL: test_vget_high_f64
   return vget_high_f64(a);
-  // CHECK-AARCH64: dup d0, {{v[0-9]+}}.d[1]
   // CHECK-ARM64: ext v0.16b, v0.16b, v0.16b, #8
 }
 
diff --git a/clang/test/CodeGen/aarch64-poly128.c b/clang/test/CodeGen/aarch64-poly128.c
index 3a4d363..85b8a84 100644
--- a/clang/test/CodeGen/aarch64-poly128.c
+++ b/clang/test/CodeGen/aarch64-poly128.c
@@ -1,8 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
-// RUN: --check-prefix=CHECK-AARCH64
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
 // RUN: --check-prefix=CHECK-ARM64
@@ -19,8 +15,6 @@ void test_vstrq_p128(poly128_t * ptr, poly128_t val) {
 // CHECK-LABEL: test_vstrq_p128
   vstrq_p128(ptr, val);
-// CHECK-AARCH64: str {{x[0-9]+}}, [{{x[0-9]+}}, #8]
-// CHECK-AARCH64-NEXT: str {{x[0-9]+}}, [{{x[0-9]+}}]
 
 // CHECK-ARM64: stp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 }
@@ -28,8 +22,6 @@ void test_vstrq_p128(poly128_t * ptr, poly128_t val) {
 
 poly128_t test_vldrq_p128(poly128_t * ptr) {
 // CHECK-LABEL: test_vldrq_p128
   return vldrq_p128(ptr);
-  // CHECK-AARCH64: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
-  // CHECK-AARCH64-NEXT: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
 
 // CHECK-ARM64: ldp {{x[0-9]+}}, {{x[0-9]+}}, [x0]
 }
@@ -37,8 +29,6 @@ poly128_t test_vldrq_p128(poly128_t * ptr) {
 
 void test_ld_st_p128(poly128_t * ptr) {
 // CHECK-LABEL: test_ld_st_p128
   vstrq_p128(ptr+1, vldrq_p128(ptr));
-  // CHECK-AARCH64: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
-  // CHECK-AARCH64-NEXT: str {{q[0-9]+}}, [{{x[0-9]+}}, #16]
 
 // CHECK-ARM64: ldp [[PLO:x[0-9]+]], [[PHI:x[0-9]+]], [{{x[0-9]+}}]
 // CHECK-ARM64-NEXT: stp [[PLO]], [[PHI]], [{{x[0-9]+}}, #16]
diff --git a/clang/test/CodeGen/aarch64-poly64.c b/clang/test/CodeGen/aarch64-poly64.c
index beec675..8cfa0bc 100644
--- a/clang/test/CodeGen/aarch64-poly64.c
+++ b/clang/test/CodeGen/aarch64-poly64.c
@@ -1,8 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
-// RUN: --check-prefix=CHECK-AARCH64
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s --check-prefix=CHECK \
 // RUN: --check-prefix=CHECK-ARM64
@@ -74,7 +70,6 @@ poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
 poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
   // CHECK-LABEL: test_vcopy_lane_p64
   return vcopy_lane_p64(a, 0, b, 0);
-  // CHECK-AARCH64: fmov {{d[0-9]+}}, {{d[0-9]+}}
 
   // CHECK-ARM64: mov v0.16b, v1.16b
 }
@@ -88,7 +83,6 @@ poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
 poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vcopyq_laneq_p64
   return vcopyq_laneq_p64(a, 1, b, 1);
-  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1]
 }
 
 poly64x1_t test_vcreate_p64(uint64_t a) {
@@ -135,28 +129,24 @@ poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
 poly64x1_t test_vld1_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld1_p64
   return vld1_p64(ptr);
-  // CHECK-AARCH64: ld1 { {{v[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
   // CHECK-ARM64: ldr {{d[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
   // CHECK-LABEL: test_vld1q_p64
   return vld1q_p64(ptr);
-  // CHECK-AARCH64: ld1 { {{v[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
   // CHECK-ARM64: ldr {{q[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
   // CHECK-LABEL: test_vst1_p64
   return vst1_p64(ptr, val);
-  // CHECK-AARCH64: st1 { {{v[0-9]+}}.1d }, [{{x[0-9]+|sp}}]
   // CHECK-ARM64: str {{d[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
 void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
   // CHECK-LABEL: test_vst1q_p64
   return vst1q_p64(ptr, val);
-  // CHECK-AARCH64: st1 { {{v[0-9]+}}.2d }, [{{x[0-9]+|sp}}]
   // CHECK-ARM64: str {{q[0-9]+}}, [{{x[0-9]+|sp}}]
 }
 
@@ -247,42 +237,36 @@ poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
 poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vzip1q_p64
   return vzip1q_p64(a, b);
-  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vzip2q_p64
   return vzip2q_u64(a, b);
-  // CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
   // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vuzp1q_p64
   return vuzp1q_p64(a, b);
-  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vuzp2q_p64
   return vuzp2q_u64(a, b);
-  // CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
   // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vtrn1q_p64
   return vtrn1q_p64(a, b);
-  // CHECK-AARCH64: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
   // CHECK-ARM64: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
 poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
   // CHECK-LABEL: test_vtrn2q_p64
   return vtrn2q_u64(a, b);
-  // CHECK-AARCH64: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
   // CHECK-ARM64: zip2 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 }
 
diff --git a/clang/test/CodeGen/aarch64-type-sizes.c b/clang/test/CodeGen/aarch64-type-sizes.c
index 3b3789f..b331b6c 100644
--- a/clang/test/CodeGen/aarch64-type-sizes.c
+++ b/clang/test/CodeGen/aarch64-type-sizes.c
@@ -1,6 +1,3 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
-
-// RUN: %clang_cc1 -triple aarch64_be-none-linux-gnu -emit-llvm -w -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 // RUN: %clang_cc1 -triple arm64_be-none-linux-gnu -emit-llvm -w -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 
 // char by definition has size 1
diff --git a/clang/test/CodeGen/aarch64-varargs.c b/clang/test/CodeGen/aarch64-varargs.c
index 5f0ea6e..f787afe 100644
--- a/clang/test/CodeGen/aarch64-varargs.c
+++ b/clang/test/CodeGen/aarch64-varargs.c
@@ -1,5 +1,3 @@
-// RUN: %clang_cc1 -triple aarch64 -emit-llvm -o - %s | FileCheck -check-prefix=CHECK --check-prefix=CHECK-LE %s
-// RUN: %clang_cc1 -triple aarch64_be -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 // RUN: %clang_cc1 -triple arm64-linux-gnu -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
 // RUN: %clang_cc1 -triple arm64_be-linux-gnu -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
diff --git a/clang/test/CodeGen/builtins-aarch64.c b/clang/test/CodeGen/builtins-aarch64.c
deleted file mode 100644
index 8a93cb4..0000000
--- a/clang/test/CodeGen/builtins-aarch64.c
+++ /dev/null
@@ -1,6 +0,0 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O3 -emit-llvm -o - %s | FileCheck %s
-
-void f0(char *a, char *b) {
-  __clear_cache(a,b);
-// CHECK: call {{.*}} @__clear_cache
-}
diff --git a/clang/test/CodeGen/named_reg_global.c b/clang/test/CodeGen/named_reg_global.c
index 20f8b32..53f304ddf 100644
--- a/clang/test/CodeGen/named_reg_global.c
+++ b/clang/test/CodeGen/named_reg_global.c
@@ -1,5 +1,4 @@
 // RUN: %clang_cc1 -triple x86_64-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -triple armv7-linux-gnu -S -emit-llvm %s -o - | FileCheck %s
diff --git a/clang/test/CodeGen/neon-crypto.c b/clang/test/CodeGen/neon-crypto.c
index 551bafc..cd85d0c 100644
--- a/clang/test/CodeGen/neon-crypto.c
+++ b/clang/test/CodeGen/neon-crypto.c
@@ -1,9 +1,5 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -target-feature +crypto -emit-llvm -O1 -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm-none-linux-gnueabi -target-feature +neon \
 // RUN: -target-feature +crypto -target-cpu cortex-a57 -emit-llvm -O1 -o - %s | FileCheck %s
-// RUN: not %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -S -O3 -o - %s 2>&1 | FileCheck --check-prefix=CHECK-NO-CRYPTO %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -target-feature +crypto -emit-llvm -O1 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index a2d0623..5153be9 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -122,8 +122,6 @@
 // RUN: | FileCheck %s -check-prefix=R600SI
 // R600SI: target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
 
-// RUN: %clang_cc1 -triple aarch64-unknown -o - -emit-llvm %s | \
-// RUN: FileCheck %s -check-prefix=AARCH64
 // RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \
 // RUN: FileCheck %s -check-prefix=AARCH64
 // AARCH64: target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
diff --git a/clang/test/CodeGenCXX/aarch64-arguments.cpp b/clang/test/CodeGenCXX/aarch64-arguments.cpp
index 6e5e9f4..013051c 100644
--- a/clang/test/CodeGenCXX/aarch64-arguments.cpp
+++ b/clang/test/CodeGenCXX/aarch64-arguments.cpp
@@ -1,4 +1,3 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux -emit-llvm -w -o - %s | FileCheck -check-prefix=PCS %s
 // RUN: %clang_cc1 -triple arm64-none-linux -emit-llvm -w -o - %s | FileCheck -check-prefix=PCS %s
 
 // PCS: define void @{{.*}}(i8 %a
diff --git a/clang/test/CodeGenCXX/aarch64-cxxabi.cpp b/clang/test/CodeGenCXX/aarch64-cxxabi.cpp
index 92ceb08..6c08ff2 100644
--- a/clang/test/CodeGenCXX/aarch64-cxxabi.cpp
+++ b/clang/test/CodeGenCXX/aarch64-cxxabi.cpp
@@ -1,4 +1,3 @@
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -emit-llvm -w -o - %s | FileCheck %s
 
 // Check differences between the generic Itanium ABI, the AArch32 version and
diff --git a/clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp
index 4ed2df3e..7543a1c 100644
--- a/clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp
+++ b/clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp
@@ -1,6 +1,4 @@
-// REQUIRES: aarch64-registered-target
 // REQUIRES: arm64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon %s -emit-llvm -o - | FileCheck %s
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon %s -emit-llvm -o - | FileCheck %s
 
 typedef unsigned char uint8_t;
diff --git a/clang/test/CodeGenCXX/aarch64-neon.cpp b/clang/test/CodeGenCXX/aarch64-neon.cpp
index 78876e2..fc7de1d 100644
--- a/clang/test/CodeGenCXX/aarch64-neon.cpp
+++ b/clang/test/CodeGenCXX/aarch64-neon.cpp
@@ -1,6 +1,3 @@
-// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
-// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
 // REQUIRES: arm64-registered-target
 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
 // RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
diff --git a/clang/test/CodeGenCXX/int64_uint64.cpp b/clang/test/CodeGenCXX/int64_uint64.cpp
index 0e5f279..ed31dda 100644
--- a/clang/test/CodeGenCXX/int64_uint64.cpp
+++ b/clang/test/CodeGenCXX/int64_uint64.cpp
@@ -3,11 +3,6 @@
 // RUN: -target-cpu cortex-a8 \
 // RUN: -emit-llvm -w -O1 -o - %s | FileCheck --check-prefix=CHECK-ARM %s
 
-// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-linux-gnueabi \
-// RUN: -target-feature +neon \
-// RUN: -emit-llvm -w -O1 -o - %s | FileCheck --check-prefix=CHECK-AARCH64 %s
-
 // REQUIRES: arm64-registered-target
 // RUN: %clang_cc1 -triple arm64-linux-gnueabi \
 // RUN: -target-feature +neon \
diff --git a/clang/test/Sema/atomic-ops.c b/clang/test/Sema/atomic-ops.c
index 6f9f568..320abc5 100644
--- a/clang/test/Sema/atomic-ops.c
+++ b/clang/test/Sema/atomic-ops.c
@@ -1,5 +1,4 @@
 // RUN: %clang_cc1 %s -verify -fsyntax-only -triple=i686-linux-gnu -std=c11
-// RUN: %clang_cc1 %s -verify -fsyntax-only -triple=aarch64-linux-gnu -std=c11
 
 // Basic parsing/Sema tests for __c11_atomic_*