From: Paul Osmialowski Date: Wed, 29 Mar 2023 11:54:22 +0000 (+0100) Subject: Revert "[TLI][AArch64] Extend SLEEF vectorized functions mapping with VLA functions" X-Git-Tag: upstream/17.0.6~13336 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f8f1909d36b2b1dee6ddcdf7bfed23aebf224437;p=platform%2Fupstream%2Fllvm.git Revert "[TLI][AArch64] Extend SLEEF vectorized functions mapping with VLA functions" Reverting it so I could land it with Arcanist. This reverts commit 59dcf927ee43e995374907b6846b657f68d7ea49. --- diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 1850b7a..1626798 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -31,7 +31,6 @@ struct VecDesc { StringRef ScalarFnName; StringRef VectorFnName; ElementCount VectorizationFactor; - bool Masked; }; enum LibFunc : unsigned { @@ -162,8 +161,7 @@ public: /// Return true if the function F has a vector equivalent with vectorization /// factor VF. bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const { - return !(getVectorizedFunction(F, VF, false).empty() && - getVectorizedFunction(F, VF, true).empty()); + return !getVectorizedFunction(F, VF).empty(); } /// Return true if the function F has a vector equivalent with any @@ -172,8 +170,7 @@ public: /// Return the name of the equivalent of F, vectorized with factor VF. If no /// such mapping exists, return the empty string. - StringRef getVectorizedFunction(StringRef F, const ElementCount &VF, - bool Masked) const; + StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const; /// Set to true iff i32 parameters to library functions should have signext /// or zeroext attributes if they correspond to C-level int or unsigned int, @@ -349,9 +346,8 @@ public: bool isFunctionVectorizable(StringRef F) const { return Impl->isFunctionVectorizable(F); } - StringRef getVectorizedFunction(StringRef F, const ElementCount &VF, - bool Masked = false) const { - return Impl->getVectorizedFunction(F, VF, Masked); + StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const { + return Impl->getVectorizedFunction(F, VF); } /// Tests if the function is both available and a candidate for optimized code diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index f5aaa49..34f5b39 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -19,11 +19,9 @@ #define FIXED(NL) ElementCount::getFixed(NL) #define SCALABLE(NL) ElementCount::getScalable(NL) -#define NOMASK false -#define MASKED true #if !(defined(TLI_DEFINE_VECFUNC)) -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF, NOMASK}, +#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF}, #endif #if defined(TLI_DEFINE_ACCELERATE_VECFUNCS) @@ -606,91 +604,10 @@ TLI_DEFINE_VECFUNC( "llvm.tanh.f32", "_ZGVnN4v_tanhf", FIXED(4)) TLI_DEFINE_VECFUNC( "tgammaf", "_ZGVnN4v_tgammaf", FIXED(4)) TLI_DEFINE_VECFUNC( "llvm.tgamma.f32", "_ZGVnN4v_tgammaf", FIXED(4)) -#elif defined(TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS) - -TLI_DEFINE_VECFUNC("acos", "_ZGVsMxv_acos", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("acosf", "_ZGVsMxv_acosf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("atan", "_ZGVsMxv_atan", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("atanf", "_ZGVsMxv_atanf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("cos", "_ZGVsMxv_cos", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("cosf", "_ZGVsMxv_cosf", SCALABLE(4), MASKED) -TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVsMxv_cos", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVsMxv_cosf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("cosh", "_ZGVsMxv_cosh", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("coshf", "_ZGVsMxv_coshf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("exp", "_ZGVsMxv_exp", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED) -TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVsMxv_expf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED) -TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("lgamma", "_ZGVsMxv_lgamma", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("lgammaf", "_ZGVsMxv_lgammaf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("log", "_ZGVsMxv_log", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("logf", "_ZGVsMxv_logf", SCALABLE(4), MASKED) -TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVsMxv_log", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVsMxv_logf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("log10", "_ZGVsMxv_log10", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED) -TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED) -TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVsMxvv_powf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("sin", "_ZGVsMxv_sin", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED) -TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("sqrt", "_ZGVsMxv_sqrt", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("sqrtf", "_ZGVsMxv_sqrtf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("tan", "_ZGVsMxv_tan", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED) - -TLI_DEFINE_VECFUNC("tgamma", "_ZGVsMxv_tgamma", SCALABLE(2), MASKED) -TLI_DEFINE_VECFUNC("tgammaf", "_ZGVsMxv_tgammaf", SCALABLE(4), MASKED) - #else #error "Must choose which vector library functions are to be defined." #endif -#undef MASKED -#undef NOMASK -#undef SCALABLE -#undef FIXED - #undef TLI_DEFINE_VECFUNC #undef TLI_DEFINE_ACCELERATE_VECFUNCS #undef TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS @@ -699,5 +616,4 @@ TLI_DEFINE_VECFUNC("tgammaf", "_ZGVsMxv_tgammaf", SCALABLE(4), MASKED) #undef TLI_DEFINE_SVML_VECFUNCS #undef TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS #undef TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS -#undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS_NAMES diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index b19b102..da223f3 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -192,7 +192,7 @@ std::optional tryDemangleForVFABI(StringRef MangledName, /// where: /// /// = "_LLVM_" -/// = "M" if masked, "N" if no mask. +/// = "N". Note: TLI does not support masked interfaces. /// = Number of concurrent lanes, stored in the `VectorizationFactor` /// field of the `VecDesc` struct. If the number of lanes is scalable /// then 'x' is printed instead. @@ -200,8 +200,7 @@ std::optional tryDemangleForVFABI(StringRef MangledName, /// = the name of the scalar function. /// = the name of the vector function. std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, - unsigned numArgs, ElementCount VF, - bool Masked = false); + unsigned numArgs, ElementCount VF); /// Retrieve the `VFParamKind` from a string token. VFParamKind getVFParamKindFromString(const StringRef Token); diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 6661441..c57c2be 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1181,17 +1181,10 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( case SLEEFGNUABI: { const VecDesc VecFuncs_VF2[] = { #define TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF, /* MASK = */ false}, #include "llvm/Analysis/VecFuncs.def" }; const VecDesc VecFuncs_VF4[] = { #define TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF, /* MASK = */ false}, -#include "llvm/Analysis/VecFuncs.def" - }; - const VecDesc VecFuncs_VFScalable[] = { -#define TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS -#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK) {SCAL, VEC, VF, MASK}, #include "llvm/Analysis/VecFuncs.def" }; @@ -1202,7 +1195,6 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( case llvm::Triple::aarch64_be: addVectorizableFunctions(VecFuncs_VF2); addVectorizableFunctions(VecFuncs_VF4); - addVectorizableFunctions(VecFuncs_VFScalable); break; } break; @@ -1222,16 +1214,16 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName; } -StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, - const ElementCount &VF, - bool Masked) const { +StringRef +TargetLibraryInfoImpl::getVectorizedFunction(StringRef F, + const ElementCount &VF) const { F = sanitizeFunctionName(F); if (F.empty()) return F; std::vector::const_iterator I = llvm::lower_bound(VectorDescs, F, compareWithScalarFnName); while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) { - if ((I->VectorizationFactor == VF) && (I->Masked == Masked)) + if (I->VectorizationFactor == VF) return I->VectorFnName; ++I; } diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index cab0082..31c2de1 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -1529,10 +1529,10 @@ void InterleaveGroup::addMetadata(Instruction *NewInst) const { std::string VFABI::mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, - ElementCount VF, bool Masked) { + ElementCount VF) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); - Out << "_ZGV" << VFABI::_LLVM_ << (Masked ? "M" : "N"); + Out << "_ZGV" << VFABI::_LLVM_ << "N"; if (VF.isScalable()) Out << 'x'; else diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp index 873c322..55bcb6f3 100644 --- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp +++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp @@ -40,7 +40,7 @@ STATISTIC(NumCompUsedAdded, /// CI (other than void) need to be widened to a VectorType of VF /// lanes. static void addVariantDeclaration(CallInst &CI, const ElementCount &VF, - bool Predicate, const StringRef VFName) { + const StringRef VFName) { Module *M = CI.getModule(); // Add function declaration. @@ -50,8 +50,6 @@ static void addVariantDeclaration(CallInst &CI, const ElementCount &VF, Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); assert(!CI.getFunctionType()->isVarArg() && "VarArg functions are not supported."); - if (Predicate) - Tys.push_back(ToVectorTy(Type::getInt1Ty(RetTy->getContext()), VF)); FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false); Function *VectorF = Function::Create(FTy, Function::ExternalLinkage, VFName, M); @@ -91,19 +89,19 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { const SetVector OriginalSetOfMappings(Mappings.begin(), Mappings.end()); - auto AddVariantDecl = [&](const ElementCount &VF, bool Predicate) { + auto AddVariantDecl = [&](const ElementCount &VF) { const std::string TLIName = - std::string(TLI.getVectorizedFunction(ScalarName, VF, Predicate)); + std::string(TLI.getVectorizedFunction(ScalarName, VF)); if (!TLIName.empty()) { - std::string MangledName = VFABI::mangleTLIVectorName( - TLIName, ScalarName, CI.arg_size(), VF, Predicate); + std::string MangledName = + VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF); if (!OriginalSetOfMappings.count(MangledName)) { Mappings.push_back(MangledName); ++NumCallInjected; } Function *VariantF = M->getFunction(TLIName); if (!VariantF) - addVariantDeclaration(CI, VF, Predicate, TLIName); + addVariantDeclaration(CI, VF, TLIName); } }; @@ -111,15 +109,13 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) { ElementCount WidestFixedVF, WidestScalableVF; TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF); - for (bool Predicated : {false, true}) { - for (ElementCount VF = ElementCount::getFixed(2); - ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2) - AddVariantDecl(VF, Predicated); + for (ElementCount VF = ElementCount::getFixed(2); + ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2) + AddVariantDecl(VF); - for (ElementCount VF = ElementCount::getScalable(2); - ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2) - AddVariantDecl(VF, Predicated); - } + // TODO: Add scalable variants once we're able to test them. + assert(WidestScalableVF.isZero() && + "Scalable vector mappings not yet supported"); VFABI::setVectorVariantNames(&CI, Mappings); } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll index 0d14ecc..be43099 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll @@ -1,6 +1,5 @@ ; Do NOT use -O3. It will lower exp2 to ldexp, and the test will fail. -; RUN: opt -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s --check-prefixes=CHECK,NEON -; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s --check-prefixes=CHECK,SVE +; RUN: opt -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-unknown-linux-gnu" @@ -12,8 +11,7 @@ declare float @llvm.acos.f32(float) #0 define void @acos_f64(double* nocapture %varray) { ; CHECK-LABEL: @acos_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_acos( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -36,8 +34,7 @@ define void @acos_f64(double* nocapture %varray) { define void @acos_f32(float* nocapture %varray) { ; CHECK-LABEL: @acos_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_acosf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -65,8 +62,7 @@ declare float @llvm.asin.f32(float) #0 define void @asin_f64(double* nocapture %varray) { ; CHECK-LABEL: @asin_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_asin( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -89,8 +85,7 @@ define void @asin_f64(double* nocapture %varray) { define void @asin_f32(float* nocapture %varray) { ; CHECK-LABEL: @asin_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_asinf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -118,8 +113,7 @@ declare float @llvm.atan.f32(float) #0 define void @atan_f64(double* nocapture %varray) { ; CHECK-LABEL: @atan_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_atan( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -142,8 +136,7 @@ define void @atan_f64(double* nocapture %varray) { define void @atan_f32(float* nocapture %varray) { ; CHECK-LABEL: @atan_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_atanf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -171,8 +164,7 @@ declare float @llvm.atan2.f32(float, float) #0 define void @atan2_f64(double* nocapture %varray) { ; CHECK-LABEL: @atan2_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxvv_atan2( [[TMP4:%.*]], [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -195,8 +187,7 @@ define void @atan2_f64(double* nocapture %varray) { define void @atan2_f32(float* nocapture %varray) { ; CHECK-LABEL: @atan2_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxvv_atan2f( [[TMP4:%.*]], [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -224,8 +215,7 @@ declare float @llvm.atanh.f32(float) #0 define void @atanh_f64(double* nocapture %varray) { ; CHECK-LABEL: @atanh_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atanh(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_atanh( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_atanh(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -248,8 +238,7 @@ define void @atanh_f64(double* nocapture %varray) { define void @atanh_f32(float* nocapture %varray) { ; CHECK-LABEL: @atanh_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_atanhf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_atanhf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -277,8 +266,7 @@ declare float @llvm.cos.f32(float) #0 define void @cos_f64(double* nocapture %varray) { ; CHECK-LABEL: @cos_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_cos( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -301,8 +289,7 @@ define void @cos_f64(double* nocapture %varray) { define void @cos_f32(float* nocapture %varray) { ; CHECK-LABEL: @cos_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_cosf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -330,8 +317,7 @@ declare float @llvm.cosh.f32(float) #0 define void @cosh_f64(double* nocapture %varray) { ; CHECK-LABEL: @cosh_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_cosh( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -354,8 +340,7 @@ define void @cosh_f64(double* nocapture %varray) { define void @cosh_f32(float* nocapture %varray) { ; CHECK-LABEL: @cosh_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_coshf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -383,8 +368,7 @@ declare float @llvm.exp.f32(float) #0 define void @exp_f64(double* nocapture %varray) { ; CHECK-LABEL: @exp_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_exp( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -407,8 +391,7 @@ define void @exp_f64(double* nocapture %varray) { define void @exp_f32(float* nocapture %varray) { ; CHECK-LABEL: @exp_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_expf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -436,8 +419,7 @@ declare float @llvm.exp2.f32(float) #0 define void @exp2_f64(double* nocapture %varray) { ; CHECK-LABEL: @exp2_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_exp2( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -460,8 +442,7 @@ define void @exp2_f64(double* nocapture %varray) { define void @exp2_f32(float* nocapture %varray) { ; CHECK-LABEL: @exp2_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_exp2f( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -489,8 +470,7 @@ declare float @llvm.exp10.f32(float) #0 define void @exp10_f64(double* nocapture %varray) { ; CHECK-LABEL: @exp10_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_exp10( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -513,8 +493,7 @@ define void @exp10_f64(double* nocapture %varray) { define void @exp10_f32(float* nocapture %varray) { ; CHECK-LABEL: @exp10_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_exp10f( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -542,8 +521,7 @@ declare float @llvm.lgamma.f32(float) #0 define void @lgamma_f64(double* nocapture %varray) { ; CHECK-LABEL: @lgamma_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_lgamma( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_lgamma(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -566,8 +544,7 @@ define void @lgamma_f64(double* nocapture %varray) { define void @lgamma_f32(float* nocapture %varray) { ; CHECK-LABEL: @lgamma_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_lgammaf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_lgammaf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -595,8 +572,7 @@ declare float @llvm.log10.f32(float) #0 define void @log10_f64(double* nocapture %varray) { ; CHECK-LABEL: @log10_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_log10( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -619,8 +595,7 @@ define void @log10_f64(double* nocapture %varray) { define void @log10_f32(float* nocapture %varray) { ; CHECK-LABEL: @log10_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_log10f( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -699,8 +674,7 @@ declare float @llvm.log.f32(float) #0 define void @log_f64(double* nocapture %varray) { ; CHECK-LABEL: @log_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_log( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -723,8 +697,7 @@ define void @log_f64(double* nocapture %varray) { define void @log_f32(float* nocapture %varray) { ; CHECK-LABEL: @log_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_logf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -752,8 +725,7 @@ declare float @llvm.pow.f32(float, float) #0 define void @pow_f64(double* nocapture %varray) { ; CHECK-LABEL: @pow_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxvv_pow( [[TMP4:%.*]], [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -776,8 +748,7 @@ define void @pow_f64(double* nocapture %varray) { define void @pow_f32(float* nocapture %varray) { ; CHECK-LABEL: @pow_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxvv_powf( [[TMP4:%.*]], [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -805,8 +776,7 @@ declare float @llvm.sin.f32(float) #0 define void @sin_f64(double* nocapture %varray) { ; CHECK-LABEL: @sin_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sin( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -829,8 +799,7 @@ define void @sin_f64(double* nocapture %varray) { define void @sin_f32(float* nocapture %varray) { ; CHECK-LABEL: @sin_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sinf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -858,8 +827,7 @@ declare float @llvm.sinh.f32(float) #0 define void @sinh_f64(double* nocapture %varray) { ; CHECK-LABEL: @sinh_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sinh( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -882,8 +850,7 @@ define void @sinh_f64(double* nocapture %varray) { define void @sinh_f32(float* nocapture %varray) { ; CHECK-LABEL: @sinh_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sinhf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -911,8 +878,7 @@ declare float @llvm.sqrt.f32(float) #0 define void @sqrt_f64(double* nocapture %varray) { ; CHECK-LABEL: @sqrt_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sqrt( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_sqrt(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -935,8 +901,7 @@ define void @sqrt_f64(double* nocapture %varray) { define void @sqrt_f32(float* nocapture %varray) { ; CHECK-LABEL: @sqrt_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_sqrtf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_sqrtf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -957,10 +922,10 @@ define void @sqrt_f32(float* nocapture %varray) { ret void } + define void @llvm_sqrt_f64(double* nocapture %varray) { ; CHECK-LABEL: @llvm_sqrt_f64( - ; NEON: [[TMP5:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call fast @llvm.sqrt.nxv2f64( [[TMP4:%.*]]) + ; CHECK: [[TMP5:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -983,8 +948,7 @@ define void @llvm_sqrt_f64(double* nocapture %varray) { define void @llvm_sqrt_f32(float* nocapture %varray) { ; CHECK-LABEL: @llvm_sqrt_f32( - ; NEON: [[TMP5:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call fast @llvm.sqrt.nxv4f32( [[TMP4:%.*]]) + ; CHECK: [[TMP5:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -1012,8 +976,7 @@ declare float @llvm.tan.f32(float) #0 define void @tan_f64(double* nocapture %varray) { ; CHECK-LABEL: @tan_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tan( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -1036,8 +999,7 @@ define void @tan_f64(double* nocapture %varray) { define void @tan_f32(float* nocapture %varray) { ; CHECK-LABEL: @tan_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tanf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -1065,8 +1027,7 @@ declare float @llvm.tanh.f32(float) #0 define void @tanh_f64(double* nocapture %varray) { ; CHECK-LABEL: @tanh_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tanh( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -1089,8 +1050,7 @@ define void @tanh_f64(double* nocapture %varray) { define void @tanh_f32(float* nocapture %varray) { ; CHECK-LABEL: @tanh_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tanhf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -1118,8 +1078,7 @@ declare float @llvm.tgamma.f32(float) #0 define void @tgamma_f64(double* nocapture %varray) { ; CHECK-LABEL: @tgamma_f64( - ; NEON: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tgamma( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <2 x double> @_ZGVnN2v_tgamma(<2 x double> [[TMP4:%.*]]) ; CHECK: ret void ; entry: @@ -1142,8 +1101,7 @@ define void @tgamma_f64(double* nocapture %varray) { define void @tgamma_f32(float* nocapture %varray) { ; CHECK-LABEL: @tgamma_f32( - ; NEON: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[TMP4:%.*]]) - ; SVE: [[TMP5:%.*]] = call @_ZGVsMxv_tgammaf( [[TMP4:%.*]], {{.*}}) + ; CHECK: [[TMP5:%.*]] = call <4 x float> @_ZGVnN4v_tgammaf(<4 x float> [[TMP4:%.*]]) ; CHECK: ret void ; entry: