From: Florian Hahn Date: Mon, 10 May 2021 19:49:19 +0000 (+0100) Subject: [VecLib] Add support for vector fns from Darwin's libsystem. X-Git-Tag: llvmorg-14-init~7127 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=93a9a8a8d90f5b9bb6965ebb1104082692d41833;p=platform%2Fupstream%2Fllvm.git [VecLib] Add support for vector fns from Darwin's libsystem. This patch adds support for Darwin's libsystem math vector functions to TLI. Darwin's libsystem provides a range of vector functions for libm functions. This initial patch only adds the 2 x double and 4 x float versions, which are available on both X86 and ARM64. On X86, wider vector versions are supported as well. Reviewed By: jroelofs Differential Revision: https://reviews.llvm.org/D101856 --- diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index adc6363..a7ad218 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -86,11 +86,12 @@ public: /// addVectorizableFunctionsFromVecLib for filling up the tables of /// vectorizable functions. enum VectorLibrary { - NoLibrary, // Don't use any vector library. - Accelerate, // Use Accelerate framework. - LIBMVEC_X86,// GLIBC Vector Math library. - MASSV, // IBM MASS vector library. - SVML // Intel short vector math library. + NoLibrary, // Don't use any vector library. + Accelerate, // Use Accelerate framework. + DarwinLibSystemM, // Use Darwin's libsystem_m. + LIBMVEC_X86, // GLIBC Vector Math library. + MASSV, // IBM MASS vector library. + SVML // Intel short vector math library. 
}; TargetLibraryInfoImpl(); diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def index 3391afd..2539ff0 100644 --- a/llvm/include/llvm/Analysis/VecFuncs.def +++ b/llvm/include/llvm/Analysis/VecFuncs.def @@ -64,6 +64,59 @@ TLI_DEFINE_VECFUNC("asinhf", "vasinhf", FIXED(4)) TLI_DEFINE_VECFUNC("acoshf", "vacoshf", FIXED(4)) TLI_DEFINE_VECFUNC("atanhf", "vatanhf", FIXED(4)) +#elif defined(TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS) +// Darwin libsystem_m vector functions. + +// Exponential and Logarithmic Functions +TLI_DEFINE_VECFUNC("exp", "_simd_exp_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("llvm.exp.f64", "_simd_exp_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("expf", "_simd_exp_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.exp.f32", "_simd_exp_f4", FIXED(4)) + +// Trigonometric Functions +TLI_DEFINE_VECFUNC("acos", "_simd_acos_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("acosf", "_simd_acos_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("asin", "_simd_asin_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("asinf", "_simd_asin_f4", FIXED(4)) + +TLI_DEFINE_VECFUNC("atan", "_simd_atan_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4)) + +TLI_DEFINE_VECFUNC("cos", "_simd_cos_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("llvm.cos.f64", "_simd_cos_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("cosf", "_simd_cos_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.cos.f32", "_simd_cos_f4", FIXED(4)) + +TLI_DEFINE_VECFUNC("sin", "_simd_sin_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("llvm.sin.f64", "_simd_sin_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("sinf", "_simd_sin_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.sin.f32", "_simd_sin_f4", FIXED(4)) + +// Floating-Point Arithmetic and Auxiliary Functions +TLI_DEFINE_VECFUNC("cbrt", "_simd_cbrt_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("cbrtf", "_simd_cbrt_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("erf", "_simd_erf_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("erff", "_simd_erf_f4", 
FIXED(4)) +TLI_DEFINE_VECFUNC("pow", "_simd_pow_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("llvm.pow.f64", "_simd_pow_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("powf", "_simd_pow_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("llvm.pow.f32", "_simd_pow_f4", FIXED(4)) + +// Hyperbolic Functions +TLI_DEFINE_VECFUNC("sinh", "_simd_sinh_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("sinhf", "_simd_sinh_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("cosh", "_simd_cosh_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("coshf", "_simd_cosh_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("tanh", "_simd_tanh_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("tanhf", "_simd_tanh_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("asinh", "_simd_asinh_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("asinhf", "_simd_asinh_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("acosh", "_simd_acosh_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("acoshf", "_simd_acosh_f4", FIXED(4)) +TLI_DEFINE_VECFUNC("atanh", "_simd_atanh_d2", FIXED(2)) +TLI_DEFINE_VECFUNC("atanhf", "_simd_atanh_f4", FIXED(4)) #elif defined(TLI_DEFINE_LIBMVEC_X86_VECFUNCS) // GLIBC Vector math Functions @@ -419,6 +472,7 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16)) #undef TLI_DEFINE_VECFUNC #undef TLI_DEFINE_ACCELERATE_VECFUNCS +#undef TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS #undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS #undef TLI_DEFINE_MASSV_VECFUNCS #undef TLI_DEFINE_SVML_VECFUNCS diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 9ea84f1..153ba07 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -24,6 +24,8 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary( "No vector functions library"), clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate", "Accelerate framework"), + clEnumValN(TargetLibraryInfoImpl::DarwinLibSystemM, + "Darwin_libsystem_m", "Darwin libsystem_m"), clEnumValN(TargetLibraryInfoImpl::LIBMVEC_X86, "LIBMVEC-X86", "GLIBC Vector Math library"), clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV", @@ -1622,6 +1624,14 @@ void 
TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( addVectorizableFunctions(VecFuncs); break; } + case DarwinLibSystemM: { + const VecDesc VecFuncs[] = { + #define TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS + #include "llvm/Analysis/VecFuncs.def" + }; + addVectorizableFunctions(VecFuncs); + break; + } case LIBMVEC_X86: { const VecDesc VecFuncs[] = { #define TLI_DEFINE_LIBMVEC_X86_VECFUNCS diff --git a/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib-darwin-libsystem-m.ll b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib-darwin-libsystem-m.ll new file mode 100644 index 0000000..7602e19 --- /dev/null +++ b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib-darwin-libsystem-m.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes +; RUN: opt -vector-library=Darwin_libsystem_m -replace-with-veclib -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define <4 x float> @sin_v4f32(<4 x float> %in) { +; CHECK-LABEL: define {{[^@]+}}@sin_v4f32 +; CHECK-SAME: (<4 x float> [[IN:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @_simd_sin_f4(<4 x float> [[IN]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %call = call <4 x float> @llvm.sin.v4f32(<4 x float> %in) + ret <4 x float> %call +} +declare <4 x float> @llvm.sin.v4f32(<4 x float>) #0 + +define <2 x double> @sin_v2f64(<2 x double> %in) { +; CHECK-LABEL: define {{[^@]+}}@sin_v2f64 +; CHECK-SAME: (<2 x double> [[IN:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @_simd_sin_d2(<2 x double> [[IN]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %call = call <2 x double> @llvm.sin.v2f64(<2 x double> %in) + ret <2 x double> %call +} +declare <2 x double> @llvm.sin.v2f64(<2 x double>) + + +define <2 x double> @exp_v2(<2 x double> %in) { +; CHECK-LABEL: define {{[^@]+}}@exp_v2 +; CHECK-SAME: (<2 x double> [[IN:%.*]]) { 
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @_simd_exp_d2(<2 x double> [[IN]]) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %call = call <2 x double> @llvm.exp.v2f64(<2 x double> %in) + ret <2 x double> %call +} + +declare <2 x double> @llvm.exp.v2f64(<2 x double>) #0 + +define <4 x float> @exp_f32(<4 x float> %in) { +; CHECK-LABEL: define {{[^@]+}}@exp_f32 +; CHECK-SAME: (<4 x float> [[IN:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @_simd_exp_f4(<4 x float> [[IN]]) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %call = call <4 x float> @llvm.exp.v4f32(<4 x float> %in) + ret <4 x float> %call +} + +declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0 + +define <3 x double> @exp_v3(<3 x double> %in) { +; CHECK-LABEL: define {{[^@]+}}@exp_v3 +; CHECK-SAME: (<3 x double> [[IN:%.*]]) { +; CHECK-NEXT: [[CALL:%.*]] = call <3 x double> @llvm.exp.v3f64(<3 x double> [[IN]]) +; CHECK-NEXT: ret <3 x double> [[CALL]] +; + %call = call <3 x double> @llvm.exp.v3f64(<3 x double> %in) + ret <3 x double> %call +} + +declare <3 x double> @llvm.exp.v3f64(<3 x double>) #0 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll new file mode 100644 index 0000000..14367c4 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-calls-libsystem-darwin.ll @@ -0,0 +1,724 @@ +; RUN: opt < %s -vector-library=Darwin_libsystem_m -inject-tli-mappings -loop-vectorize -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "arm64-apple-darwin" + +declare float @expf(float) nounwind readnone +define void @expf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @expf_v4f32( +; CHECK: call <4 x float> @_simd_exp_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 
%iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @expf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @exp(double) nounwind readnone +define void @exp_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @exp_v2f64( +; CHECK: call <2 x double> @_simd_exp_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @exp(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @acosf(float) nounwind readnone +define void @acos_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @acos_v4f32( +; CHECK: call <4 x float> @_simd_acos_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @acosf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @acos(double) nounwind readnone +define void @acos_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: 
@acos_v2f64( +; CHECK: call <2 x double> @_simd_acos_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @acos(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @asinf(float) nounwind readnone +define void @asinf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @asinf_v4f32( +; CHECK: call <4 x float> @_simd_asin_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @asinf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @asin(double) nounwind readnone +define void @asin_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @asin_v2f64( +; CHECK: call <2 x double> @_simd_asin_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @asin(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + 
ret void +} + + declare float @atanf(float) nounwind readnone +define void @atanf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @atanf_v4f32( +; CHECK: call <4 x float> @_simd_atan_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @atanf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @atan(double) nounwind readnone +define void @atan_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @atan_v2f64( +; CHECK: call <2 x double> @_simd_atan_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @atan(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @atan2f(float) nounwind readnone +define void @atan2f_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @atan2f_v4f32( +; CHECK: call <4 x float> @_simd_atan2_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @atan2f(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* 
%gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @atan2(double) nounwind readnone +define void @atan2_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @atan2_v2f64( +; CHECK: call <2 x double> @_simd_atan2_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @atan2(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @cosf(float) nounwind readnone +define void @cosf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @cosf_v4f32( +; CHECK: call <4 x float> @_simd_cos_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @cosf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @cos(double) nounwind readnone +define void @cos_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @cos_v2f64( +; CHECK: call <2 x double> @_simd_cos_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* 
%gep.y, align 4 + %call = tail call double @cos(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @cbrtf(float) nounwind readnone +define void @cbrtf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @cbrtf_v4f32( +; CHECK: call <4 x float> @_simd_cbrt_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @cbrtf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @cbrt(double) nounwind readnone +define void @cbrt_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @cbrt_v2f64( +; CHECK: call <2 x double> @_simd_cbrt_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @cbrt(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @erff(float) nounwind readnone +define void @erff_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @erff_v4f32( +; CHECK: call <4 x float> @_simd_erf_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + 
%iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @erff(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @erf(double) nounwind readnone +define void @erf_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @erf_v2f64( +; CHECK: call <2 x double> @_simd_erf_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @erf(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @powf(float) nounwind readnone +define void @powf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @powf_v4f32( +; CHECK: call <4 x float> @_simd_pow_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @powf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @pow(double) nounwind readnone +define void @pow_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; 
CHECK-LABEL: @pow_v2f64( +; CHECK: call <2 x double> @_simd_pow_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @pow(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @sinhf(float) nounwind readnone +define void @sinhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @sinhf_v4f32( +; CHECK: call <4 x float> @_simd_sinh_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @sinhf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @sinh(double) nounwind readnone +define void @sinh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @sinh_v2f64( +; CHECK: call <2 x double> @_simd_sinh_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @sinh(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + 
+for.end: + ret void +} + +declare float @coshf(float) nounwind readnone +define void @coshf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @coshf_v4f32( +; CHECK: call <4 x float> @_simd_cosh_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @coshf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @cosh(double) nounwind readnone +define void @cosh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @cosh_v2f64( +; CHECK: call <2 x double> @_simd_cosh_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @cosh(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @tanhf(float) nounwind readnone +define void @tanhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @tanhf_v4f32( +; CHECK: call <4 x float> @_simd_tanh_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @tanhf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, 
float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @tanh(double) nounwind readnone +define void @tanh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @tanh_v2f64( +; CHECK: call <2 x double> @_simd_tanh_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @tanh(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @asinhf(float) nounwind readnone +define void @asinhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @asinhf_v4f32( +; CHECK: call <4 x float> @_simd_asinh_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @asinhf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @asinh(double) nounwind readnone +define void @asinh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @asinh_v2f64( +; CHECK: call <2 x double> @_simd_asinh_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load 
double, double* %gep.y, align 4 + %call = tail call double @asinh(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @acoshf(float) nounwind readnone +define void @acoshf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @acoshf_v4f32( +; CHECK: call <4 x float> @_simd_acosh_f4( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @acoshf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @acosh(double) nounwind readnone +define void @acosh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @acosh_v2f64( +; CHECK: call <2 x double> @_simd_acosh_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @acosh(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare float @atanhf(float) nounwind readnone +define void @atanhf_v4f32(i64 %n, float* noalias %y, float* noalias %x) { +; CHECK-LABEL: @atanhf_v4f32( +; CHECK: call <4 x float> @_simd_atanh_f4( +; CHECK: ret void + +entry: 
+ br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds float, float* %y, i64 %iv + %lv = load float, float* %gep.y, align 4 + %call = tail call float @atanhf(float %lv) + %gep.x = getelementptr inbounds float, float* %x, i64 %iv + store float %call, float* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +declare double @atanh(double) nounwind readnone +define void @atanh_v2f64(i64 %n, double* noalias %y, double * noalias %x) { +; CHECK-LABEL: @atanh_v2f64( +; CHECK: call <2 x double> @_simd_atanh_d2( +; CHECK: ret void + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %gep.y = getelementptr inbounds double, double* %y, i64 %iv + %lv = load double, double* %gep.y, align 4 + %call = tail call double @atanh(double %lv) + %gep.x = getelementptr inbounds double, double* %x, i64 %iv + store double %call, double* %gep.x, align 4 + %iv.next = add i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +}