SVML support for log10, sqrt

author Wenlei He <aktoon@gmail.com>

Wed, 16 Sep 2020 00:29:32 +0000 (17:29 -0700)

committer Wenlei He <aktoon@gmail.com>

Wed, 16 Sep 2020 00:29:44 +0000 (17:29 -0700)
author Wenlei He <aktoon@gmail.com>
Wed, 16 Sep 2020 00:29:32 +0000 (17:29 -0700)
committer Wenlei He <aktoon@gmail.com>
Wed, 16 Sep 2020 00:29:44 +0000 (17:29 -0700)
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def

index 9fdbf638078f4eee382b11726b649dc8a75c2836..a47ee3c147252323461c1cd35adfb14055afd209 100644 (file)
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -269,6 +269,54 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f4", 4)
  TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f8", 8)
  TLI_DEFINE_VECFUNC("llvm.log2.f32", "__svml_log2f16", 16)
  
+TLI_DEFINE_VECFUNC("log10", "__svml_log102", 2)
+TLI_DEFINE_VECFUNC("log10", "__svml_log104", 4)
+TLI_DEFINE_VECFUNC("log10", "__svml_log108", 8)
+
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f4", 4)
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f8", 8)
+TLI_DEFINE_VECFUNC("log10f", "__svml_log10f16", 16)
+
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log102", 2)
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log104", 4)
+TLI_DEFINE_VECFUNC("__log10_finite", "__svml_log108", 8)
+
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f4", 4)
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f8", 8)
+TLI_DEFINE_VECFUNC("__log10f_finite", "__svml_log10f16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log102", 2)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log104", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__svml_log108", 8)
+
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f4", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f8", 8)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__svml_log10f16", 16)
+
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt2", 2)
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt4", 4)
+TLI_DEFINE_VECFUNC("sqrt", "__svml_sqrt8", 8)
+
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf4", 4)
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf8", 8)
+TLI_DEFINE_VECFUNC("sqrtf", "__svml_sqrtf16", 16)
+
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt2", 2)
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt4", 4)
+TLI_DEFINE_VECFUNC("__sqrt_finite", "__svml_sqrt8", 8)
+
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf4", 4)
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf8", 8)
+TLI_DEFINE_VECFUNC("__sqrtf_finite", "__svml_sqrtf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt2", 2)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt4", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__svml_sqrt8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf4", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf8", 8)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__svml_sqrtf16", 16)
+
  TLI_DEFINE_VECFUNC("exp2", "__svml_exp22", 2)
  TLI_DEFINE_VECFUNC("exp2", "__svml_exp24", 4)
  TLI_DEFINE_VECFUNC("exp2", "__svml_exp28", 8)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll

index dd6692d75e5f5f4c978faa29661a6e3443be825f..a6e191c3d6923c922ed1636031ecce64fe6ce1c8 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll
@@ -300,3 +300,117 @@ for.end:                                          ; preds = %for.body
  !91 = distinct !{!31, !32, !33}
  !92 = !{!"llvm.loop.vectorize.width", i32 4}
  !93 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+declare float @__log10f_finite(float) #0
+
+; CHECK-LABEL: @log10_f32
+; CHECK: <4 x float> @__svml_log10f4
+; CHECK: ret
+define void @log10_f32(float* nocapture %varray) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call fast float @__log10f_finite(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
+  store float %call, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!101 = distinct !{!21, !22, !23}
+!102 = !{!"llvm.loop.vectorize.width", i32 4}
+!103 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+
+declare double @__log10_finite(double) #0
+
+; CHECK-LABEL: @log10_f64
+; CHECK: <4 x double> @__svml_log104
+; CHECK: ret
+define void @log10_f64(double* nocapture %varray) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call fast double @__log10_finite(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
+  store double %call, double* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!111 = distinct !{!31, !32, !33}
+!112 = !{!"llvm.loop.vectorize.width", i32 4}
+!113 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+declare float @__sqrtf_finite(float) #0
+
+; CHECK-LABEL: @sqrt_f32
+; CHECK: <4 x float> @__svml_sqrtf4
+; CHECK: ret
+define void @sqrt_f32(float* nocapture %varray) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call fast float @__sqrtf_finite(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %indvars.iv
+  store float %call, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!121 = distinct !{!21, !22, !23}
+!122 = !{!"llvm.loop.vectorize.width", i32 4}
+!123 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+
+declare double @__sqrt_finite(double) #0
+
+; CHECK-LABEL: @sqrt_f64
+; CHECK: <4 x double> @__svml_sqrt4
+; CHECK: ret
+define void @sqrt_f64(double* nocapture %varray) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call fast double @__sqrt_finite(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %indvars.iv
+  store double %call, double* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!131 = distinct !{!31, !32, !33}
+!132 = !{!"llvm.loop.vectorize.width", i32 4}
+!133 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll

index c074830075521e93e4cab035d961b1397d31e8a6..da6b4696ba2bad5a8b4f5c4c215ecf8e18f7c037 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll
@@ -33,6 +33,16 @@ declare float @log2f(float) #0
  declare double @llvm.log2.f64(double) #0
  declare float @llvm.log2.f32(float) #0
  
+declare double @log10(double) #0
+declare float @log10f(float) #0
+declare double @llvm.log10.f64(double) #0
+declare float @llvm.log10.f32(float) #0
+
+declare double @sqrt(double) #0
+declare float @sqrtf(float) #0
+declare double @llvm.sqrt.f64(double) #0
+declare float @llvm.sqrt.f32(float) #0
+
  declare double @exp2(double) #0
  declare float @exp2f(float) #0
  declare double @llvm.exp2.f64(double) #0
@@ -598,6 +608,190 @@ for.end:
    ret void
  }
  
+define void @log10_f64(double* nocapture %varray) {
+; CHECK-LABEL: @log10_f64(
+; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]])
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @log10(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log10_f32(float* nocapture %varray) {
+; CHECK-LABEL: @log10_f32(
+; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]])
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @log10f(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log10_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @log10_f64_intrinsic(
+; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_log104(<4 x double> [[TMP4:%.*]])
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.log10.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @log10_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @log10_f32_intrinsic(
+; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_log10f4(<4 x float> [[TMP4:%.*]])
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.log10.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sqrt_f64(double* nocapture %varray) {
+; CHECK-LABEL: @sqrt_f64(
+; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]])
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @sqrt(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sqrt_f32(float* nocapture %varray) {
+; CHECK-LABEL: @sqrt_f32(
+; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]])
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @sqrtf(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sqrt_f64_intrinsic(double* nocapture %varray) {
+; CHECK-LABEL: @sqrt_f64_intrinsic(
+; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_sqrt4(<4 x double> [[TMP4:%.*]])
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.sqrt.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @sqrt_f32_intrinsic(float* nocapture %varray) {
+; CHECK-LABEL: @sqrt_f32_intrinsic(
+; CHECK:    [[TMP5:%.*]] = call <4 x float> @__svml_sqrtf4(<4 x float> [[TMP4:%.*]])
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.sqrt.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
  define void @exp2_f64(double* nocapture %varray) {
  ; CHECK-LABEL: @exp2_f64(
  ; CHECK:    [[TMP5:%.*]] = call <4 x double> @__svml_exp24(<4 x double> [[TMP4:%.*]])
diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll

index c68a9c9a71c65abf767d2de5b30642bc5009a956..75e32528ac7c58de7bde1716ed3a8d0c0f41a6f1 100644 (file)
--- a/llvm/test/Transforms/Util/add-TLI-mappings.ll
+++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll
@@ -9,10 +9,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-unknown-linux-gnu"
  
  ; COMMON-LABEL: @llvm.compiler.used = appending global
-; SVML-SAME:        [3 x i8*] [
+; SVML-SAME:        [6 x i8*] [
  ; SVML-SAME:          i8* bitcast (<2 x double> (<2 x double>)* @__svml_sin2 to i8*),
  ; SVML-SAME:          i8* bitcast (<4 x double> (<4 x double>)* @__svml_sin4 to i8*),
-; SVML-SAME:          i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*)
+; SVML-SAME:          i8* bitcast (<8 x double> (<8 x double>)* @__svml_sin8 to i8*),
+; SVML-SAME:          i8* bitcast (<4 x float> (<4 x float>)* @__svml_log10f4 to i8*),
+; SVML-SAME:          i8* bitcast (<8 x float> (<8 x float>)* @__svml_log10f8 to i8*),
+; SVML-SAME:          i8* bitcast (<16 x float> (<16 x float>)* @__svml_log10f16 to i8*)
  ; MASSV-SAME:       [2 x i8*] [
  ; MASSV-SAME:         i8* bitcast (<2 x double> (<2 x double>)* @__sind2_massv to i8*),
  ; MASSV-SAME:         i8* bitcast (<4 x float> (<4 x float>)* @__log10f4_massv to i8*)
author	Wenlei He <aktoon@gmail.com>
	Wed, 16 Sep 2020 00:29:32 +0000 (17:29 -0700)
committer	Wenlei He <aktoon@gmail.com>
	Wed, 16 Sep 2020 00:29:44 +0000 (17:29 -0700)
llvm/include/llvm/Analysis/VecFuncs.def		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/X86/svml-calls-finite.ll		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/X86/svml-calls.ll		patch \| blob \| history
llvm/test/Transforms/Util/add-TLI-mappings.ll		patch \| blob \| history