From 19f9f374d9afe3efd33dc1943d3d7fd2c54fba76 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 20 Dec 2019 17:24:53 -0500 Subject: [PATCH] [SimplifyLibCalls] require fast-math-flags for pow(X, -0.5) transforms As discussed in PR44330: https://bugs.llvm.org/show_bug.cgi?id=44330 ...the transform from pow(X, -0.5) libcall/intrinsic to reciprocal square root can result in small deviations from the expected result due to differences in the pow() implementation and/or the extra rounding step from the division. This patch proposes to allow that difference with either the 'approximate functions' or 'reassociate' FMF: http://llvm.org/docs/LangRef.html#fast-math-flags In practice, this likely means that the code is compiled with all of 'fast' (-ffast-math), but I have preserved the existing specializations for -0.0/-INF that enable generating safe code if those special values are allowed simultaneously with allowing approximation/reassociation. The question about whether a similar restriction is needed for the non-reciprocal case -- pow(X, 0.5) -- is deferred. That transform is allowed without FMF currently, and this patch does not change that behavior. Differential Revision: https://reviews.llvm.org/D71706 --- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 5 ++++ llvm/test/Transforms/InstCombine/pow-sqrt.ll | 36 ++++++++++++++++---------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6c09600..fa3a9d2 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1644,6 +1644,11 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) { (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5))) return nullptr; + // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step, + // so that requires fast-math-flags (afn or reassoc). 
+ if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc())) + return nullptr; + Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI); if (!Sqrt) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/pow-sqrt.ll b/llvm/test/Transforms/InstCombine/pow-sqrt.ll index fee6fd3..588d4d1 100644 --- a/llvm/test/Transforms/InstCombine/pow-sqrt.ll +++ b/llvm/test/Transforms/InstCombine/pow-sqrt.ll @@ -150,22 +150,22 @@ define double @pow_intrinsic_half_fast(double %x) { ret double %pow } -; FIXME: This should not be transformed without some kind of FMF. +; This should not be transformed without some kind of FMF. ; -0.5 means take the reciprocal. define float @pow_libcall_neghalf_no_FMF(float %x) { ; CHECK-LABEL: @pow_libcall_neghalf_no_FMF( -; CHECK-NEXT: [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call float @llvm.fabs.f32(float [[SQRTF]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000 -; CHECK-NEXT: [[ABS_OP:%.*]] = fdiv float 1.000000e+00, [[ABS]] -; CHECK-NEXT: [[RECIPROCAL:%.*]] = select i1 [[ISINF]], float 0.000000e+00, float [[ABS_OP]] -; CHECK-NEXT: ret float [[RECIPROCAL]] +; CHECK-NEXT: [[POW:%.*]] = call float @powf(float [[X:%.*]], float -5.000000e-01) +; CHECK-NEXT: ret float [[POW]] ; %pow = call float @powf(float %x, float -5.0e-01) ret float %pow } +; Transform to sqrt+fdiv because 'reassoc' allows an extra rounding step. +; Use 'fabs' to handle -0.0 correctly. +; Use 'select' to handle -INF correctly. + define float @pow_libcall_neghalf_reassoc(float %x) { ; CHECK-LABEL: @pow_libcall_neghalf_reassoc( ; CHECK-NEXT: [[SQRTF:%.*]] = call reassoc float @sqrtf(float [[X:%.*]]) @@ -179,6 +179,10 @@ define float @pow_libcall_neghalf_reassoc(float %x) { ret float %pow } +; Transform to sqrt+fdiv because 'afn' allows an extra rounding step. +; Use 'fabs' to handle -0.0 correctly. +; Use 'select' to handle -INF correctly. 
+ define float @pow_libcall_neghalf_afn(float %x) { ; CHECK-LABEL: @pow_libcall_neghalf_afn( ; CHECK-NEXT: [[SQRTF:%.*]] = call afn float @sqrtf(float [[X:%.*]]) @@ -192,21 +196,21 @@ define float @pow_libcall_neghalf_afn(float %x) { ret float %pow } -; FIXME: This should not be transformed without some kind of FMF. +; This should not be transformed without some kind of FMF. define <2 x double> @pow_intrinsic_neghalf_no_FMF(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_no_FMF( -; CHECK-NEXT: [[SQRT:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) -; CHECK-NEXT: [[ABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]]) -; CHECK-NEXT: [[ISINF:%.*]] = fcmp oeq <2 x double> [[X]], <double 0xFFF0000000000000, double 0xFFF0000000000000> -; CHECK-NEXT: [[ABS_OP:%.*]] = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[ABS]] -; CHECK-NEXT: [[RECIPROCAL:%.*]] = select <2 x i1> [[ISINF]], <2 x double> zeroinitializer, <2 x double> [[ABS_OP]] -; CHECK-NEXT: ret <2 x double> [[RECIPROCAL]] +; CHECK-NEXT: [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double -5.000000e-01, double -5.000000e-01>) +; CHECK-NEXT: ret <2 x double> [[POW]] ; %pow = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -5.0e-01, double -5.0e-01>) ret <2 x double> %pow } +; Transform to sqrt+fdiv because 'reassoc' allows an extra rounding step. +; Use 'fabs' to handle -0.0 correctly. +; Use 'select' to handle -INF correctly. + define <2 x double> @pow_intrinsic_neghalf_reassoc(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_reassoc( ; CHECK-NEXT: [[SQRT:%.*]] = call reassoc <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) @@ -220,6 +224,10 @@ define <2 x double> @pow_intrinsic_neghalf_reassoc(<2 x double> %x) { ret <2 x double> %pow } +; Transform to sqrt+fdiv because 'afn' allows an extra rounding step. +; Use 'fabs' to handle -0.0 correctly. +; Use 'select' to handle -INF correctly. 
+ define <2 x double> @pow_intrinsic_neghalf_afn(<2 x double> %x) { ; CHECK-LABEL: @pow_intrinsic_neghalf_afn( ; CHECK-NEXT: [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]]) -- 2.7.4