From 19f9f374d9afe3efd33dc1943d3d7fd2c54fba76 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Fri, 20 Dec 2019 17:24:53 -0500
Subject: [PATCH] [SimplifyLibCalls] require fast-math-flags for pow(X, -0.5)
 transforms

As discussed in PR44330 (https://bugs.llvm.org/show_bug.cgi?id=44330),
the transform from a pow(X, -0.5) libcall/intrinsic to a
reciprocal square root can result in small deviations from
the expected result due to differences in the pow()
implementation and/or the extra rounding step from the division.

This patch allows that difference only when the call carries either
the 'approximate functions' (afn) or 'reassociate' (reassoc)
fast-math-flag (FMF):
http://llvm.org/docs/LangRef.html#fast-math-flags

In practice, this likely means that the code is compiled with
all of 'fast' (-ffast-math), but I have preserved the existing
special-case handling for -0.0/-INF inputs, so the generated code
remains correct when those special values are still allowed
alongside approximation/reassociation.

The question about whether a similar restriction is needed for
the non-reciprocal case -- pow(X, 0.5) -- is deferred. That
transform is allowed without FMF currently, and this patch does
not change that behavior.

Differential Revision: https://reviews.llvm.org/D71706
---
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp |  5 ++++
 llvm/test/Transforms/InstCombine/pow-sqrt.ll   | 36 ++++++++++++++++----------
 2 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 6c09600..fa3a9d2 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1644,6 +1644,11 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilder<> &B) {
       (!ExpoF->isExactlyValue(0.5) && !ExpoF->isExactlyValue(-0.5)))
     return nullptr;
 
+  // Converting pow(X, -0.5) to 1/sqrt(X) may introduce an extra rounding step,
+  // so that requires fast-math-flags (afn or reassoc).
+  if (ExpoF->isNegative() && (!Pow->hasApproxFunc() && !Pow->hasAllowReassoc()))
+    return nullptr;
+
   Sqrt = getSqrtCall(Base, Attrs, Pow->doesNotAccessMemory(), Mod, B, TLI);
   if (!Sqrt)
     return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/pow-sqrt.ll b/llvm/test/Transforms/InstCombine/pow-sqrt.ll
index fee6fd3..588d4d1 100644
--- a/llvm/test/Transforms/InstCombine/pow-sqrt.ll
+++ b/llvm/test/Transforms/InstCombine/pow-sqrt.ll
@@ -150,22 +150,22 @@ define double @pow_intrinsic_half_fast(double %x) {
   ret double %pow
 }
 
-; FIXME: This should not be transformed without some kind of FMF.
+; This should not be transformed without some kind of FMF.
 ; -0.5 means take the reciprocal.
 
 define float @pow_libcall_neghalf_no_FMF(float %x) {
 ; CHECK-LABEL: @pow_libcall_neghalf_no_FMF(
-; CHECK-NEXT:    [[SQRTF:%.*]] = call float @sqrtf(float [[X:%.*]])
-; CHECK-NEXT:    [[ABS:%.*]] = call float @llvm.fabs.f32(float [[SQRTF]])
-; CHECK-NEXT:    [[ISINF:%.*]] = fcmp oeq float [[X]], 0xFFF0000000000000
-; CHECK-NEXT:    [[ABS_OP:%.*]] = fdiv float 1.000000e+00, [[ABS]]
-; CHECK-NEXT:    [[RECIPROCAL:%.*]] = select i1 [[ISINF]], float 0.000000e+00, float [[ABS_OP]]
-; CHECK-NEXT:    ret float [[RECIPROCAL]]
+; CHECK-NEXT:    [[POW:%.*]] = call float @powf(float [[X:%.*]], float -5.000000e-01)
+; CHECK-NEXT:    ret float [[POW]]
 ;
   %pow = call float @powf(float %x, float -5.0e-01)
   ret float %pow
 }
 
+; Transform to sqrt+fdiv because 'reassoc' allows an extra rounding step.
+; Use 'fabs' to handle -0.0 correctly.
+; Use 'select' to handle -INF correctly.
+
 define float @pow_libcall_neghalf_reassoc(float %x) {
 ; CHECK-LABEL: @pow_libcall_neghalf_reassoc(
 ; CHECK-NEXT:    [[SQRTF:%.*]] = call reassoc float @sqrtf(float [[X:%.*]])
@@ -179,6 +179,10 @@ define float @pow_libcall_neghalf_reassoc(float %x) {
   ret float %pow
 }
 
+; Transform to sqrt+fdiv because 'afn' allows an extra rounding step.
+; Use 'fabs' to handle -0.0 correctly.
+; Use 'select' to handle -INF correctly.
+
 define float @pow_libcall_neghalf_afn(float %x) {
 ; CHECK-LABEL: @pow_libcall_neghalf_afn(
 ; CHECK-NEXT:    [[SQRTF:%.*]] = call afn float @sqrtf(float [[X:%.*]])
@@ -192,21 +196,21 @@ define float @pow_libcall_neghalf_afn(float %x) {
   ret float %pow
 }
 
-; FIXME: This should not be transformed without some kind of FMF.
+; This should not be transformed without some kind of FMF.
 
 define <2 x double> @pow_intrinsic_neghalf_no_FMF(<2 x double> %x) {
 ; CHECK-LABEL: @pow_intrinsic_neghalf_no_FMF(
-; CHECK-NEXT:    [[SQRT:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
-; CHECK-NEXT:    [[ABS:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]])
-; CHECK-NEXT:    [[ISINF:%.*]] = fcmp oeq <2 x double> [[X]], <double 0xFFF0000000000000, double 0xFFF0000000000000>
-; CHECK-NEXT:    [[ABS_OP:%.*]] = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[ABS]]
-; CHECK-NEXT:    [[RECIPROCAL:%.*]] = select <2 x i1> [[ISINF]], <2 x double> zeroinitializer, <2 x double> [[ABS_OP]]
-; CHECK-NEXT:    ret <2 x double> [[RECIPROCAL]]
+; CHECK-NEXT:    [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double -5.000000e-01, double -5.000000e-01>)
+; CHECK-NEXT:    ret <2 x double> [[POW]]
 ;
   %pow = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -5.0e-01, double -5.0e-01>)
   ret <2 x double> %pow
 }
 
+; Transform to sqrt+fdiv because 'reassoc' allows an extra rounding step.
+; Use 'fabs' to handle -0.0 correctly.
+; Use 'select' to handle -INF correctly.
+
 define <2 x double> @pow_intrinsic_neghalf_reassoc(<2 x double> %x) {
 ; CHECK-LABEL: @pow_intrinsic_neghalf_reassoc(
 ; CHECK-NEXT:    [[SQRT:%.*]] = call reassoc <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
@@ -220,6 +224,10 @@ define <2 x double> @pow_intrinsic_neghalf_reassoc(<2 x double> %x) {
   ret <2 x double> %pow
 }
 
+; Transform to sqrt+fdiv because 'afn' allows an extra rounding step.
+; Use 'fabs' to handle -0.0 correctly.
+; Use 'select' to handle -INF correctly.
+
 define <2 x double> @pow_intrinsic_neghalf_afn(<2 x double> %x) {
 ; CHECK-LABEL: @pow_intrinsic_neghalf_afn(
 ; CHECK-NEXT:    [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
-- 
2.7.4