[DAGCombiner] try to convert pow(x, 1/3) to cbrt(x)

author Sanjay Patel <spatel@rotateright.com>

Sun, 16 Sep 2018 16:50:26 +0000 (16:50 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Sun, 16 Sep 2018 16:50:26 +0000 (16:50 +0000)
author Sanjay Patel <spatel@rotateright.com>
Sun, 16 Sep 2018 16:50:26 +0000 (16:50 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Sun, 16 Sep 2018 16:50:26 +0000 (16:50 +0000)
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h

index 842f27f..ec9c461 100644 (file)
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -550,11 +550,8 @@ namespace ISD {
      /// is often a storage-only type but has native conversions.
      FP16_TO_FP, FP_TO_FP16,
  
-    /// FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
-    /// FLOG, FLOG2, FLOG10, FEXP, FEXP2,
-    /// FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary
-    /// floating point operations. These are inspired by libm.
-    FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW,
+    /// Perform various unary floating-point operations inspired by libm.
+    FNEG, FABS, FSQRT, FCBRT, FSIN, FCOS, FPOWI, FPOW,
      FLOG, FLOG2, FLOG10, FEXP, FEXP2,
      FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR,
      /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def

index 840a002..8900512 100644 (file)
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -128,6 +128,11 @@ HANDLE_LIBCALL(SQRT_F64, "sqrt")
  HANDLE_LIBCALL(SQRT_F80, "sqrtl")
  HANDLE_LIBCALL(SQRT_F128, "sqrtl")
  HANDLE_LIBCALL(SQRT_PPCF128, "sqrtl")
+HANDLE_LIBCALL(CBRT_F32, "cbrtf")
+HANDLE_LIBCALL(CBRT_F64, "cbrt")
+HANDLE_LIBCALL(CBRT_F80, "cbrtl")
+HANDLE_LIBCALL(CBRT_F128, "cbrtl")
+HANDLE_LIBCALL(CBRT_PPCF128, "cbrtl")
  HANDLE_LIBCALL(LOG_F32, "logf")
  HANDLE_LIBCALL(LOG_F64, "log")
  HANDLE_LIBCALL(LOG_F80, "logl")
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 20e749b..4ef16cb 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11571,6 +11571,34 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
    if (!ExponentC)
      return SDValue();
  
+  // Try to convert x ** (1/3) into cube root.
+  // TODO: Handle the various flavors of long double.
+  // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
+  //       Some range near 1/3 should be fine.
+  EVT VT = N->getValueType(0);
+  if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
+      (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
+    // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
+    // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
+    // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
+    // For regular numbers, rounding may cause the results to differ.
+    // Therefore, we require { nsz ninf nnan afn } for this transform.
+    // TODO: We could select out the special cases if we don't have nsz/ninf.
+    SDNodeFlags Flags = N->getFlags();
+    if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
+        !Flags.hasApproximateFuncs())
+      return SDValue();
+
+    // Do not create a cbrt() libcall if the target does not have it, and do not
+    // turn a pow that has lowering support into a cbrt() libcall.
+    if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
+        (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
+         DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
+      return SDValue();
+
+    return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
+  }
+
    // Try to convert x ** (1/4) into square roots.
    // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
    // TODO: This could be extended (using a target hook) to handle smaller
@@ -11587,7 +11615,6 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
        return SDValue();
  
      // Don't double the number of libcalls. We are trying to inline fast code.
-    EVT VT = N->getValueType(0);
      if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
        return SDValue();
  
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

index 13d412c..b6bd854 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4047,6 +4047,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                                        RTLIB::SQRT_F80, RTLIB::SQRT_F128,
                                        RTLIB::SQRT_PPCF128));
      break;
+  case ISD::FCBRT:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
+                                      RTLIB::CBRT_F80, RTLIB::CBRT_F128,
+                                      RTLIB::CBRT_PPCF128));
+    break;
    case ISD::FSIN:
    case ISD::STRICT_FSIN:
      Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

index 7289579..594a587 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -181,6 +181,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
    case ISD::FNEG:                       return "fneg";
    case ISD::FSQRT:                      return "fsqrt";
    case ISD::STRICT_FSQRT:               return "strict_fsqrt";
+  case ISD::FCBRT:                      return "fcbrt";
    case ISD::FSIN:                       return "fsin";
    case ISD::STRICT_FSIN:                return "strict_fsin";
    case ISD::FCOS:                       return "fcos";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp

index aeb321f..b9cdbea 100644 (file)
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -666,6 +666,7 @@ void TargetLoweringBase::initActions() {
  
    // These library functions default to expand.
    for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
+    setOperationAction(ISD::FCBRT,      VT, Expand);
      setOperationAction(ISD::FLOG ,      VT, Expand);
      setOperationAction(ISD::FLOG2,      VT, Expand);
      setOperationAction(ISD::FLOG10,     VT, Expand);
diff --git a/llvm/test/CodeGen/X86/pow.ll b/llvm/test/CodeGen/X86/pow.ll

index db8ac76..639f7dd 100644 (file)
--- a/llvm/test/CodeGen/X86/pow.ll
+++ b/llvm/test/CodeGen/X86/pow.ll
@@ -7,6 +7,8 @@ declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
  declare double @llvm.pow.f64(double, double)
  declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
  
+declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80)
+
  define float @pow_f32_one_fourth_fmf(float %x) nounwind {
  ; CHECK-LABEL: pow_f32_one_fourth_fmf:
  ; CHECK:       # %bb.0:
@@ -165,8 +167,7 @@ define <2 x double> @pow_v2f64_one_fourth_not_enough_fmf(<2 x double> %x) nounwi
  define float @pow_f32_one_third_fmf(float %x) nounwind {
  ; CHECK-LABEL: pow_f32_one_third_fmf:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT:    jmp powf # TAILCALL
+; CHECK-NEXT:    jmp cbrtf # TAILCALL
    %one = uitofp i32 1 to float
    %three = uitofp i32 3 to float
    %exp = fdiv float %one, %three
@@ -177,8 +178,7 @@ define float @pow_f32_one_third_fmf(float %x) nounwind {
  define double @pow_f64_one_third_fmf(double %x) nounwind {
  ; CHECK-LABEL: pow_f64_one_third_fmf:
  ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT:    jmp pow # TAILCALL
+; CHECK-NEXT:    jmp cbrt # TAILCALL
    %one = uitofp i32 1 to double
    %three = uitofp i32 3 to double
    %exp = fdiv double %one, %three
@@ -186,3 +186,45 @@ define double @pow_f64_one_third_fmf(double %x) nounwind {
    ret double %r
  }
  
+; TODO: We could turn this into cbrtl, but currently we only handle float/double types.
+
+define x86_fp80 @pow_f80_one_third_fmf(x86_fp80 %x) nounwind {
+; CHECK-LABEL: pow_f80_one_third_fmf:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{.*}}(%rip)
+; CHECK-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstpt (%rsp)
+; CHECK-NEXT:    callq powl
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+  %one = uitofp i32 1 to x86_fp80
+  %three = uitofp i32 3 to x86_fp80
+  %exp = fdiv x86_fp80 %one, %three
+  %r = call nsz nnan ninf afn x86_fp80 @llvm.pow.f80(x86_fp80 %x, x86_fp80 %exp)
+  ret x86_fp80 %r
+}
+
+; We might want to allow this. The exact hex value for 1/3 as a double is 0x3fd5555555555555.
+
+define double @pow_f64_not_exactly_one_third_fmf(double %x) nounwind {
+; CHECK-LABEL: pow_f64_not_exactly_one_third_fmf:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    jmp pow # TAILCALL
+  %r = call nsz nnan ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555556)
+  ret double %r
+}
+
+; We require all 4 of nsz, ninf, nnan, afn.
+
+define double @pow_f64_not_enough_fmf(double %x) nounwind {
+; CHECK-LABEL: pow_f64_not_enough_fmf:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    jmp pow # TAILCALL
+  %r = call nsz ninf afn double @llvm.pow.f64(double %x, double 0x3fd5555555555555)
+  ret double %r
+}
+
author	Sanjay Patel <spatel@rotateright.com>
	Sun, 16 Sep 2018 16:50:26 +0000 (16:50 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Sun, 16 Sep 2018 16:50:26 +0000 (16:50 +0000)
llvm/include/llvm/CodeGen/ISDOpcodes.h		patch | blob | history
llvm/include/llvm/IR/RuntimeLibcalls.def		patch | blob | history
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp		patch | blob | history
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp		patch | blob | history
llvm/lib/CodeGen/TargetLoweringBase.cpp		patch | blob | history
llvm/test/CodeGen/X86/pow.ll		patch | blob | history