[InstCombine] fold fdiv with pow divisor (PR49147)

author Sanjay Patel <spatel@rotateright.com>

Sun, 14 Feb 2021 13:06:15 +0000 (08:06 -0500)

committer Sanjay Patel <spatel@rotateright.com>

Sun, 14 Feb 2021 13:07:36 +0000 (08:07 -0500)
author Sanjay Patel <spatel@rotateright.com>
Sun, 14 Feb 2021 13:06:15 +0000 (08:06 -0500)
committer Sanjay Patel <spatel@rotateright.com>
Sun, 14 Feb 2021 13:07:36 +0000 (08:07 -0500)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

index 5526739..9bc566e 100644 (file)
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1325,6 +1325,17 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
      // replaced by a multiplication.
      if (match(Op1, m_FDiv(m_SpecificFP(1.0), m_Value(Y))))
        return BinaryOperator::CreateFMulFMF(Y, Op0, &I);
+
+    // Negate the exponent of pow to fold division-by-pow() into multiply:
+    // Z / pow(X, Y) --> Z * pow(X, -Y)
+    // In the general case, this creates an extra instruction, but fmul allows
+    // for better canonicalization and optimization than fdiv.
+    if (match(Op1,
+              m_OneUse(m_Intrinsic<Intrinsic::pow>(m_Value(X), m_Value(Y))))) {
+      Value *NegY = Builder.CreateFNegFMF(Y, &I);
+      Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, NegY, &I);
+      return BinaryOperator::CreateFMulFMF(Op0, Pow, &I);
+    }
    }
  
    if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) {
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll

index f1858cb..52bf746 100644 (file)
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -661,8 +661,9 @@ define float @fabs_fabs_extra_use3(float %x, float %y) {
  
  define float @pow_divisor(float %x, float %y, float %z) {
  ; CHECK-LABEL: @pow_divisor(
-; CHECK-NEXT:    [[P:%.*]] = call float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc arcp float [[Z:%.*]], [[P]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fneg reassoc arcp float [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc arcp float @llvm.pow.f32(float [[X:%.*]], float [[TMP1]])
+; CHECK-NEXT:    [[R:%.*]] = fmul reassoc arcp float [[TMP2]], [[Z:%.*]]
  ; CHECK-NEXT:    ret float [[R]]
  ;
    %p = call float @llvm.pow.f32(float %x, float %y)
@@ -670,6 +671,8 @@ define float @pow_divisor(float %x, float %y, float %z) {
    ret float %r
  }
  
+; Negative test - don't create an extra pow
+
  define float @pow_divisor_extra_use(float %x, float %y, float %z) {
  ; CHECK-LABEL: @pow_divisor_extra_use(
  ; CHECK-NEXT:    [[P:%.*]] = call float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -683,6 +686,8 @@ define float @pow_divisor_extra_use(float %x, float %y, float %z) {
    ret float %r
  }
  
+; Negative test - must have reassoc+arcp
+
  define float @pow_divisor_not_enough_fmf(float %x, float %y, float %z) {
  ; CHECK-LABEL: @pow_divisor_not_enough_fmf(
  ; CHECK-NEXT:    [[P:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -694,6 +699,8 @@ define float @pow_divisor_not_enough_fmf(float %x, float %y, float %z) {
    ret float %r
  }
  
+; Negative test - must have reassoc+arcp
+
  define float @pow_divisor_not_enough_fmf2(float %x, float %y, float %z) {
  ; CHECK-LABEL: @pow_divisor_not_enough_fmf2(
  ; CHECK-NEXT:    [[P:%.*]] = call fast float @llvm.pow.f32(float [[X:%.*]], float [[Y:%.*]])
@@ -705,11 +712,13 @@ define float @pow_divisor_not_enough_fmf2(float %x, float %y, float %z) {
    ret float %r
  }
  
+; Special-case - reciprocal does not require extra fmul
+
  define <2 x half> @pow_recip(<2 x half> %x, <2 x half> %y) {
  ; CHECK-LABEL: @pow_recip(
-; CHECK-NEXT:    [[P:%.*]] = call <2 x half> @llvm.pow.v2f16(<2 x half> [[X:%.*]], <2 x half> [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc ninf arcp <2 x half> <half 0xH3C00, half 0xH3C00>, [[P]]
-; CHECK-NEXT:    ret <2 x half> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fneg reassoc ninf arcp <2 x half> [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc ninf arcp <2 x half> @llvm.pow.v2f16(<2 x half> [[X:%.*]], <2 x half> [[TMP1]])
+; CHECK-NEXT:    ret <2 x half> [[TMP2]]
  ;
    %p = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
    %r = fdiv reassoc arcp ninf <2 x half> <half 1.0, half 1.0>, %p
author	Sanjay Patel <spatel@rotateright.com>
	Sun, 14 Feb 2021 13:06:15 +0000 (08:06 -0500)
committer	Sanjay Patel <spatel@rotateright.com>
	Sun, 14 Feb 2021 13:07:36 +0000 (08:07 -0500)
llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp		patch | blob | history
llvm/test/Transforms/InstCombine/fdiv.ll		patch | blob | history