From e772618f1ee2fe7709a17e6d850623a97e23a8dc Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 20 Feb 2021 09:02:39 -0500 Subject: [PATCH] [InstCombine] fold fdiv with exp/exp2 divisor (PR49147) Follow-up to: D96648 / b40fde062 ...for the special-case base calls. From the earlier commit: This is unusual in the general (non-reciprocal) case because we need an extra instruction, but that should be better for general FP reassociation and codegen. We conservatively check for "arcp" FMF here as we do with existing fdiv folds, but it is not strictly necessary to have that. --- .../InstCombine/InstCombineMulDivRem.cpp | 13 ++++++++++++- llvm/test/Transforms/InstCombine/fdiv.ll | 22 ++++++++++++---------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 9bc566e..702f76d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1326,8 +1326,9 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { if (match(Op1, m_FDiv(m_SpecificFP(1.0), m_Value(Y)))) return BinaryOperator::CreateFMulFMF(Y, Op0, &I); - // Negate the exponent of pow to fold division-by-pow() into multiply: + // Negate the exponent of pow/exp to fold division-by-pow() into multiply: // Z / pow(X, Y) --> Z * pow(X, -Y) + // Z / exp{2}(Y) --> Z * exp{2}(-Y) // In the general case, this creates an extra instruction, but fmul allows // for better canonicalization and optimization than fdiv. 
if (match(Op1, @@ -1336,6 +1337,16 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { Value *Pow = Builder.CreateBinaryIntrinsic(Intrinsic::pow, X, NegY, &I); return BinaryOperator::CreateFMulFMF(Op0, Pow, &I); } + if (match(Op1, m_OneUse(m_Intrinsic<Intrinsic::exp>(m_Value(Y))))) { + Value *NegY = Builder.CreateFNegFMF(Y, &I); + Value *Pow = Builder.CreateUnaryIntrinsic(Intrinsic::exp, NegY, &I); + return BinaryOperator::CreateFMulFMF(Op0, Pow, &I); + } + if (match(Op1, m_OneUse(m_Intrinsic<Intrinsic::exp2>(m_Value(Y))))) { + Value *NegY = Builder.CreateFNegFMF(Y, &I); + Value *Pow = Builder.CreateUnaryIntrinsic(Intrinsic::exp2, NegY, &I); + return BinaryOperator::CreateFMulFMF(Op0, Pow, &I); + } } if (I.hasAllowReassoc() && Op0->hasOneUse() && Op1->hasOneUse()) { diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index 3a7e87a..6ccbefa 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@ -731,8 +731,9 @@ define <2 x half> @pow_recip(<2 x half> %x, <2 x half> %y) { define float @exp_divisor(float %y, float %z) { ; CHECK-LABEL: @exp_divisor( -; CHECK-NEXT: [[P:%.*]] = call float @llvm.exp.f32(float [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = fdiv reassoc arcp float [[Z:%.*]], [[P]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg reassoc arcp float [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call reassoc arcp float @llvm.exp.f32(float [[TMP1]]) +; CHECK-NEXT: [[R:%.*]] = fmul reassoc arcp float [[TMP2]], [[Z:%.*]] ; CHECK-NEXT: ret float [[R]] ; %p = call float @llvm.exp.f32(float %y) @@ -785,9 +786,9 @@ define float @exp_divisor_not_enough_fmf2(float %y, float %z) { define <2 x half> @exp_recip(<2 x half> %x, <2 x half> %y) { ; CHECK-LABEL: @exp_recip( -; CHECK-NEXT: [[P:%.*]] = call <2 x half> @llvm.exp.v2f16(<2 x half> [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = fdiv reassoc ninf arcp <2 x half> <half 0xH3C00, half 0xH3C00>, [[P]] -; CHECK-NEXT: ret <2 x half> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg reassoc ninf arcp <2 x half> [[Y:%.*]] +; 
CHECK-NEXT: [[TMP2:%.*]] = call reassoc ninf arcp <2 x half> @llvm.exp.v2f16(<2 x half> [[TMP1]]) +; CHECK-NEXT: ret <2 x half> [[TMP2]] ; %p = call <2 x half> @llvm.exp.v2f16(<2 x half> %y) %r = fdiv reassoc arcp ninf <2 x half> <half 1.0, half 1.0>, %p @@ -796,8 +797,9 @@ define <2 x half> @exp_recip(<2 x half> %x, <2 x half> %y) { define float @exp2_divisor(float %y, float %z) { ; CHECK-LABEL: @exp2_divisor( -; CHECK-NEXT: [[P:%.*]] = call float @llvm.exp2.f32(float [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = fdiv reassoc arcp float [[Z:%.*]], [[P]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg reassoc arcp float [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call reassoc arcp float @llvm.exp2.f32(float [[TMP1]]) +; CHECK-NEXT: [[R:%.*]] = fmul reassoc arcp float [[TMP2]], [[Z:%.*]] ; CHECK-NEXT: ret float [[R]] ; %p = call float @llvm.exp2.f32(float %y) @@ -850,9 +852,9 @@ define float @exp2_divisor_not_enough_fmf2(float %y, float %z) { define <2 x half> @exp2_recip(<2 x half> %x, <2 x half> %y) { ; CHECK-LABEL: @exp2_recip( -; CHECK-NEXT: [[P:%.*]] = call <2 x half> @llvm.exp2.v2f16(<2 x half> [[Y:%.*]]) -; CHECK-NEXT: [[R:%.*]] = fdiv reassoc ninf arcp <2 x half> <half 0xH3C00, half 0xH3C00>, [[P]] -; CHECK-NEXT: ret <2 x half> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg reassoc ninf arcp <2 x half> [[Y:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call reassoc ninf arcp <2 x half> @llvm.exp2.v2f16(<2 x half> [[TMP1]]) +; CHECK-NEXT: ret <2 x half> [[TMP2]] ; %p = call <2 x half> @llvm.exp2.v2f16(<2 x half> %y) %r = fdiv reassoc arcp ninf <2 x half> <half 1.0, half 1.0>, %p -- 2.7.4