From c9e8c9e3ea2681dc18dd3a2d43d6aa8f37252649 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Thu, 25 Jun 2020 11:28:04 -0400
Subject: [PATCH] [InstCombine] fold fmul/fdiv with fabs operands

fabs(X) * fabs(Y) --> fabs(X * Y)
fabs(X) / fabs(Y) --> fabs(X / Y)

If both operands of fmul/fdiv are positive, then the result must be positive.

There's a NAN corner-case that prevents removing the more specific fold just
above this one:
fabs(X) * fabs(X) -> X * X
That fold works even with NAN because the sign-bit result of the multiply is
not specified if X is NAN.

We can't remove that and use the more general fold that is proposed here
because once we convert to this:
fabs (X * X)
...it is not legal to simplify the 'fabs' out of that expression when X is
NAN. That's because fabs() guarantees that the sign-bit is always cleared -
even for NAN values.

So this patch has the potential to lose information, but it seems unlikely
if we do the more specific fold ahead of this one.

Differential Revision: https://reviews.llvm.org/D82277
---
 llvm/lib/Transforms/InstCombine/InstCombineInternal.h    |  1 +
 llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 15 ++++++++++++++-
 llvm/test/Transforms/InstCombine/fdiv.ll                 | 15 ++++++++-------
 llvm/test/Transforms/InstCombine/fmul.ll                 | 15 ++++++++-------
 4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index d256e77..8caf580 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -635,6 +635,7 @@ private:
   Value *getSelectCondition(Value *A, Value *B);

   Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
+  Instruction *foldFPSignBitOps(BinaryOperator &I);

 public:
   /// Inserts an instruction \p New before instruction \p Old
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cbbde9a..2d4fdd3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -402,7 +402,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   return Changed ? &I : nullptr;
 }

-static Instruction *foldFPSignBitOps(BinaryOperator &I) {
+Instruction *InstCombiner::foldFPSignBitOps(BinaryOperator &I) {
   BinaryOperator::BinaryOps Opcode = I.getOpcode();
   assert((Opcode == Instruction::FMul || Opcode == Instruction::FDiv) &&
          "Expected fmul or fdiv");
@@ -420,6 +420,19 @@ static Instruction *foldFPSignBitOps(BinaryOperator &I) {
   if (Op0 == Op1 && match(Op0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))))
     return BinaryOperator::CreateWithCopiedFlags(Opcode, X, X, &I);

+  // fabs(X) * fabs(Y) --> fabs(X * Y)
+  // fabs(X) / fabs(Y) --> fabs(X / Y)
+  if (match(Op0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))) &&
+      match(Op1, m_Intrinsic<Intrinsic::fabs>(m_Value(Y))) &&
+      (Op0->hasOneUse() || Op1->hasOneUse())) {
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+    Builder.setFastMathFlags(I.getFastMathFlags());
+    Value *XY = Builder.CreateBinOp(Opcode, X, Y);
+    Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, XY);
+    Fabs->takeName(&I);
+    return replaceInstUsesWith(I, Fabs);
+  }
+
   return nullptr;
 }
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index deceb72..7b1a556 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -598,9 +598,8 @@ define float @fabs_same_op_extra_use(float %x) {

 define float @fabs_fabs(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs(
-; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
-; CHECK-NEXT:    [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = call float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
@@ -613,8 +612,8 @@ define float @fabs_fabs_extra_use1(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use1(
 ; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
 ; CHECK-NEXT:    call void @use_f32(float [[X_FABS]])
-; CHECK-NEXT:    [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv ninf float [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = call ninf float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
@@ -626,10 +625,10 @@ define float @fabs_fabs_extra_use2(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use2(
-; CHECK-NEXT:    [[X_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
 ; CHECK-NEXT:    [[Y_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[Y:%.*]])
 ; CHECK-NEXT:    call void @use_f32(float [[Y_FABS]])
-; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc ninf float [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[R:%.*]] = call reassoc ninf float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[R]]
 ;
   %x.fabs = call fast float @llvm.fabs.f32(float %x)
@@ -639,6 +638,8 @@ define float @fabs_fabs_extra_use2(float %x, float %y) {
   ret float %r
 }

+; negative test - don't create an extra instruction
+
 define float @fabs_fabs_extra_use3(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use3(
 ; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 0559a79..8e168f2 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -482,9 +482,8 @@ define float @fabs_squared_fast(float %x) {

 define float @fabs_fabs(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs(
-; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
-; CHECK-NEXT:    [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = call float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[MUL]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
@@ -497,8 +496,8 @@ define float @fabs_fabs_extra_use1(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use1(
 ; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
 ; CHECK-NEXT:    call void @use_f32(float [[X_FABS]])
-; CHECK-NEXT:    [[Y_FABS:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]])
-; CHECK-NEXT:    [[MUL:%.*]] = fmul ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul ninf float [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[MUL:%.*]] = call ninf float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[MUL]]
 ;
   %x.fabs = call float @llvm.fabs.f32(float %x)
@@ -510,10 +509,10 @@ define float @fabs_fabs_extra_use2(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use2(
-; CHECK-NEXT:    [[X_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
 ; CHECK-NEXT:    [[Y_FABS:%.*]] = call fast float @llvm.fabs.f32(float [[Y:%.*]])
 ; CHECK-NEXT:    call void @use_f32(float [[Y_FABS]])
-; CHECK-NEXT:    [[MUL:%.*]] = fmul reassoc ninf float [[X_FABS]], [[Y_FABS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc ninf float [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[MUL:%.*]] = call reassoc ninf float @llvm.fabs.f32(float [[TMP1]])
 ; CHECK-NEXT:    ret float [[MUL]]
 ;
   %x.fabs = call fast float @llvm.fabs.f32(float %x)
@@ -523,6 +522,8 @@ define float @fabs_fabs_extra_use2(float %x, float %y) {
   ret float %mul
 }

+; negative test - don't create an extra instruction
+
 define float @fabs_fabs_extra_use3(float %x, float %y) {
 ; CHECK-LABEL: @fabs_fabs_extra_use3(
 ; CHECK-NEXT:    [[X_FABS:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
--
2.7.4
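
Note (not part of the patch): a minimal standalone C++ sketch of the sign-bit
point made in the commit message, assuming a host compiler/libm with IEC 60559
(C Annex F) semantics. It only demonstrates that fabs() clears the sign bit
even for a NaN input, which is why the 'fabs' in "fabs(X * X)" cannot simply
be dropped when X may be NAN:

  // Demo only; the names here are illustrative and not LLVM code.
  #include <cmath>
  #include <cstdio>

  int main() {
    // Build a NaN with the sign bit set.
    double neg_nan = std::copysign(std::nan(""), -1.0);
    // Prints 1: the sign bit of -NaN is set.
    std::printf("signbit(-NaN)       = %d\n", (int)std::signbit(neg_nan));
    // Prints 0: fabs() clears the sign bit, even for NaN.
    std::printf("signbit(fabs(-NaN)) = %d\n", (int)std::signbit(std::fabs(neg_nan)));
    return 0;
  }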