From 18a1f0818b659cee13865b4fad2648d85984a4ed Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 10 Sep 2019 13:10:28 +0000 Subject: [PATCH] [InstCombine] Use SimplifyFMulInst to simplify multiply in fma. This allows us to fold fma's that multiply with 0.0. Also, the multiply by 1.0 case is handled there as well. The fneg/fabs cases are not handled by SimplifyFMulInst, so we need to keep them. Reviewers: spatel, anemet, lebedev.ri Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D67351 llvm-svn: 371518 --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 8 +++++--- llvm/test/Transforms/InstCombine/fma.ll | 15 +++++---------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index d304f98..3863e5f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -2258,9 +2258,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return II; } - // fma x, 1, z -> fadd x, z - if (match(Src1, m_FPOne())) { - auto *FAdd = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2)); + // Try to simplify the underlying FMul. + if (Value *V = SimplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1), + II->getFastMathFlags(), + SQ.getWithInstruction(II))) { + auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2)); FAdd->copyFastMathFlags(II); return FAdd; } diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll index 89fdc6b..42ed587 100644 --- a/llvm/test/Transforms/InstCombine/fma.ll +++ b/llvm/test/Transforms/InstCombine/fma.ll @@ -372,8 +372,7 @@ define float @fmuladd_x_1_z_fast(float %x, float %z) { define <2 x double> @fmuladd_a_0_b(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @fmuladd_a_0_b( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RES:%.*]] = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[A:%.*]], <2 x double> zeroinitializer, <2 x double> [[B:%.*]]) -; CHECK-NEXT: ret <2 x double> [[RES]] +; CHECK-NEXT: ret <2 x double> [[B:%.*]] ; entry: %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> zeroinitializer, <2 x double> %b) @@ -383,8 +382,7 @@ entry: define <2 x double> @fmuladd_0_a_b(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @fmuladd_0_a_b( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RES:%.*]] = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[A:%.*]], <2 x double> zeroinitializer, <2 x double> [[B:%.*]]) -; CHECK-NEXT: ret <2 x double> [[RES]] +; CHECK-NEXT: ret <2 x double> [[B:%.*]] ; entry: %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> zeroinitializer, <2 x double> %a, <2 x double> %b) @@ -407,8 +405,7 @@ declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double define <2 x double> @fma_a_0_b(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @fma_a_0_b( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RES:%.*]] = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> [[A:%.*]], <2 x double> zeroinitializer, <2 x double> [[B:%.*]]) -; CHECK-NEXT: ret <2 x double> [[RES]] +; CHECK-NEXT: ret <2 x double> [[B:%.*]] ; entry: %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> zeroinitializer, <2 x double> %b) @@ -418,8 +415,7 @@ entry: define <2 x double> @fma_0_a_b(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @fma_0_a_b( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RES:%.*]] = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> [[A:%.*]], <2 x double> zeroinitializer, <2 x double> [[B:%.*]]) -; CHECK-NEXT: ret <2 x double> [[RES]] +; CHECK-NEXT: ret <2 x double> [[B:%.*]] ; entry: %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> zeroinitializer, <2 x double> %a, <2 x double> %b) @@ -440,8 +436,7 @@ entry: define <2 x double> @fma_sqrt(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: @fma_sqrt( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[A:%.*]]) -; CHECK-NEXT: [[RES:%.*]] = call fast <2 x double> @llvm.fma.v2f64(<2 x double> [[SQRT]], <2 x double> [[SQRT]], <2 x double> [[B:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = fadd fast <2 x double> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: ret <2 x double> [[RES]] ; entry: -- 2.7.4