From 7cebf0af4076c7d198ef8ef90b79d1ff422a42cd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 1 Jun 2019 19:40:07 +0000 Subject: [PATCH] [InlineCost] Don't add the soft float function call cost for the fneg idiom, fsub -0.0, %x Summary: Fneg can be implemented with an xor rather than a function call, so we don't need to add the function call overhead. This was pointed out in D62699. Reviewers: efriedma, cameron.mcinally Reviewed By: efriedma Subscribers: javed.absar, eraman, hiraditya, haicheng, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62747 llvm-svn: 362304 --- llvm/lib/Analysis/InlineCost.cpp | 7 +++++-- llvm/test/Transforms/Inline/ARM/inline-fp.ll | 25 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index ced30d6..a332a43 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -1095,9 +1096,11 @@ bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { // If the instruction is floating point, and the target says this operation // is expensive, this may eventually become a library call. Treat the cost - // as such. + // as such, unless it's an fneg, which can be implemented with an xor. 
+ using namespace llvm::PatternMatch; if (I.getType()->isFloatingPointTy() && - TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive) + TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive && + !match(&I, m_FNeg(m_Value()))) addCost(InlineConstants::CallPenalty); return false; diff --git a/llvm/test/Transforms/Inline/ARM/inline-fp.ll b/llvm/test/Transforms/Inline/ARM/inline-fp.ll index fdc066c..1d74dfd 100644 --- a/llvm/test/Transforms/Inline/ARM/inline-fp.ll +++ b/llvm/test/Transforms/Inline/ARM/inline-fp.ll @@ -12,6 +12,8 @@ ; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75) ; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) ; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75) +; NOFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75) ; FULLFP-DAG: single inlined into test_single with (cost=0, threshold=75) ; FULLFP-DAG: single inlined into test_single with (cost=-15000, threshold=75) @@ -21,6 +23,8 @@ ; FULLFP-DAG: double inlined into test_double with (cost=-15000, threshold=75) ; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) ; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) +; FULLFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75) +; FULLFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75) ; SINGLEFP-DAG: single inlined into 
test_single with (cost=0, threshold=75) ; SINGLEFP-DAG: single inlined into test_single with (cost=-15000, threshold=75) @@ -30,6 +34,8 @@ ; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75) ; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) ; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) +; SINGLEFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75) +; SINGLEFP-DAG: single_force_soft_fneg not inlined into test_single_force_soft_fneg because too costly to inline (cost=100, threshold=75) define i32 @test_single(i32 %a, i8 %b, i32 %c, i8 %d) #0 { %call = call float @single(i32 %a, i8 zeroext %b) @@ -55,6 +61,12 @@ define i32 @test_single_force_soft(i32 %a, i8 %b, i32 %c, i8 %d) #1 { ret i32 0 } +define i32 @test_single_force_soft_fneg(i32 %a, i8 %b, i32 %c, i8 %d) #1 { + %call = call float @single_force_soft_fneg(i32 %a, i8 zeroext %b) #1 + %call2 = call float @single_force_soft_fneg(i32 %c, i8 zeroext %d) #1 + ret i32 0 +} + define internal float @single(i32 %response, i8 zeroext %value1) #0 { entry: %conv = zext i8 %value1 to i32 @@ -106,6 +118,19 @@ entry: ret float %div } +define internal float @single_force_soft_fneg(i32 %response, i8 zeroext %value1) #1 { +entry: + %conv = zext i8 %value1 to i32 + %sub = add nsw i32 %conv, -1 + %conv1 = sitofp i32 %sub to float + %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1) + %mul = fsub float -0.0, %0 + %conv2 = sitofp i32 %response to float + %sub3 = fsub float %conv2, %mul + %div = fdiv float %sub3, %mul + ret float %div +} + declare float @llvm.pow.f32(float, float) optsize minsize declare double @llvm.pow.f64(double, double) optsize minsize -- 2.7.4