Added a slew of SimplifyInstruction floating-point optimizations, many of which take...

author Michael Ilseman <milseman@apple.com>

Wed, 12 Dec 2012 00:27:46 +0000 (00:27 +0000)

committer Michael Ilseman <milseman@apple.com>

Wed, 12 Dec 2012 00:27:46 +0000 (00:27 +0000)
author Michael Ilseman <milseman@apple.com>
Wed, 12 Dec 2012 00:27:46 +0000 (00:27 +0000)
committer Michael Ilseman <milseman@apple.com>
Wed, 12 Dec 2012 00:27:46 +0000 (00:27 +0000)
diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h

index e9b72a2fa794ee3958aed9a99f0eb35d23eb4669..721f0ddd2eaf6bf4eea46458d1be5238a98a18e3 100644 (file)
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -44,6 +44,20 @@ namespace llvm {
                           const TargetLibraryInfo *TLI = 0,
                           const DominatorTree *DT = 0);
  
+  /// Given operands for an FAdd, see if we can fold the result.  If not, this
+  /// returns null.
+  Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+                         const DataLayout *TD = 0,
+                         const TargetLibraryInfo *TLI = 0,
+                         const DominatorTree *DT = 0);
+
+  /// Given operands for an FSub, see if we can fold the result.  If not, this
+  /// returns null.
+  Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF,
+                         const DataLayout *TD = 0,
+                         const TargetLibraryInfo *TLI = 0,
+                         const DominatorTree *DT = 0);
+
    /// Given operands for an FMul, see if we can fold the result.  If not, this
    /// returns null.
    Value *SimplifyFMulInst(Value *LHS, Value *RHS,
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp

index dd109bde3acd750031b9195ee9e688532b7c8c15..42e506bbb0c9dd6e9c84100d43b93f553d42f8a6 100644 (file)
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -853,6 +853,85 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                             RecursionLimit);
  }
  
+/// Given operands for an FAdd, see if we can fold the result.  If not, this
+/// returns null.
+static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+                              const Query &Q, unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::FAdd, CLHS->getType(),
+                                      Ops, Q.TD, Q.TLI);
+    }
+
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
+  }
+
+  // fadd X, -0 ==> X
+  if (match(Op1, m_NegZero()))
+    return Op0;
+
+  // fadd X, 0 ==> X, when we know X is not -0
+  if (match(Op1, m_Zero()) &&
+      (FMF.noSignedZeros() || CannotBeNegativeZero(Op0)))
+    return Op0;
+
+  // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
+  //   where nnan and ninf have to occur at least once somewhere in this
+  //   expression
+  Value *SubOp = 0;
+  if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0))))
+    SubOp = Op1;
+  else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1))))
+    SubOp = Op0;
+  if (SubOp) {
+    Instruction *FSub = cast<Instruction>(SubOp);
+    if ((FMF.noNaNs() || FSub->hasNoNaNs()) &&
+        (FMF.noInfs() || FSub->hasNoInfs()))
+      return Constant::getNullValue(Op0->getType());
+  }
+
+  return 0;
+}
+
+/// Given operands for an FSub, see if we can fold the result.  If not, this
+/// returns null.
+static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+                              const Query &Q, unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::FSub, CLHS->getType(),
+                                      Ops, Q.TD, Q.TLI);
+    }
+  }
+
+  // fsub X, 0 ==> X
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // fsub X, -0 ==> X, when we know X is not -0
+  if (match(Op1, m_NegZero()) &&
+      (FMF.noSignedZeros() || CannotBeNegativeZero(Op0)))
+    return Op0;
+
+  // fsub 0, (fsub -0.0, X) ==> X
+  Value *X;
+  if (match(Op0, m_AnyZero())) {
+    if (match(Op1, m_FSub(m_NegZero(), m_Value(X))))
+      return X;
+    if (FMF.noSignedZeros() && match(Op1, m_FSub(m_AnyZero(), m_Value(X))))
+      return X;
+  }
+
+  // fsub nnan ninf x, x ==> 0.0
+  if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1)
+    return Constant::getNullValue(Op0->getType());
+
+  return 0;
+}
+
  /// Given the operands for an FMul, see if we can fold the result
  static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
                                 FastMathFlags FMF,
@@ -864,19 +943,19 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
        return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(),
                                        Ops, Q.TD, Q.TLI);
      }
- }
  
- // Check for some fast-math optimizations
- if (FMF.noNaNs()) {
-   if (FMF.noSignedZeros()) {
-     // fmul N S 0, x ==> 0
-     if (match(Op0, m_Zero()))
-       return Op0;
-     if (match(Op1, m_Zero()))
-       return Op1;
-   }
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
   }
  
+ // fmul X, 1.0 ==> X
+ if (match(Op1, m_FPOne()))
+   return Op0;
+
+ // fmul nnan nsz X, 0 ==> 0
+ if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
+   return Op1;
+
   return 0;
  }
  
@@ -945,6 +1024,18 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
    return 0;
  }
  
+Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+                             const DataLayout *TD, const TargetLibraryInfo *TLI,
+                             const DominatorTree *DT) {
+  return ::SimplifyFAddInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
+Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
+                             const DataLayout *TD, const TargetLibraryInfo *TLI,
+                             const DominatorTree *DT) {
+  return ::SimplifyFSubInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
  Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1,
                                FastMathFlags FMF,
                                const DataLayout *TD,
@@ -2789,12 +2880,20 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD,
    default:
      Result = ConstantFoldInstruction(I, TD, TLI);
      break;
+  case Instruction::FAdd:
+    Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1),
+                              I->getFastMathFlags(), TD, TLI, DT);
+    break;
    case Instruction::Add:
      Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
                               cast<BinaryOperator>(I)->hasNoSignedWrap(),
                               cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
                               TD, TLI, DT);
      break;
+  case Instruction::FSub:
+    Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1),
+                              I->getFastMathFlags(), TD, TLI, DT);
+    break;
    case Instruction::Sub:
      Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
                               cast<BinaryOperator>(I)->hasNoSignedWrap(),
diff --git a/llvm/test/Transforms/InstSimplify/fast-math.ll b/llvm/test/Transforms/InstSimplify/fast-math.ll

index e4b3ea306a1b05289c69891d2f5bbb4cca325b6e..154b96739791cbda30d4780f8e4e6997cb346aeb 100644 (file)
--- a/llvm/test/Transforms/InstSimplify/fast-math.ll
+++ b/llvm/test/Transforms/InstSimplify/fast-math.ll
@@ -33,3 +33,75 @@ define float @no_mul_zero_3(float %a) {
  ; CHECK: ret float %b
    ret float %b
  }
+
+; fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
+;   where nnan and ninf have to occur at least once somewhere in this
+;   expression
+; CHECK: fadd_fsub_0
+define float @fadd_fsub_0(float %a) {
+; X + -X ==> 0
+  %t1 = fsub nnan ninf float 0.0, %a
+  %zero1 = fadd nnan ninf float %t1, %a
+
+  %t2 = fsub nnan float 0.0, %a
+  %zero2 = fadd ninf float %t2, %a
+
+  %t3 = fsub nnan ninf float 0.0, %a
+  %zero3 = fadd float %t3, %a
+
+  %t4 = fsub float 0.0, %a
+  %zero4 = fadd nnan ninf float %t4, %a
+
+; Dont fold this
+; CHECK: %nofold = fsub float 0.0
+  %nofold = fsub float 0.0, %a
+; CHECK: %no_zero = fadd nnan float %nofold, %a
+  %no_zero = fadd nnan float %nofold, %a
+
+; Coalesce the folded zeros
+  %zero5 = fadd float %zero1, %zero2
+  %zero6 = fadd float %zero3, %zero4
+  %zero7 = fadd float %zero5, %zero6
+
+; Should get folded
+  %ret = fadd nsz float %no_zero, %zero7
+
+; CHECK: ret float %no_zero
+  ret float %ret
+}
+
+; fsub nnan ninf x, x ==> 0.0
+; CHECK: @fsub_x_x
+define float @fsub_x_x(float %a) {
+; X - X ==> 0
+  %zero1 = fsub nnan ninf float %a, %a
+
+; Dont fold
+; CHECK: %no_zero1 = fsub
+  %no_zero1 = fsub ninf float %a, %a
+; CHECK: %no_zero2 = fsub
+  %no_zero2 = fsub nnan float %a, %a
+; CHECK: %no_zero = fadd
+  %no_zero = fadd float %no_zero1, %no_zero2
+
+; Should get folded
+  %ret = fadd nsz float %no_zero, %zero1
+
+; CHECK: ret float %no_zero
+  ret float %ret
+}
+
+; fadd nsz X, 0 ==> X
+; CHECK: @nofold_fadd_x_0
+define float @nofold_fadd_x_0(float %a) {
+; Dont fold
+; CHECK: %no_zero1 = fadd
+  %no_zero1 = fadd ninf float %a, 0.0
+; CHECK: %no_zero2 = fadd
+  %no_zero2 = fadd nnan float %a, 0.0
+; CHECK: %no_zero = fadd
+  %no_zero = fadd float %no_zero1, %no_zero2
+
+; CHECK: ret float %no_zero
+  ret float %no_zero
+}
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll

new file mode 100644 (file)

index 0000000..f9c364c
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; fsub 0, (fsub 0, X) ==> X
+; CHECK: @fsub_0_0_x
+define float @fsub_0_0_x(float %a) {
+  %t1 = fsub float -0.0, %a
+  %ret = fsub float -0.0, %t1
+
+; CHECK: ret float %a
+  ret float %ret
+}
+
+; fsub X, 0 ==> X
+; CHECK: @fsub_x_0
+define float @fsub_x_0(float %a) {
+  %ret = fsub float %a, 0.0
+; CHECK ret float %a
+  ret float %ret
+}
+
+; fadd X, -0 ==> X
+; CHECK: @fadd_x_n0
+define float @fadd_x_n0(float %a) {
+  %ret = fadd float %a, -0.0
+; CHECK ret float %a
+  ret float %ret
+}
+
+; fmul X, 1.0 ==> X
+; CHECK: @fmul_X_1
+define double @fmul_X_1(double %a) {
+  %b = fmul double 1.000000e+00, %a                ; <double> [#uses=1]
+  ; CHECK: ret double %a
+  ret double %b
+}
author	Michael Ilseman <milseman@apple.com>
	Wed, 12 Dec 2012 00:27:46 +0000 (00:27 +0000)
committer	Michael Ilseman <milseman@apple.com>
	Wed, 12 Dec 2012 00:27:46 +0000 (00:27 +0000)
llvm/include/llvm/Analysis/InstructionSimplify.h		patch \| blob \| history
llvm/lib/Analysis/InstructionSimplify.cpp		patch \| blob \| history
llvm/test/Transforms/InstSimplify/fast-math.ll		patch \| blob \| history
llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll	[new file with mode: 0644]	patch \| blob