Use rsqrt (X86) to speed up reciprocal square root calcs

author Sanjay Patel <spatel@rotateright.com>

Fri, 24 Oct 2014 17:02:16 +0000 (17:02 +0000)

committer Sanjay Patel <spatel@rotateright.com>

Fri, 24 Oct 2014 17:02:16 +0000 (17:02 +0000)
author Sanjay Patel <spatel@rotateright.com>
Fri, 24 Oct 2014 17:02:16 +0000 (17:02 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Fri, 24 Oct 2014 17:02:16 +0000 (17:02 +0000)
diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h

index c6eac43..95b050a 100644 (file)
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -2652,13 +2652,16 @@ public:
    /// The RefinementSteps output is the number of Newton-Raphson refinement
    /// iterations required to generate a sufficient (though not necessarily
    /// IEEE-754 compliant) estimate for the value type.
+  /// The boolean UseOneConstNR output is used to select a Newton-Raphson
+  /// algorithm implementation that uses one constant or two constants.
    /// A target may choose to implement its own refinement within this function.
    /// If that's true, then return '0' as the number of RefinementSteps to avoid
    /// any further refinement of the estimate.
    /// An empty SDValue return means no estimate sequence can be created.
    virtual SDValue getRsqrtEstimate(SDValue Operand,
                                DAGCombinerInfo &DCI,
-                              unsigned &RefinementSteps) const {
+                              unsigned &RefinementSteps,
+                              bool &UseOneConstNR) const {
      return SDValue();
    }
  
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 6fe4ade..4ad2e5d 100644 (file)
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -331,6 +331,8 @@ namespace {
      SDValue BuildUDIV(SDNode *N);
      SDValue BuildReciprocalEstimate(SDValue Op);
      SDValue BuildRsqrtEstimate(SDValue Op);
+    SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
+    SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
      SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                 bool DemandHighBits = true);
      SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
@@ -7033,13 +7035,11 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
      // into a target-specific square root estimate instruction.
      if (N1.getOpcode() == ISD::FSQRT) {
        if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
-        AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
        }
      } else if (N1.getOpcode() == ISD::FP_EXTEND &&
                 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
-        AddToWorklist(RV.getNode());
          RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -7047,7 +7047,6 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
      } else if (N1.getOpcode() == ISD::FP_ROUND &&
                 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
-        AddToWorklist(RV.getNode());
          RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -7068,7 +7067,6 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
          // We found a FSQRT, so try to make this fold:
          // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
          if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
-          AddToWorklist(RV.getNode());
            RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
            AddToWorklist(RV.getNode());
            return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
@@ -7116,7 +7114,6 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
    if (DAG.getTarget().Options.UnsafeFPMath) {
      // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
      if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) {
-      AddToWorklist(RV.getNode());
        EVT VT = RV.getValueType();
        RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV);
        AddToWorklist(RV.getNode());
@@ -11985,49 +11982,89 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
    return SDValue();
  }
  
-SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
-  if (Level >= AfterLegalizeDAG)
-    return SDValue();
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+///     =>
+///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+/// As a result, we precompute A/2 prior to the iteration loop.
+SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
+                                          unsigned Iterations) {
+  EVT VT = Arg.getValueType();
+  SDLoc DL(Arg);
+  SDValue ThreeHalves = DAG.getConstantFP(1.5, VT);
+  
+  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
+  // this entire sequence requires only one FP constant.
+  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
+  AddToWorklist(HalfArg.getNode());
+  
+  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
+  AddToWorklist(HalfArg.getNode());
+  
+  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+  for (unsigned i = 0; i < Iterations; ++i) {
+    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+    AddToWorklist(NewEst.getNode());
+    
+    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
+    AddToWorklist(NewEst.getNode());
+    
+    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
+    AddToWorklist(NewEst.getNode());
+    
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
+    AddToWorklist(Est.getNode());
+  }
+  return Est;
+}
  
-  // Expose the DAG combiner to the target combiner implementations.
-  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
-  unsigned Iterations = 0;
-  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations)) {
-    if (Iterations) {
-      // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
-      // For the reciprocal sqrt, we need to find the zero of the function:
-      //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
-      //     =>
-      //   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
-      // As a result, we precompute A/2 prior to the iteration loop.
-      EVT VT = Op.getValueType();
-      SDLoc DL(Op);
-      SDValue FPThreeHalves = DAG.getConstantFP(1.5, VT);
+/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal sqrt, we need to find the zero of the function:
+///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+///     =>
+///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
+SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
+                                          unsigned Iterations) {
+  EVT VT = Arg.getValueType();
+  SDLoc DL(Arg);
+  SDValue MinusThree = DAG.getConstantFP(-3.0, VT);
+  SDValue MinusHalf = DAG.getConstantFP(-0.5, VT);
  
-      AddToWorklist(Est.getNode());
+  // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
+  for (unsigned i = 0; i < Iterations; ++i) {
+    SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
+    AddToWorklist(HalfEst.getNode());
  
-      // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
-      // this entire sequence requires only one FP constant.
-      SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, FPThreeHalves, Op);
-      AddToWorklist(HalfArg.getNode());
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
+    AddToWorklist(Est.getNode());
  
-      HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Op);
-      AddToWorklist(HalfArg.getNode());
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
+    AddToWorklist(Est.getNode());
  
-      // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
-      for (unsigned i = 0; i < Iterations; ++i) {
-        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
-        AddToWorklist(NewEst.getNode());
+    Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
+    AddToWorklist(Est.getNode());
  
-        NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
-        AddToWorklist(NewEst.getNode());
+    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
+    AddToWorklist(Est.getNode());
+  }
+  return Est;
+}
  
-        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPThreeHalves, NewEst);
-        AddToWorklist(NewEst.getNode());
+SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
+  if (Level >= AfterLegalizeDAG)
+    return SDValue();
  
-        Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
-        AddToWorklist(Est.getNode());
-      }
+  // Expose the DAG combiner to the target combiner implementations.
+  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+  unsigned Iterations = 0;
+  bool UseOneConstNR = false;
+  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
+    AddToWorklist(Est.getNode());
+    if (Iterations) {
+      Est = UseOneConstNR ?
+        BuildRsqrtNROneConst(Op, Est, Iterations) :
+        BuildRsqrtNRTwoConst(Op, Est, Iterations);
      }
      return Est;
    }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp

index ef357f4..72d3a59 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7466,7 +7466,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
  
  SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
                                              DAGCombinerInfo &DCI,
-                                            unsigned &RefinementSteps) const {
+                                            unsigned &RefinementSteps,
+                                            bool &UseOneConstNR) const {
    EVT VT = Operand.getValueType();
    if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
        (VT == MVT::f64 && Subtarget.hasFRSQRTE())  ||
@@ -7479,6 +7480,7 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
      RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
      if (VT.getScalarType() == MVT::f64)
        ++RefinementSteps;
+    UseOneConstNR = true;
      return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
    }
    return SDValue();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h

index 39f5987..7ae3673 100644 (file)
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -702,7 +702,8 @@ namespace llvm {
      SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
  
      SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
-                             unsigned &RefinementSteps) const override;
+                             unsigned &RefinementSteps,
+                             bool &UseOneConstNR) const override;
      SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                               unsigned &RefinementSteps) const override;
  
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td

index b635f43..5c88b5d 100644 (file)
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -182,6 +182,8 @@ def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                                     "LEA instruction with certain arguments is slow">;
  def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                                     "INC and DEC instructions are slower than ADD and SUB">;
+def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true",
+                            "Use RSQRT* to optimize square root calculations">;
  
  //===----------------------------------------------------------------------===//
  // X86 processors supported.
@@ -347,7 +349,8 @@ def : ProcessorModel<"btver2", BtVer2Model,
                       [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,
                        FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
                        FeatureBMI, FeatureF16C, FeatureMOVBE,
-                      FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD]>;
+                      FeatureLZCNT, FeaturePOPCNT, FeatureSlowSHLD,
+                      FeatureUseSqrtEst]>;
  
  // Bulldozer
  def : Proc<"bdver1",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index dbe3c4a..b354154 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14367,6 +14367,36 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
    return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
  }
  
+/// The minimum architected relative accuracy is 2^-12. We need one
+/// Newton-Raphson step to have a good float result (24 bits of precision).
+SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps,
+                                            bool &UseOneConstNR) const {
+  // FIXME: We should use instruction latency models to calculate the cost of
+  // each potential sequence, but this is very hard to do reliably because
+  // at least Intel's Core* chips have variable timing based on the number of
+  // significant digits in the divisor and/or sqrt operand.
+  if (!Subtarget->useSqrtEst())
+    return SDValue();
+
+  EVT VT = Op.getValueType();
+  
+  // SSE1 has rsqrtss and rsqrtps.
+  // TODO: Add support for AVX (v8f32) and AVX512 (v16f32).
+  // It is likely not profitable to do this for f64 because a double-precision
+  // rsqrt estimate with refinement on x86 prior to FMA requires at least 16
+  // instructions: convert to single, rsqrtss, convert back to double, refine
+  // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
+  // along with FMA, this could be a throughput win.
+  if (Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) {
+    RefinementSteps = 1;
+    UseOneConstNR = false;
+    return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+  }
+  return SDValue();
+}
+
  static bool isAllOnes(SDValue V) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
    return C && C->isAllOnesValue();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h

index e8e611d..e81a9d1 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1017,6 +1017,11 @@ namespace llvm {
  
      /// Convert a comparison if required by the subtarget.
      SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
+
+    /// Use rsqrt* to speed up sqrt calculations.
+    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps,
+                             bool &UseOneConstNR) const override;
    };
  
    namespace X86 {
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp

index 413fc4b..82d62b4 100644 (file)
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -278,6 +278,7 @@ void X86Subtarget::initializeEnvironment() {
    LEAUsesAG = false;
    SlowLEA = false;
    SlowIncDec = false;
+  UseSqrtEst = false;
    stackAlignment = 4;
    // FIXME: this is a known good value for Yonah. How about others?
    MaxInlineSizeThreshold = 128;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h

index 24ee553..cdecf2a 100644 (file)
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -192,6 +192,11 @@ protected:
    /// SlowIncDec - True if INC and DEC instructions are slow when writing to flags
    bool SlowIncDec;
  
+  /// Use the RSQRT* instructions to optimize square root calculations.
+  /// For this to be profitable, the cost of FSQRT and FDIV must be
+  /// substantially higher than normal FP ops like FADD and FMUL.
+  bool UseSqrtEst;
+
    /// Processor has AVX-512 PreFetch Instructions
    bool HasPFI;
  
@@ -369,6 +374,7 @@ public:
    bool LEAusesAG() const { return LEAUsesAG; }
    bool slowLEA() const { return SlowLEA; }
    bool slowIncDec() const { return SlowIncDec; }
+  bool useSqrtEst() const { return UseSqrtEst; }
    bool hasCDI() const { return HasCDI; }
    bool hasPFI() const { return HasPFI; }
    bool hasERI() const { return HasERI; }
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll

index fc79e31..65f719e 100644 (file)
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
  
  ; generated using "clang -S -O2 -ffast-math -emit-llvm sqrt.c" from
  ; #include <math.h>
@@ -52,9 +53,59 @@ entry:
    ret x86_fp80 %call
  }
  
-; Function Attrs: nounwind readnone
  declare x86_fp80 @__sqrtl_finite(x86_fp80) #1
  
+; If the target's sqrtss and divss instructions are substantially
+; slower than rsqrtss with a Newton-Raphson refinement, we should
+; generate the estimate sequence.
+define float @reciprocal_square_root(float %x) #0 {
+  %sqrt = tail call float @llvm.sqrt.f32(float %x)
+  %div = fdiv fast float 1.0, %sqrt
+  ret float %div
+
+; CHECK-LABEL: reciprocal_square_root:
+; CHECK: sqrtss
+; CHECK-NEXT: movss
+; CHECK-NEXT: divss
+; CHECK-NEXT: retq
+; BTVER2-LABEL: reciprocal_square_root:
+; BTVER2: vrsqrtss
+; BTVER2-NEXT: vmulss
+; BTVER2-NEXT: vmulss
+; BTVER2-NEXT: vmulss
+; BTVER2-NEXT: vaddss
+; BTVER2-NEXT: vmulss
+; BTVER2-NEXT: retq
+}
+
+declare float @llvm.sqrt.f32(float) #1
+
+; If the target's sqrtps and divps instructions are substantially
+; slower than rsqrtps with a Newton-Raphson refinement, we should
+; generate the estimate sequence.
+define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
+  %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
+  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
+  ret <4 x float> %div
+
+; CHECK-LABEL: reciprocal_square_root_v4f32:
+; CHECK: sqrtps
+; CHECK-NEXT: movaps
+; CHECK-NEXT: divps
+; CHECK-NEXT: retq
+; BTVER2-LABEL: reciprocal_square_root_v4f32:
+; BTVER2: vrsqrtps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: vaddps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: retq
+}
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1
+
+
  attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
  attributes #2 = { nounwind readnone }
author	Sanjay Patel <spatel@rotateright.com>
	Fri, 24 Oct 2014 17:02:16 +0000 (17:02 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Fri, 24 Oct 2014 17:02:16 +0000 (17:02 +0000)
llvm/include/llvm/Target/TargetLowering.h		patch \| blob \| history
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
llvm/lib/Target/X86/X86.td		patch \| blob \| history
llvm/lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
llvm/lib/Target/X86/X86Subtarget.cpp		patch \| blob \| history
llvm/lib/Target/X86/X86Subtarget.h		patch \| blob \| history
llvm/test/CodeGen/X86/sqrt-fastmath.ll		patch \| blob \| history