Revert "make reciprocal estimate code generation more flexible by adding command-line options"

author Rafael Espindola <rafael.espindola@gmail.com>

Wed, 3 Jun 2015 05:32:44 +0000 (05:32 +0000)

committer Rafael Espindola <rafael.espindola@gmail.com>

Wed, 3 Jun 2015 05:32:44 +0000 (05:32 +0000)
author Rafael Espindola <rafael.espindola@gmail.com>
Wed, 3 Jun 2015 05:32:44 +0000 (05:32 +0000)
committer Rafael Espindola <rafael.espindola@gmail.com>
Wed, 3 Jun 2015 05:32:44 +0000 (05:32 +0000)
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h

index b824df3..a1b9b4e 100644 (file)
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -24,7 +24,6 @@
  #include "llvm/Support/Host.h"
  #include "llvm/Target/TargetMachine.h"
  #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRecip.h"
  #include <string>
  using namespace llvm;
  
@@ -153,12 +152,6 @@ FuseFPOps("fp-contract",
                           "Only fuse FP ops when the result won't be effected."),
                clEnumValEnd));
  
-cl::list<std::string>
-ReciprocalOps("recip",
-  cl::CommaSeparated,
-  cl::desc("Choose reciprocal operation types and parameters."),
-  cl::value_desc("all,none,default,divf,!vec-sqrtd,vec-divd:0,sqrt:9..."));
-
  cl::opt<bool>
  DontPlaceZerosInBSS("nozero-initialized-in-bss",
                cl::desc("Don't place zero-initialized symbols into bss section"),
@@ -237,7 +230,6 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
    TargetOptions Options;
    Options.LessPreciseFPMADOption = EnableFPMAD;
    Options.AllowFPOpFusion = FuseFPOps;
-  Options.Reciprocals = TargetRecip(ReciprocalOps);
    Options.UnsafeFPMath = EnableUnsafeFPMath;
    Options.NoInfsFPMath = EnableNoInfsFPMath;
    Options.NoNaNsFPMath = EnableNoNaNsFPMath;
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h

index 8f8b78d..07a8f6d 100644 (file)
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -15,7 +15,6 @@
  #ifndef LLVM_TARGET_TARGETOPTIONS_H
  #define LLVM_TARGET_TARGETOPTIONS_H
  
-#include "llvm/Target/TargetRecip.h"
  #include "llvm/MC/MCTargetOptions.h"
  #include <string>
  
@@ -73,8 +72,7 @@ namespace llvm {
            CompressDebugSections(false), FunctionSections(false),
            DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
            TrapFuncName(), FloatABIType(FloatABI::Default),
-          AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()),
-          JTType(JumpTable::Single),
+          AllowFPOpFusion(FPOpFusion::Standard), JTType(JumpTable::Single),
            ThreadModel(ThreadModel::POSIX) {}
  
      /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
@@ -208,9 +206,6 @@ namespace llvm {
      /// the value of this option.
      FPOpFusion::FPOpFusionMode AllowFPOpFusion;
  
-    /// This class encapsulates options for reciprocal-estimate code generation.
-    TargetRecip Reciprocals;
-    
      /// JTType - This flag specifies the type of jump-instruction table to
      /// create for functions that have the jumptable attribute.
      JumpTable::JumpTableType JTType;
@@ -245,7 +240,6 @@ inline bool operator==(const TargetOptions &LHS,
      ARE_EQUAL(TrapFuncName) &&
      ARE_EQUAL(FloatABIType) &&
      ARE_EQUAL(AllowFPOpFusion) &&
-    ARE_EQUAL(Reciprocals) &&
      ARE_EQUAL(JTType) &&
      ARE_EQUAL(ThreadModel) &&
      ARE_EQUAL(MCOptions);
diff --git a/llvm/include/llvm/Target/TargetRecip.h b/llvm/include/llvm/Target/TargetRecip.h

deleted file mode 100644 (file)

index 4cc3672..0000000
--- a/llvm/include/llvm/Target/TargetRecip.h
+++ /dev/null
@@ -1,73 +0,0 @@
-//===--------------------- llvm/Target/TargetRecip.h ------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class is used to customize machine-specific reciprocal estimate code
-// generation in a target-independent way.
-// If a target does not support operations in this specification, then code
-// generation will default to using supported operations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_TARGETRECIP_H
-#define LLVM_TARGET_TARGETRECIP_H
-
-#include "llvm/ADT/StringRef.h"
-#include <vector>
-#include <string>
-#include <map>
-
-namespace llvm {
-
-struct TargetRecip {
-public:
-  TargetRecip();
-
-  /// Initialize all or part of the operations from command-line options or
-  /// a front end.
-  TargetRecip(const std::vector<std::string> &Args);
-  
-  /// Set whether a particular reciprocal operation is enabled and how many
-  /// refinement steps are needed when using it. Use "all" to set enablement
-  /// and refinement steps for all operations.
-  void setDefaults(const StringRef &Key, bool Enable, unsigned RefSteps);
-
-  /// Return true if the reciprocal operation has been enabled by default or
-  /// from the command-line. Return false if the operation has been disabled
-  /// by default or from the command-line.
-  bool isEnabled(const StringRef &Key) const;
-
-  /// Return the number of iterations necessary to refine the
-  /// the result of a machine instruction for the given reciprocal operation.
-  unsigned getRefinementSteps(const StringRef &Key) const;
-
-  bool operator==(const TargetRecip &Other) const;
-
-private:
-  enum {
-    Uninitialized = -1
-  };
-  
-  struct RecipParams {
-    int8_t Enabled;
-    int8_t RefinementSteps;
-    
-    RecipParams() : Enabled(Uninitialized), RefinementSteps(Uninitialized) {}
-  };
-  
-  std::map<StringRef, RecipParams> RecipMap;
-  typedef std::map<StringRef, RecipParams>::iterator RecipIter;
-  typedef std::map<StringRef, RecipParams>::const_iterator ConstRecipIter;
-
-  bool parseGlobalParams(const std::string &Arg);
-  void parseIndividualParams(const std::vector<std::string> &Args);
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/llvm/lib/Target/CMakeLists.txt b/llvm/lib/Target/CMakeLists.txt

index e6d0199..1805437 100644 (file)
--- a/llvm/lib/Target/CMakeLists.txt
+++ b/llvm/lib/Target/CMakeLists.txt
@@ -6,7 +6,6 @@ add_llvm_library(LLVMTarget
    TargetLoweringObjectFile.cpp
    TargetMachine.cpp
    TargetMachineC.cpp
-  TargetRecip.cpp
    TargetSubtargetInfo.cpp
  
    ADDITIONAL_HEADER_DIRS
diff --git a/llvm/lib/Target/TargetRecip.cpp b/llvm/lib/Target/TargetRecip.cpp

deleted file mode 100644 (file)

index 42bc487..0000000
--- a/llvm/lib/Target/TargetRecip.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-//===-------------------------- TargetRecip.cpp ---------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class is used to customize machine-specific reciprocal estimate code
-// generation in a target-independent way.
-// If a target does not support operations in this specification, then code
-// generation will default to using supported operations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetRecip.h"
-#include <map>
-
-using namespace llvm;
-
-// These are the names of the individual reciprocal operations. These are
-// the key strings for queries and command-line inputs.
-// In addition, the command-line interface recognizes the global parameters
-// "all", "none", and "default".
-static const char *RecipOps[] = {
-  "divd",
-  "divf",
-  "vec-divd",
-  "vec-divf",
-  "sqrtd",
-  "sqrtf",
-  "vec-sqrtd",
-  "vec-sqrtf",
-};
-
-// The uninitialized state is needed for the enabled settings and refinement
-// steps because custom settings may arrive via the command-line before target
-// defaults are set.
-TargetRecip::TargetRecip() {
-  unsigned NumStrings = llvm::array_lengthof(RecipOps);
-  for (unsigned i = 0; i < NumStrings; ++i)
-    RecipMap.insert(std::make_pair(RecipOps[i], RecipParams()));
-}
-
-static bool parseRefinementStep(const StringRef &In, size_t &Position,
-                                uint8_t &Value) {
-  const char RefStepToken = ':';
-  Position = In.find(RefStepToken);
-  if (Position == StringRef::npos)
-    return false;
-
-  StringRef RefStepString = In.substr(Position + 1);
-  // Allow exactly one numeric character for the additional refinement
-  // step parameter.
-  if (RefStepString.size() == 1) {
-    char RefStepChar = RefStepString[0];
-    if (RefStepChar >= '0' && RefStepChar <= '9') {
-      Value = RefStepChar - '0';
-      return true;
-    }
-  }
-  report_fatal_error("Invalid refinement step for -recip.");
-}
-
-bool TargetRecip::parseGlobalParams(const std::string &Arg) {
-  StringRef ArgSub = Arg;
-
-  // Look for an optional setting of the number of refinement steps needed
-  // for this type of reciprocal operation.
-  size_t RefPos;
-  uint8_t RefSteps;
-  StringRef RefStepString;
-  if (parseRefinementStep(ArgSub, RefPos, RefSteps)) {
-    // Split the string for further processing.
-    RefStepString = ArgSub.substr(RefPos + 1);
-    ArgSub = ArgSub.substr(0, RefPos);
-  }
-  bool Enable;
-  bool UseDefaults;
-  if (ArgSub == "all") {
-    UseDefaults = false;
-    Enable = true;
-  } else if (ArgSub == "none") {
-    UseDefaults = false;
-    Enable = false;
-  } else if (ArgSub == "default") {
-    UseDefaults = true;
-  } else {
-    // Any other string is invalid or an individual setting.
-    return false;
-  }
-
-  // All enable values will be initialized to target defaults if 'default' was
-  // specified.
-  if (!UseDefaults)
-    for (auto &KV : RecipMap)
-      KV.second.Enabled = Enable;
-
-  // Custom refinement count was specified with all, none, or default.
-  if (!RefStepString.empty())
-    for (auto &KV : RecipMap)
-      KV.second.RefinementSteps = RefSteps;
-  
-  return true;
-}
-
-void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) {
-  static const char DisabledPrefix = '!';
-  unsigned NumArgs = Args.size();
-
-  for (unsigned i = 0; i != NumArgs; ++i) {
-    StringRef Val = Args[i];
-    
-    bool IsDisabled = Val[0] == DisabledPrefix;
-    // Ignore the disablement token for string matching.
-    if (IsDisabled)
-      Val = Val.substr(1);
-    
-    size_t RefPos;
-    uint8_t RefSteps;
-    StringRef RefStepString;
-    if (parseRefinementStep(Val, RefPos, RefSteps)) {
-      // Split the string for further processing.
-      RefStepString = Val.substr(RefPos + 1);
-      Val = Val.substr(0, RefPos);
-    }
-
-    RecipIter Iter = RecipMap.find(Val);
-    if (Iter == RecipMap.end()) {
-      // Try again specifying float suffix.
-      Iter = RecipMap.find(Val.str() + 'f');
-      if (Iter == RecipMap.end()) {
-        Iter = RecipMap.find(Val.str() + 'd');
-        assert(Iter == RecipMap.end() && "Float entry missing from map");
-        report_fatal_error("Invalid option for -recip.");
-      }
-      
-      // The option was specified without a float or double suffix.
-      if (RecipMap[Val.str() + 'd'].Enabled != Uninitialized) {
-        // Make sure that the double entry was not already specified.
-        // The float entry will be checked below.
-        report_fatal_error("Duplicate option for -recip.");
-      }
-    }
-    
-    if (Iter->second.Enabled != Uninitialized)
-      report_fatal_error("Duplicate option for -recip.");
-    
-    // Mark the matched option as found. Do not allow duplicate specifiers.
-    Iter->second.Enabled = !IsDisabled;
-    if (!RefStepString.empty())
-      Iter->second.RefinementSteps = RefSteps;
-    
-    // If the precision was not specified, the double entry is also initialized.
-    if (Val.back() != 'f' && Val.back() != 'd') {
-      RecipMap[Val.str() + 'd'].Enabled = !IsDisabled;
-      if (!RefStepString.empty())
-        RecipMap[Val.str() + 'd'].RefinementSteps = RefSteps;
-    }
-  }
-}
-
-TargetRecip::TargetRecip(const std::vector<std::string> &Args) :
-  TargetRecip() {
-  unsigned NumArgs = Args.size();
-
-  // Check if "all", "default", or "none" was specified.
-  if (NumArgs == 1 && parseGlobalParams(Args[0]))
-    return;
- 
-  parseIndividualParams(Args);
-}
-
-bool TargetRecip::isEnabled(const StringRef &Key) const {
-  ConstRecipIter Iter = RecipMap.find(Key);
-  assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
-  assert(Iter->second.Enabled != Uninitialized &&
-         "Enablement setting was not initialized");
-  return Iter->second.Enabled;
-}
-
-unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const {
-  ConstRecipIter Iter = RecipMap.find(Key);
-  assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
-  assert(Iter->second.RefinementSteps != Uninitialized &&
-         "Refinement step setting was not initialized");
-  return Iter->second.RefinementSteps;
-}
-
-/// Custom settings (previously initialized values) override target defaults.
-void TargetRecip::setDefaults(const StringRef &Key, bool Enable,
-                              unsigned RefSteps) {
-  if (Key == "all") {
-    for (auto &KV : RecipMap) {
-      RecipParams &RP = KV.second;
-      if (RP.Enabled == Uninitialized)
-        RP.Enabled = Enable;
-      if (RP.RefinementSteps == Uninitialized)
-        RP.RefinementSteps = RefSteps;
-    }
-  } else {
-    RecipParams &RP = RecipMap[Key];
-    if (RP.Enabled == Uninitialized)
-      RP.Enabled = Enable;
-    if (RP.RefinementSteps == Uninitialized)
-      RP.RefinementSteps = RefSteps;
-  }
-}
-
-bool TargetRecip::operator==(const TargetRecip &Other) const {
-  for (const auto &KV : RecipMap) {
-    const StringRef &Op = KV.first;
-    const RecipParams &RP = KV.second;
-    const RecipParams &OtherRP = Other.RecipMap.find(Op)->second;
-    if (RP.RefinementSteps != OtherRP.RefinementSteps)
-      return false;
-    if (RP.Enabled != OtherRP.Enabled)
-      return false;
-  }
-  return true;
-}
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td

index 7001910..c70e2e9 100644 (file)
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -188,6 +188,10 @@ def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
                                     "LEA instruction with certain arguments is slow">;
  def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
                                     "INC and DEC instructions are slower than ADD and SUB">;
+def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true",
+                            "Use RSQRT* to optimize square root calculations">;
+def FeatureUseRecipEst : SubtargetFeature<"use-recip-est", "UseReciprocalEst",
+                          "true", "Use RCP* to optimize division calculations">;
  def FeatureSoftFloat
      : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                         "Use software floating point features.">;
@@ -440,7 +444,7 @@ def : ProcessorModel<"btver2", BtVer2Model,
                        FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
                        FeatureBMI, FeatureF16C, FeatureMOVBE,
                        FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem,
-                      FeatureSlowSHLD]>;
+                      FeatureSlowSHLD, FeatureUseSqrtEst, FeatureUseRecipEst]>;
  
  // TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips.
  
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 248c452..a4787c8 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -67,6 +67,12 @@ static cl::opt<bool> ExperimentalVectorWideningLegalization(
               "rather than promotion."),
      cl::Hidden);
  
+static cl::opt<int> ReciprocalEstimateRefinementSteps(
+    "x86-recip-refinement-steps", cl::init(1),
+    cl::desc("Specify the number of Newton-Raphson iterations applied to the "
+             "result of the hardware reciprocal estimate instruction."),
+    cl::NotHidden);
+
  // Forward declarations.
  static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
                         SDValue V2);
@@ -13000,31 +13006,29 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
                                              DAGCombinerInfo &DCI,
                                              unsigned &RefinementSteps,
                                              bool &UseOneConstNR) const {
+  // FIXME: We should use instruction latency models to calculate the cost of
+  // each potential sequence, but this is very hard to do reliably because
+  // at least Intel's Core* chips have variable timing based on the number of
+  // significant digits in the divisor and/or sqrt operand.
+  if (!Subtarget->useSqrtEst())
+    return SDValue();
+
    EVT VT = Op.getValueType();
-  const char *RecipOp;
  
-  // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
+  // SSE1 has rsqrtss and rsqrtps.
    // TODO: Add support for AVX512 (v16f32).
    // It is likely not profitable to do this for f64 because a double-precision
    // rsqrt estimate with refinement on x86 prior to FMA requires at least 16
    // instructions: convert to single, rsqrtss, convert back to double, refine
    // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
    // along with FMA, this could be a throughput win.
-  if (VT == MVT::f32 && Subtarget->hasSSE1())
-    RecipOp = "sqrtf";
-  else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
-           (VT == MVT::v8f32 && Subtarget->hasAVX()))
-    RecipOp = "vec-sqrtf";
-  else
-    return SDValue();
-  
-  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
-  if (!Recips.isEnabled(RecipOp))
-    return SDValue();
-  
-  RefinementSteps = Recips.getRefinementSteps(RecipOp);
-  UseOneConstNR = false;
-  return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+  if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+      (Subtarget->hasAVX() && VT == MVT::v8f32)) {
+    RefinementSteps = 1;
+    UseOneConstNR = false;
+    return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+  }
+  return SDValue();
  }
  
  /// The minimum architected relative accuracy is 2^-12. We need one
@@ -13032,9 +13036,15 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
  SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
                                              DAGCombinerInfo &DCI,
                                              unsigned &RefinementSteps) const {
+  // FIXME: We should use instruction latency models to calculate the cost of
+  // each potential sequence, but this is very hard to do reliably because
+  // at least Intel's Core* chips have variable timing based on the number of
+  // significant digits in the divisor.
+  if (!Subtarget->useReciprocalEst())
+    return SDValue();
+
    EVT VT = Op.getValueType();
-  const char *RecipOp;
-  
+
    // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
    // TODO: Add support for AVX512 (v16f32).
    // It is likely not profitable to do this for f64 because a double-precision
@@ -13042,20 +13052,12 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
    // 15 instructions: convert to single, rcpss, convert back to double, refine
    // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
    // along with FMA, this could be a throughput win.
-  if (VT == MVT::f32 && Subtarget->hasSSE1())
-    RecipOp = "divf";
-  else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
-           (VT == MVT::v8f32 && Subtarget->hasAVX()))
-    RecipOp = "vec-divf";
-  else
-    return SDValue();
-  
-  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
-  if (!Recips.isEnabled(RecipOp))
-    return SDValue();
-
-  RefinementSteps = Recips.getRefinementSteps(RecipOp);
-  return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+  if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+      (Subtarget->hasAVX() && VT == MVT::v8f32)) {
+    RefinementSteps = ReciprocalEstimateRefinementSteps;
+    return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+  }
+  return SDValue();
  }
  
  /// If we have at least two divisions that use the same divisor, convert to
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp

index 65e702e..1cdab14 100644 (file)
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -273,6 +273,8 @@ void X86Subtarget::initializeEnvironment() {
    LEAUsesAG = false;
    SlowLEA = false;
    SlowIncDec = false;
+  UseSqrtEst = false;
+  UseReciprocalEst = false;
    stackAlignment = 4;
    // FIXME: this is a known good value for Yonah. How about others?
    MaxInlineSizeThreshold = 128;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h

index 27429d0..455dd77 100644 (file)
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -190,6 +190,16 @@ protected:
    /// True if INC and DEC instructions are slow when writing to flags
    bool SlowIncDec;
  
+  /// Use the RSQRT* instructions to optimize square root calculations.
+  /// For this to be profitable, the cost of FSQRT and FDIV must be
+  /// substantially higher than normal FP ops like FADD and FMUL.
+  bool UseSqrtEst;
+
+  /// Use the RCP* instructions to optimize FP division calculations.
+  /// For this to be profitable, the cost of FDIV must be
+  /// substantially higher than normal FP ops like FADD and FMUL.
+  bool UseReciprocalEst;
+
    /// Processor has AVX-512 PreFetch Instructions
    bool HasPFI;
  
@@ -367,6 +377,8 @@ public:
    bool LEAusesAG() const { return LEAUsesAG; }
    bool slowLEA() const { return SlowLEA; }
    bool slowIncDec() const { return SlowIncDec; }
+  bool useSqrtEst() const { return UseSqrtEst; }
+  bool useReciprocalEst() const { return UseReciprocalEst; }
    bool hasCDI() const { return HasCDI; }
    bool hasPFI() const { return HasPFI; }
    bool hasERI() const { return HasERI; }
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp

index 646cff7..4116cef 100644 (file)
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -105,13 +105,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU,
    if (Subtarget.isTargetWin64())
      this->Options.TrapUnreachable = true;
  
-  // TODO: By default, all reciprocal estimate operations are off because
-  // that matches the behavior before TargetRecip was added (except for btver2
-  // which used subtarget features to enable this type of codegen).
-  // We should change this to match GCC behavior where everything but
-  // scalar division estimates are turned on by default with -ffast-math.
-  this->Options.Reciprocals.setDefaults("all", false, 1);
-
    initAsmInfo();
  }
  
diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll

index 7f1521a..fcd0770 100644 (file)
--- a/llvm/test/CodeGen/X86/recip-fastmath.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath.ll
@@ -1,6 +1,6 @@
  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est | FileCheck %s --check-prefix=RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
  
  ; If the target's divss/divps instructions are substantially
  ; slower than rcpss/rcpps with a Newton-Raphson refinement,
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll

index 373fa53..4c6b521 100644 (file)
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1,5 +1,5 @@
  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-sqrt-est | FileCheck %s --check-prefix=ESTIMATE
  
  declare double @__sqrt_finite(double) #0
  declare float @__sqrtf_finite(float) #0
author	Rafael Espindola <rafael.espindola@gmail.com>
	Wed, 3 Jun 2015 05:32:44 +0000 (05:32 +0000)
committer	Rafael Espindola <rafael.espindola@gmail.com>
	Wed, 3 Jun 2015 05:32:44 +0000 (05:32 +0000)
llvm/include/llvm/CodeGen/CommandFlags.h		patch | blob | history
llvm/include/llvm/Target/TargetOptions.h		patch | blob | history
llvm/include/llvm/Target/TargetRecip.h	[deleted file]	patch | blob | history
llvm/lib/Target/CMakeLists.txt		patch | blob | history
llvm/lib/Target/TargetRecip.cpp	[deleted file]	patch | blob | history
llvm/lib/Target/X86/X86.td		patch | blob | history
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/lib/Target/X86/X86Subtarget.cpp		patch | blob | history
llvm/lib/Target/X86/X86Subtarget.h		patch | blob | history
llvm/lib/Target/X86/X86TargetMachine.cpp		patch | blob | history
llvm/test/CodeGen/X86/recip-fastmath.ll		patch | blob | history
llvm/test/CodeGen/X86/sqrt-fastmath.ll		patch | blob | history