This is a retry of r284495 which was reverted at r284513 due to use-after-scope bugs
caused by faulty usage of StringRef.
This version also renames a pair of functions:
getRecipEstimateDivEnabled()
getRecipEstimateSqrtEnabled()
as suggested by Eric Christopher.
original commit msg:
[Target] remove TargetRecip class; move reciprocal estimate isel functionality to TargetLowering
This is a follow-up to https://reviews.llvm.org/D24816 - where we changed reciprocal estimates to be function attributes
rather than TargetOptions.
This patch is intended to be a structural, but not functional change. By moving all of the
TargetRecip functionality into TargetLowering, we can remove all of the reciprocal estimate
state, shield the callers from the string format implementation, and simplify/localize the
logic needed for a target to enable this.
If a function has a "reciprocal-estimates" attribute, those settings may override the target's
default reciprocal preferences for whatever operation and data type we're trying to optimize.
If there's no attribute string or specific setting for the op/type pair, just use the target
default settings.
As noted earlier, a better solution would be to move the reciprocal estimate settings to IR
instructions and SDNodes rather than function attributes, but that's a multi-step job that
requires infrastructure improvements. I intend to work on that, but it's not clear how long
it will take to get all the pieces in place.
Differential Revision: https://reviews.llvm.org/D25440
llvm-svn: 284746
class MCSymbol;
template<typename T> class SmallVectorImpl;
class DataLayout;
- struct TargetRecip;
class TargetRegisterClass;
class TargetLibraryInfo;
class TargetLoweringObjectFile;
return false;
}
+ /// Reciprocal estimate status values used by the functions below.
+ enum ReciprocalEstimate : int {
+ Unspecified = -1,
+ Disabled = 0,
+ Enabled = 1
+ };
+
+ /// Return a ReciprocalEstimate enum value for a square root of the given type
+ /// based on the function's attributes. If the operation is not overridden by
+ /// the function's attributes, "Unspecified" is returned and target defaults
+ /// are expected to be used for instruction selection.
+ int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
+
+ /// Return a ReciprocalEstimate enum value for a division of the given type
+ /// based on the function's attributes. If the operation is not overridden by
+ /// the function's attributes, "Unspecified" is returned and target defaults
+ /// are expected to be used for instruction selection.
+ int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
+
+ /// Return the refinement step count for a square root of the given type based
+ /// on the function's attributes. If the operation is not overridden by
+ /// the function's attributes, "Unspecified" is returned and target defaults
+ /// are expected to be used for instruction selection.
+ int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
+
+ /// Return the refinement step count for a division of the given type based
+ /// on the function's attributes. If the operation is not overridden by
+ /// the function's attributes, "Unspecified" is returned and target defaults
+ /// are expected to be used for instruction selection.
+ int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
+
/// Returns true if target has indicated at least one type should be bypassed.
bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
}
}
- /// Return the reciprocal estimate code generation preferences for this target
- /// after potentially overriding settings using the function's attributes.
- /// FIXME: Like all unsafe-math target settings, this should really be an
- /// instruction-level attribute/metadata/FMF.
- TargetRecip getTargetRecipForFunc(MachineFunction &MF) const;
-
/// Vector types are broken down into some number of legal first class types.
/// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
/// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
/// sequence of memory operands that is recognized by PrologEpilogInserter.
MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- TargetRecip ReciprocalEstimates;
};
/// This class defines information used to lower LLVM code to legal SelectionDAG
/// roots.
/// Return a reciprocal square root estimate value for the input operand.
- /// The RefinementSteps output is the number of Newton-Raphson refinement
- /// iterations required to generate a sufficient (though not necessarily
- /// IEEE-754 compliant) estimate for the value type.
+ /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
+ /// 'Enabled' as set by a potential default override attribute.
+ /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
+ /// refinement iterations required to generate a sufficient (though not
+ /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
/// The boolean UseOneConstNR output is used to select a Newton-Raphson
/// algorithm implementation that uses one constant or two constants.
/// A target may choose to implement its own refinement within this function.
/// If that's true, then return '0' as the number of RefinementSteps to avoid
/// any further refinement of the estimate.
/// An empty SDValue return means no estimate sequence can be created.
- virtual SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ virtual SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
bool &UseOneConstNR) const {
return SDValue();
}
/// Return a reciprocal estimate value for the input operand.
- /// The RefinementSteps output is the number of Newton-Raphson refinement
- /// iterations required to generate a sufficient (though not necessarily
- /// IEEE-754 compliant) estimate for the value type.
+ /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
+ /// 'Enabled' as set by a potential default override attribute.
+ /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
+ /// refinement iterations required to generate a sufficient (though not
+ /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
/// A target may choose to implement its own refinement within this function.
/// If that's true, then return '0' as the number of RefinementSteps to avoid
/// any further refinement of the estimate.
/// An empty SDValue return means no estimate sequence can be created.
- virtual SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
+ virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps) const {
return SDValue();
}
#ifndef LLVM_TARGET_TARGETOPTIONS_H
#define LLVM_TARGET_TARGETOPTIONS_H
-#include "llvm/Target/TargetRecip.h"
#include "llvm/MC/MCTargetOptions.h"
namespace llvm {
+++ /dev/null
-//===--------------------- llvm/Target/TargetRecip.h ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class is used to customize machine-specific reciprocal estimate code
-// generation in a target-independent way.
-// If a target does not support operations in this specification, then code
-// generation will default to using supported operations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_TARGETRECIP_H
-#define LLVM_TARGET_TARGETRECIP_H
-
-#include <cstdint>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
-class StringRef;
-
-struct TargetRecip {
-public:
- TargetRecip();
-
- /// Parse a comma-separated string of reciprocal settings to set values in
- /// this struct.
- void set(StringRef &Args);
-
- /// Set enablement and refinement steps for a particular reciprocal operation.
- /// Use "all" to give all operations the same values.
- void set(StringRef Key, bool Enable, unsigned RefSteps);
-
- /// Return true if the reciprocal operation has been enabled.
- bool isEnabled(StringRef Key) const;
-
- /// Return the number of iterations necessary to refine the
- /// the result of a machine instruction for the given reciprocal operation.
- unsigned getRefinementSteps(StringRef Key) const;
-
- bool operator==(const TargetRecip &Other) const;
-
-private:
- // TODO: We should be able to use special values (enums) to simplify this into
- // just an int, but we have to be careful because the user is allowed to
- // specify "default" as a setting and just change the refinement step count.
- struct RecipParams {
- bool Enabled;
- int8_t RefinementSteps;
-
- RecipParams() : Enabled(false), RefinementSteps(0) {}
- };
-
- std::map<StringRef, RecipParams> RecipMap;
- typedef std::map<StringRef, RecipParams>::iterator RecipIter;
- typedef std::map<StringRef, RecipParams>::const_iterator ConstRecipIter;
-
- bool parseGlobalParams(const std::string &Arg);
- void parseIndividualParams(const std::vector<std::string> &Args);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_TARGET_TARGETRECIP_H
if (Level >= AfterLegalizeDAG)
return SDValue();
- // Expose the DAG combiner to the target combiner implementations.
- TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
- unsigned Iterations = 0;
- if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getDivRefinementSteps(VT, MF);
+ if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
if (Iterations) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
AddToWorklist(Est.getNode());
// Newton iterations: Est = Est + Est (1 - Arg * Est)
- for (unsigned i = 0; i < Iterations; ++i) {
+ for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
AddToWorklist(NewEst.getNode());
if (Level >= AfterLegalizeDAG)
return SDValue();
- // Expose the DAG combiner to the target combiner implementations.
- TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
- unsigned Iterations = 0;
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
+
bool UseOneConstNR = false;
- if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
+ if (SDValue Est =
+ TLI.getRsqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR)) {
AddToWorklist(Est.getNode());
if (Iterations) {
Est = UseOneConstNR
- ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
- : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
}
return Est;
}
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
InitCmpLibcallCCs(CmpLibcallCCs);
InitLibcallCallingConvs(LibcallCallingConvs);
- ReciprocalEstimates.set("all", false, 0);
}
void TargetLoweringBase::initActions() {
return MVT::i32; // return the default value
}
-TargetRecip
-TargetLoweringBase::getTargetRecipForFunc(MachineFunction &MF) const {
- const Function *F = MF.getFunction();
- StringRef RecipAttrName = "reciprocal-estimates";
- if (!F->hasFnAttribute(RecipAttrName))
- return ReciprocalEstimates;
-
- // Make a copy of the target's default reciprocal codegen settings.
- TargetRecip Recips = ReciprocalEstimates;
-
- // Override any settings that are customized for this function.
- StringRef RecipString = F->getFnAttribute(RecipAttrName).getValueAsString();
- Recips.set(RecipString);
- return Recips;
-}
-
/// getVectorTypeBreakdown - Vector types are broken down into some number of
/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
MaximumJumpTableSize = Val;
}
+
+//===----------------------------------------------------------------------===//
+// Reciprocal Estimates
+//===----------------------------------------------------------------------===//
+
+/// Get the reciprocal estimate attribute string for a function that will
+/// override the target defaults.
+static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ StringRef RecipAttrName = "reciprocal-estimates";
+ if (!F->hasFnAttribute(RecipAttrName))
+ return StringRef();
+
+ return F->getFnAttribute(RecipAttrName).getValueAsString();
+}
+
+/// Construct a string for the given reciprocal operation of the given type.
+/// This string should match the corresponding option to the front-end's
+/// "-mrecip" flag assuming those strings have been passed through in an
+/// attribute string. For example, "vec-divf" for a division of a vXf32.
+static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
+ std::string Name = VT.isVector() ? "vec-" : "";
+
+ Name += IsSqrt ? "sqrt" : "div";
+
+ // TODO: Handle "half" or other float types?
+ if (VT.getScalarType() == MVT::f64) {
+ Name += "d";
+ } else {
+ assert(VT.getScalarType() == MVT::f32 &&
+ "Unexpected FP type for reciprocal estimate");
+ Name += "f";
+ }
+
+ return Name;
+}
+
+/// Return the character position and value (a single numeric character) of a
+/// customized refinement operation in the input string if it exists. Return
+/// false if there is no customized refinement step count.
+static bool parseRefinementStep(StringRef In, size_t &Position,
+ uint8_t &Value) {
+ const char RefStepToken = ':';
+ Position = In.find(RefStepToken);
+ if (Position == StringRef::npos)
+ return false;
+
+ StringRef RefStepString = In.substr(Position + 1);
+ // Allow exactly one numeric character for the additional refinement
+ // step parameter.
+ if (RefStepString.size() == 1) {
+ char RefStepChar = RefStepString[0];
+ if (RefStepChar >= '0' && RefStepChar <= '9') {
+ Value = RefStepChar - '0';
+ return true;
+ }
+ }
+ report_fatal_error("Invalid refinement step for -recip.");
+}
+
+/// For the input attribute string, return one of the ReciprocalEstimate enum
+/// status values (enabled, disabled, or not specified) for this operation on
+/// the specified data type.
+static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
+ if (Override.empty())
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ SmallVector<StringRef, 4> OverrideVector;
+ SplitString(Override, OverrideVector, ",");
+ unsigned NumArgs = OverrideVector.size();
+
+ // Check if "all", "none", or "default" was specified.
+ if (NumArgs == 1) {
+ // Look for an optional setting of the number of refinement steps needed
+ // for this type of reciprocal operation.
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (parseRefinementStep(Override, RefPos, RefSteps)) {
+ // Split the string for further processing.
+ Override = Override.substr(0, RefPos);
+ }
+
+ // All reciprocal types are enabled.
+ if (Override == "all")
+ return TargetLoweringBase::ReciprocalEstimate::Enabled;
+
+ // All reciprocal types are disabled.
+ if (Override == "none")
+ return TargetLoweringBase::ReciprocalEstimate::Disabled;
+
+ // Target defaults for enablement are used.
+ if (Override == "default")
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+ }
+
+ // The attribute string may omit the size suffix ('f'/'d').
+ std::string VTName = getReciprocalOpName(IsSqrt, VT);
+ std::string VTNameNoSize = VTName;
+ VTName.pop_back();
+ static const char DisabledPrefix = '!';
+
+ for (StringRef RecipType : OverrideVector) {
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (parseRefinementStep(RecipType, RefPos, RefSteps))
+ RecipType = RecipType.substr(0, RefPos);
+
+ // Ignore the disablement token for string matching.
+ bool IsDisabled = RecipType[0] == DisabledPrefix;
+ if (IsDisabled)
+ RecipType = RecipType.substr(1);
+
+ if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+ return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
+ : TargetLoweringBase::ReciprocalEstimate::Enabled;
+ }
+
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+/// For the input attribute string, return the customized refinement step count
+/// for this operation on the specified data type. If the step count does not
+/// exist, return the ReciprocalEstimate enum value for unspecified.
+static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
+ if (Override.empty())
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ SmallVector<StringRef, 4> OverrideVector;
+ SplitString(Override, OverrideVector, ",");
+ unsigned NumArgs = OverrideVector.size();
+
+ // Check if "all", "default", or "none" was specified.
+ if (NumArgs == 1) {
+ // Look for an optional setting of the number of refinement steps needed
+ // for this type of reciprocal operation.
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (!parseRefinementStep(Override, RefPos, RefSteps))
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ // Split the string for further processing.
+ Override = Override.substr(0, RefPos);
+ assert(Override != "none" &&
+ "Disabled reciprocals, but specifed refinement steps?");
+
+ // If this is a general override, return the specified number of steps.
+ if (Override == "all" || Override == "default")
+ return RefSteps;
+ }
+
+ // The attribute string may omit the size suffix ('f'/'d').
+ std::string VTName = getReciprocalOpName(IsSqrt, VT);
+ std::string VTNameNoSize = VTName;
+ VTName.pop_back();
+
+ for (StringRef RecipType : OverrideVector) {
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (!parseRefinementStep(RecipType, RefPos, RefSteps))
+ continue;
+
+ RecipType = RecipType.substr(0, RefPos);
+ if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+ return RefSteps;
+ }
+
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT,
+ MachineFunction &MF) const {
+ return getOpEnabled(true, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT,
+ MachineFunction &MF) const {
+ return getOpEnabled(false, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getSqrtRefinementSteps(EVT VT,
+ MachineFunction &MF) const {
+ return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getDivRefinementSteps(EVT VT,
+ MachineFunction &MF) const {
+ return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
+}
}
SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
bool &UseOneConstNR) const {
- SelectionDAG &DAG = DCI.DAG;
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
}
SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
- SelectionDAG &DAG = DCI.DAG;
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const {
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
return true;
}
- SDValue getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
+ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const override;
virtual SDNode *PostISelFolding(MachineSDNode *N,
SelectionDAG &DAG) const = 0;
TargetLoweringObjectFile.cpp
TargetMachine.cpp
TargetMachineC.cpp
- TargetRecip.cpp
TargetSubtargetInfo.cpp
ADDITIONAL_HEADER_DIRS
setTargetDAGCombine(ISD::FSQRT);
}
- // For the estimates, convergence is quadratic, so we essentially double the
- // number of digits correct after every iteration. For both FRE and FRSQRTE,
- // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
- // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
- unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
- RefinementSteps64 = RefinementSteps + 1;
-
- ReciprocalEstimates.set("sqrtf", true, RefinementSteps);
- ReciprocalEstimates.set("vec-sqrtf", true, RefinementSteps);
- ReciprocalEstimates.set("divf", true, RefinementSteps);
- ReciprocalEstimates.set("vec-divf", true, RefinementSteps);
-
- ReciprocalEstimates.set("sqrtd", true, RefinementSteps64);
- ReciprocalEstimates.set("vec-sqrtd", true, RefinementSteps64);
- ReciprocalEstimates.set("divd", true, RefinementSteps64);
- ReciprocalEstimates.set("vec-divd", true, RefinementSteps64);
-
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
-static std::string getRecipOp(const char *Base, EVT VT) {
- std::string RecipOp(Base);
+static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
+ // For the estimates, convergence is quadratic, so we essentially double the
+ // number of digits correct after every iteration. For both FRE and FRSQRTE,
+ // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+ // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+ int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
- RecipOp += "d";
- else
- RecipOp += "f";
-
- if (VT.isVector())
- RecipOp = "vec-" + RecipOp;
-
- return RecipOp;
+ RefinementSteps++;
+ return RefinementSteps;
}
-SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
bool &UseOneConstNR) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
- std::string RecipOp = getRecipOp("sqrt", VT);
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = true;
- return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
+ return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
-SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled,
+ int &RefinementSteps) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
- std::string RecipOp = getRecipOp("div", VT);
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
-
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
+ return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
+ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const override;
unsigned combineRepeatedFPDivisors() const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
+++ /dev/null
-//===-------------------------- TargetRecip.cpp ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class is used to customize machine-specific reciprocal estimate code
-// generation in a target-independent way.
-// If a target does not support operations in this specification, then code
-// generation will default to using supported operations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetRecip.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-// These are the names of the individual reciprocal operations. These are
-// the key strings for queries and command-line inputs.
-// In addition, the command-line interface recognizes the global parameters
-// "all", "none", and "default".
-static const char *const RecipOps[] = {
- "divd",
- "divf",
- "vec-divd",
- "vec-divf",
- "sqrtd",
- "sqrtf",
- "vec-sqrtd",
- "vec-sqrtf",
-};
-
-/// All operations are disabled by default and refinement steps are set to zero.
-TargetRecip::TargetRecip() {
- unsigned NumStrings = llvm::array_lengthof(RecipOps);
- for (unsigned i = 0; i < NumStrings; ++i)
- RecipMap.insert(std::make_pair(RecipOps[i], RecipParams()));
-}
-
-static bool parseRefinementStep(StringRef In, size_t &Position,
- uint8_t &Value) {
- const char RefStepToken = ':';
- Position = In.find(RefStepToken);
- if (Position == StringRef::npos)
- return false;
-
- StringRef RefStepString = In.substr(Position + 1);
- // Allow exactly one numeric character for the additional refinement
- // step parameter.
- if (RefStepString.size() == 1) {
- char RefStepChar = RefStepString[0];
- if (RefStepChar >= '0' && RefStepChar <= '9') {
- Value = RefStepChar - '0';
- return true;
- }
- }
- report_fatal_error("Invalid refinement step for -recip.");
-}
-
-bool TargetRecip::parseGlobalParams(const std::string &Arg) {
- StringRef ArgSub = Arg;
-
- // Look for an optional setting of the number of refinement steps needed
- // for this type of reciprocal operation.
- size_t RefPos;
- uint8_t RefSteps;
- StringRef RefStepString;
- if (parseRefinementStep(ArgSub, RefPos, RefSteps)) {
- // Split the string for further processing.
- RefStepString = ArgSub.substr(RefPos + 1);
- ArgSub = ArgSub.substr(0, RefPos);
- }
- bool Enable;
- bool UseDefaults;
- if (ArgSub == "all") {
- UseDefaults = false;
- Enable = true;
- } else if (ArgSub == "none") {
- UseDefaults = false;
- Enable = false;
- } else if (ArgSub == "default") {
- UseDefaults = true;
- } else {
- // Any other string is invalid or an individual setting.
- return false;
- }
-
- // All enable values will be initialized to target defaults if 'default' was
- // specified.
- if (!UseDefaults)
- for (auto &KV : RecipMap)
- KV.second.Enabled = Enable;
-
- // Custom refinement count was specified with all, none, or default.
- if (!RefStepString.empty())
- for (auto &KV : RecipMap)
- KV.second.RefinementSteps = RefSteps;
-
- return true;
-}
-
-void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) {
- static const char DisabledPrefix = '!';
- unsigned NumArgs = Args.size();
-
- for (unsigned i = 0; i != NumArgs; ++i) {
- StringRef Val = Args[i];
-
- bool IsDisabled = Val[0] == DisabledPrefix;
- // Ignore the disablement token for string matching.
- if (IsDisabled)
- Val = Val.substr(1);
-
- size_t RefPos;
- uint8_t RefSteps;
- StringRef RefStepString;
- if (parseRefinementStep(Val, RefPos, RefSteps)) {
- // Split the string for further processing.
- RefStepString = Val.substr(RefPos + 1);
- Val = Val.substr(0, RefPos);
- }
-
- RecipIter Iter = RecipMap.find(Val);
- if (Iter == RecipMap.end()) {
- // Try again specifying float suffix.
- Iter = RecipMap.find(Val.str() + 'f');
- if (Iter == RecipMap.end()) {
- Iter = RecipMap.find(Val.str() + 'd');
- assert(Iter == RecipMap.end() && "Float entry missing from map");
- report_fatal_error("Invalid option for -recip.");
- }
- }
-
- // Mark the matched option as found. Do not allow duplicate specifiers.
- Iter->second.Enabled = !IsDisabled;
- if (!RefStepString.empty())
- Iter->second.RefinementSteps = RefSteps;
-
- // If the precision was not specified, the double entry is also initialized.
- if (Val.back() != 'f' && Val.back() != 'd') {
- RecipParams &Params = RecipMap[Val.str() + 'd'];
- Params.Enabled = !IsDisabled;
- if (!RefStepString.empty())
- Params.RefinementSteps = RefSteps;
- }
- }
-}
-
-void TargetRecip::set(StringRef &RecipString) {
- SmallVector<StringRef, 4> RecipStringVector;
- SplitString(RecipString, RecipStringVector, ",");
- std::vector<std::string> RecipVector;
- for (unsigned i = 0; i < RecipStringVector.size(); ++i)
- RecipVector.push_back(RecipStringVector[i].str());
-
- unsigned NumArgs = RecipVector.size();
-
- // Check if "all", "default", or "none" was specified.
- if (NumArgs == 1 && parseGlobalParams(RecipVector[0]))
- return;
-
- parseIndividualParams(RecipVector);
-}
-
-bool TargetRecip::isEnabled(StringRef Key) const {
- ConstRecipIter Iter = RecipMap.find(Key);
- assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
- return Iter->second.Enabled;
-}
-
-unsigned TargetRecip::getRefinementSteps(StringRef Key) const {
- ConstRecipIter Iter = RecipMap.find(Key);
- assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
- return Iter->second.RefinementSteps;
-}
-
-void TargetRecip::set(StringRef Key, bool Enable, unsigned RefSteps) {
- if (Key == "all") {
- for (auto &KV : RecipMap) {
- RecipParams &RP = KV.second;
- RP.Enabled = Enable;
- RP.RefinementSteps = RefSteps;
- }
- } else {
- RecipParams &RP = RecipMap[Key];
- RP.Enabled = Enable;
- RP.RefinementSteps = RefSteps;
- }
-}
-
-bool TargetRecip::operator==(const TargetRecip &Other) const {
- for (const auto &KV : RecipMap) {
- StringRef Op = KV.first;
- const RecipParams &RP = KV.second;
- const RecipParams &OtherRP = Other.RecipMap.find(Op)->second;
- if (RP.RefinementSteps != OtherRP.RefinementSteps)
- return false;
- if (RP.Enabled != OtherRP.Enabled)
- return false;
- }
- return true;
-}
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRecip.h"
#include "X86IntrinsicsInfo.h"
#include <bitset>
#include <numeric>
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- // By default (and when -ffast-math is on), enable estimate codegen with 1
- // refinement step for floats (not doubles) except scalar division. Scalar
- // division estimates are disabled because they break too much real-world
- // code. These defaults are intended to match GCC behavior.
- ReciprocalEstimates.set("sqrtf", true, 1);
- ReciprocalEstimates.set("divf", false, 1);
- ReciprocalEstimates.set("vec-sqrtf", true, 1);
- ReciprocalEstimates.set("vec-divf", true, 1);
-
// For 64-bit, since we have so many registers, use the ILP scheduler.
// For 32-bit, use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
bool &UseOneConstNR) const {
EVT VT = Op.getValueType();
- const char *RecipOp;
// SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
// TODO: Add support for AVX512 (v16f32).
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if (VT == MVT::f32 && Subtarget.hasSSE1())
- RecipOp = "sqrtf";
- else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget.hasAVX()))
- RecipOp = "vec-sqrtf";
- else
- return SDValue();
+ if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget.hasAVX())) {
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = 1;
- TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
-
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- UseOneConstNR = false;
- return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+ UseOneConstNR = false;
+ return DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
}
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
-SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
+SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
+ int Enabled,
+ int &RefinementSteps) const {
EVT VT = Op.getValueType();
- const char *RecipOp;
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if (VT == MVT::f32 && Subtarget.hasSSE1())
- RecipOp = "divf";
- else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget.hasAVX()))
- RecipOp = "vec-divf";
- else
- return SDValue();
- TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
+ if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget.hasAVX())) {
+ // Enable estimate codegen with 1 refinement step for vector division.
+ // Scalar division estimates are disabled because they break too much
+ // real-world code. These defaults are intended to match GCC behavior.
+ if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)
+ return SDValue();
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = 1;
+
+ return DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
}
/// If we have at least two divisions that use the same divisor, convert to
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
/// Use rsqrt* to speed up sqrt calculations.
- SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
bool &UseOneConstNR) const override;
/// Use rcp* to speed up fdiv calculations.
- SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
+ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const override;
/// Reassociate floating point divisions into multiply by reciprocal.
unsigned combineRepeatedFPDivisors() const override;