This reverts commit r238842.
It broke the -DBUILD_SHARED_LIBS=ON build.
llvm-svn: 238900
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRecip.h"
#include <string>
using namespace llvm;
"Only fuse FP ops when the result won't be effected."),
clEnumValEnd));
-cl::list<std::string>
-ReciprocalOps("recip",
- cl::CommaSeparated,
- cl::desc("Choose reciprocal operation types and parameters."),
- cl::value_desc("all,none,default,divf,!vec-sqrtd,vec-divd:0,sqrt:9..."));
-
cl::opt<bool>
DontPlaceZerosInBSS("nozero-initialized-in-bss",
cl::desc("Don't place zero-initialized symbols into bss section"),
TargetOptions Options;
Options.LessPreciseFPMADOption = EnableFPMAD;
Options.AllowFPOpFusion = FuseFPOps;
- Options.Reciprocals = TargetRecip(ReciprocalOps);
Options.UnsafeFPMath = EnableUnsafeFPMath;
Options.NoInfsFPMath = EnableNoInfsFPMath;
Options.NoNaNsFPMath = EnableNoNaNsFPMath;
#ifndef LLVM_TARGET_TARGETOPTIONS_H
#define LLVM_TARGET_TARGETOPTIONS_H
-#include "llvm/Target/TargetRecip.h"
#include "llvm/MC/MCTargetOptions.h"
#include <string>
CompressDebugSections(false), FunctionSections(false),
DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
TrapFuncName(), FloatABIType(FloatABI::Default),
- AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()),
- JTType(JumpTable::Single),
+ AllowFPOpFusion(FPOpFusion::Standard), JTType(JumpTable::Single),
ThreadModel(ThreadModel::POSIX) {}
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
/// the value of this option.
FPOpFusion::FPOpFusionMode AllowFPOpFusion;
- /// This class encapsulates options for reciprocal-estimate code generation.
- TargetRecip Reciprocals;
-
/// JTType - This flag specifies the type of jump-instruction table to
/// create for functions that have the jumptable attribute.
JumpTable::JumpTableType JTType;
ARE_EQUAL(TrapFuncName) &&
ARE_EQUAL(FloatABIType) &&
ARE_EQUAL(AllowFPOpFusion) &&
- ARE_EQUAL(Reciprocals) &&
ARE_EQUAL(JTType) &&
ARE_EQUAL(ThreadModel) &&
ARE_EQUAL(MCOptions);
+++ /dev/null
-//===--------------------- llvm/Target/TargetRecip.h ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class is used to customize machine-specific reciprocal estimate code
-// generation in a target-independent way.
-// If a target does not support operations in this specification, then code
-// generation will default to using supported operations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_TARGETRECIP_H
-#define LLVM_TARGET_TARGETRECIP_H
-
-#include "llvm/ADT/StringRef.h"
-#include <vector>
-#include <string>
-#include <map>
-
-namespace llvm {
-
-struct TargetRecip {
-public:
- TargetRecip();
-
- /// Initialize all or part of the operations from command-line options or
- /// a front end.
- TargetRecip(const std::vector<std::string> &Args);
-
- /// Set whether a particular reciprocal operation is enabled and how many
- /// refinement steps are needed when using it. Use "all" to set enablement
- /// and refinement steps for all operations.
- void setDefaults(const StringRef &Key, bool Enable, unsigned RefSteps);
-
- /// Return true if the reciprocal operation has been enabled by default or
- /// from the command-line. Return false if the operation has been disabled
- /// by default or from the command-line.
- bool isEnabled(const StringRef &Key) const;
-
- /// Return the number of iterations necessary to refine the
- /// the result of a machine instruction for the given reciprocal operation.
- unsigned getRefinementSteps(const StringRef &Key) const;
-
- bool operator==(const TargetRecip &Other) const;
-
-private:
- enum {
- Uninitialized = -1
- };
-
- struct RecipParams {
- int8_t Enabled;
- int8_t RefinementSteps;
-
- RecipParams() : Enabled(Uninitialized), RefinementSteps(Uninitialized) {}
- };
-
- std::map<StringRef, RecipParams> RecipMap;
- typedef std::map<StringRef, RecipParams>::iterator RecipIter;
- typedef std::map<StringRef, RecipParams>::const_iterator ConstRecipIter;
-
- bool parseGlobalParams(const std::string &Arg);
- void parseIndividualParams(const std::vector<std::string> &Args);
-};
-
-} // End llvm namespace
-
-#endif
TargetLoweringObjectFile.cpp
TargetMachine.cpp
TargetMachineC.cpp
- TargetRecip.cpp
TargetSubtargetInfo.cpp
ADDITIONAL_HEADER_DIRS
+++ /dev/null
-//===-------------------------- TargetRecip.cpp ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class is used to customize machine-specific reciprocal estimate code
-// generation in a target-independent way.
-// If a target does not support operations in this specification, then code
-// generation will default to using supported operations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetRecip.h"
-#include <map>
-
-using namespace llvm;
-
-// These are the names of the individual reciprocal operations. These are
-// the key strings for queries and command-line inputs.
-// In addition, the command-line interface recognizes the global parameters
-// "all", "none", and "default".
-static const char *RecipOps[] = {
- "divd",
- "divf",
- "vec-divd",
- "vec-divf",
- "sqrtd",
- "sqrtf",
- "vec-sqrtd",
- "vec-sqrtf",
-};
-
-// The uninitialized state is needed for the enabled settings and refinement
-// steps because custom settings may arrive via the command-line before target
-// defaults are set.
-TargetRecip::TargetRecip() {
- unsigned NumStrings = llvm::array_lengthof(RecipOps);
- for (unsigned i = 0; i < NumStrings; ++i)
- RecipMap.insert(std::make_pair(RecipOps[i], RecipParams()));
-}
-
-static bool parseRefinementStep(const StringRef &In, size_t &Position,
- uint8_t &Value) {
- const char RefStepToken = ':';
- Position = In.find(RefStepToken);
- if (Position == StringRef::npos)
- return false;
-
- StringRef RefStepString = In.substr(Position + 1);
- // Allow exactly one numeric character for the additional refinement
- // step parameter.
- if (RefStepString.size() == 1) {
- char RefStepChar = RefStepString[0];
- if (RefStepChar >= '0' && RefStepChar <= '9') {
- Value = RefStepChar - '0';
- return true;
- }
- }
- report_fatal_error("Invalid refinement step for -recip.");
-}
-
-bool TargetRecip::parseGlobalParams(const std::string &Arg) {
- StringRef ArgSub = Arg;
-
- // Look for an optional setting of the number of refinement steps needed
- // for this type of reciprocal operation.
- size_t RefPos;
- uint8_t RefSteps;
- StringRef RefStepString;
- if (parseRefinementStep(ArgSub, RefPos, RefSteps)) {
- // Split the string for further processing.
- RefStepString = ArgSub.substr(RefPos + 1);
- ArgSub = ArgSub.substr(0, RefPos);
- }
- bool Enable;
- bool UseDefaults;
- if (ArgSub == "all") {
- UseDefaults = false;
- Enable = true;
- } else if (ArgSub == "none") {
- UseDefaults = false;
- Enable = false;
- } else if (ArgSub == "default") {
- UseDefaults = true;
- } else {
- // Any other string is invalid or an individual setting.
- return false;
- }
-
- // All enable values will be initialized to target defaults if 'default' was
- // specified.
- if (!UseDefaults)
- for (auto &KV : RecipMap)
- KV.second.Enabled = Enable;
-
- // Custom refinement count was specified with all, none, or default.
- if (!RefStepString.empty())
- for (auto &KV : RecipMap)
- KV.second.RefinementSteps = RefSteps;
-
- return true;
-}
-
-void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) {
- static const char DisabledPrefix = '!';
- unsigned NumArgs = Args.size();
-
- for (unsigned i = 0; i != NumArgs; ++i) {
- StringRef Val = Args[i];
-
- bool IsDisabled = Val[0] == DisabledPrefix;
- // Ignore the disablement token for string matching.
- if (IsDisabled)
- Val = Val.substr(1);
-
- size_t RefPos;
- uint8_t RefSteps;
- StringRef RefStepString;
- if (parseRefinementStep(Val, RefPos, RefSteps)) {
- // Split the string for further processing.
- RefStepString = Val.substr(RefPos + 1);
- Val = Val.substr(0, RefPos);
- }
-
- RecipIter Iter = RecipMap.find(Val);
- if (Iter == RecipMap.end()) {
- // Try again specifying float suffix.
- Iter = RecipMap.find(Val.str() + 'f');
- if (Iter == RecipMap.end()) {
- Iter = RecipMap.find(Val.str() + 'd');
- assert(Iter == RecipMap.end() && "Float entry missing from map");
- report_fatal_error("Invalid option for -recip.");
- }
-
- // The option was specified without a float or double suffix.
- if (RecipMap[Val.str() + 'd'].Enabled != Uninitialized) {
- // Make sure that the double entry was not already specified.
- // The float entry will be checked below.
- report_fatal_error("Duplicate option for -recip.");
- }
- }
-
- if (Iter->second.Enabled != Uninitialized)
- report_fatal_error("Duplicate option for -recip.");
-
- // Mark the matched option as found. Do not allow duplicate specifiers.
- Iter->second.Enabled = !IsDisabled;
- if (!RefStepString.empty())
- Iter->second.RefinementSteps = RefSteps;
-
- // If the precision was not specified, the double entry is also initialized.
- if (Val.back() != 'f' && Val.back() != 'd') {
- RecipMap[Val.str() + 'd'].Enabled = !IsDisabled;
- if (!RefStepString.empty())
- RecipMap[Val.str() + 'd'].RefinementSteps = RefSteps;
- }
- }
-}
-
-TargetRecip::TargetRecip(const std::vector<std::string> &Args) :
- TargetRecip() {
- unsigned NumArgs = Args.size();
-
- // Check if "all", "default", or "none" was specified.
- if (NumArgs == 1 && parseGlobalParams(Args[0]))
- return;
-
- parseIndividualParams(Args);
-}
-
-bool TargetRecip::isEnabled(const StringRef &Key) const {
- ConstRecipIter Iter = RecipMap.find(Key);
- assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
- assert(Iter->second.Enabled != Uninitialized &&
- "Enablement setting was not initialized");
- return Iter->second.Enabled;
-}
-
-unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const {
- ConstRecipIter Iter = RecipMap.find(Key);
- assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
- assert(Iter->second.RefinementSteps != Uninitialized &&
- "Refinement step setting was not initialized");
- return Iter->second.RefinementSteps;
-}
-
-/// Custom settings (previously initialized values) override target defaults.
-void TargetRecip::setDefaults(const StringRef &Key, bool Enable,
- unsigned RefSteps) {
- if (Key == "all") {
- for (auto &KV : RecipMap) {
- RecipParams &RP = KV.second;
- if (RP.Enabled == Uninitialized)
- RP.Enabled = Enable;
- if (RP.RefinementSteps == Uninitialized)
- RP.RefinementSteps = RefSteps;
- }
- } else {
- RecipParams &RP = RecipMap[Key];
- if (RP.Enabled == Uninitialized)
- RP.Enabled = Enable;
- if (RP.RefinementSteps == Uninitialized)
- RP.RefinementSteps = RefSteps;
- }
-}
-
-bool TargetRecip::operator==(const TargetRecip &Other) const {
- for (const auto &KV : RecipMap) {
- const StringRef &Op = KV.first;
- const RecipParams &RP = KV.second;
- const RecipParams &OtherRP = Other.RecipMap.find(Op)->second;
- if (RP.RefinementSteps != OtherRP.RefinementSteps)
- return false;
- if (RP.Enabled != OtherRP.Enabled)
- return false;
- }
- return true;
-}
"LEA instruction with certain arguments is slow">;
def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
+def FeatureUseSqrtEst : SubtargetFeature<"use-sqrt-est", "UseSqrtEst", "true",
+ "Use RSQRT* to optimize square root calculations">;
+def FeatureUseRecipEst : SubtargetFeature<"use-recip-est", "UseReciprocalEst",
+ "true", "Use RCP* to optimize division calculations">;
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
FeaturePRFCHW, FeatureAES, FeaturePCLMUL,
FeatureBMI, FeatureF16C, FeatureMOVBE,
FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem,
- FeatureSlowSHLD]>;
+ FeatureSlowSHLD, FeatureUseSqrtEst, FeatureUseRecipEst]>;
// TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips.
"rather than promotion."),
cl::Hidden);
+static cl::opt<int> ReciprocalEstimateRefinementSteps(
+ "x86-recip-refinement-steps", cl::init(1),
+ cl::desc("Specify the number of Newton-Raphson iterations applied to the "
+ "result of the hardware reciprocal estimate instruction."),
+ cl::NotHidden);
+
// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
SDValue V2);
DAGCombinerInfo &DCI,
unsigned &RefinementSteps,
bool &UseOneConstNR) const {
+ // FIXME: We should use instruction latency models to calculate the cost of
+ // each potential sequence, but this is very hard to do reliably because
+ // at least Intel's Core* chips have variable timing based on the number of
+ // significant digits in the divisor and/or sqrt operand.
+ if (!Subtarget->useSqrtEst())
+ return SDValue();
+
EVT VT = Op.getValueType();
- const char *RecipOp;
- // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
+ // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
// rsqrt estimate with refinement on x86 prior to FMA requires at least 16
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if (VT == MVT::f32 && Subtarget->hasSSE1())
- RecipOp = "sqrtf";
- else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget->hasAVX()))
- RecipOp = "vec-sqrtf";
- else
- return SDValue();
-
- TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
-
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- UseOneConstNR = false;
- return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+ if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+ (Subtarget->hasAVX() && VT == MVT::v8f32)) {
+ RefinementSteps = 1;
+ UseOneConstNR = false;
+ return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
}
/// The minimum architected relative accuracy is 2^-12. We need one
SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
DAGCombinerInfo &DCI,
unsigned &RefinementSteps) const {
+ // FIXME: We should use instruction latency models to calculate the cost of
+ // each potential sequence, but this is very hard to do reliably because
+ // at least Intel's Core* chips have variable timing based on the number of
+ // significant digits in the divisor.
+ if (!Subtarget->useReciprocalEst())
+ return SDValue();
+
EVT VT = Op.getValueType();
- const char *RecipOp;
-
+
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if (VT == MVT::f32 && Subtarget->hasSSE1())
- RecipOp = "divf";
- else if ((VT == MVT::v4f32 && Subtarget->hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget->hasAVX()))
- RecipOp = "vec-divf";
- else
- return SDValue();
-
- TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
-
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+ if ((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) ||
+ (Subtarget->hasAVX() && VT == MVT::v8f32)) {
+ RefinementSteps = ReciprocalEstimateRefinementSteps;
+ return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
}
/// If we have at least two divisions that use the same divisor, convert to
LEAUsesAG = false;
SlowLEA = false;
SlowIncDec = false;
+ UseSqrtEst = false;
+ UseReciprocalEst = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
/// True if INC and DEC instructions are slow when writing to flags
bool SlowIncDec;
+ /// Use the RSQRT* instructions to optimize square root calculations.
+ /// For this to be profitable, the cost of FSQRT and FDIV must be
+ /// substantially higher than normal FP ops like FADD and FMUL.
+ bool UseSqrtEst;
+
+ /// Use the RCP* instructions to optimize FP division calculations.
+ /// For this to be profitable, the cost of FDIV must be
+ /// substantially higher than normal FP ops like FADD and FMUL.
+ bool UseReciprocalEst;
+
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
bool LEAusesAG() const { return LEAUsesAG; }
bool slowLEA() const { return SlowLEA; }
bool slowIncDec() const { return SlowIncDec; }
+ bool useSqrtEst() const { return UseSqrtEst; }
+ bool useReciprocalEst() const { return UseReciprocalEst; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
if (Subtarget.isTargetWin64())
this->Options.TrapUnreachable = true;
- // TODO: By default, all reciprocal estimate operations are off because
- // that matches the behavior before TargetRecip was added (except for btver2
- // which used subtarget features to enable this type of codegen).
- // We should change this to match GCC behavior where everything but
- // scalar division estimates are turned on by default with -ffast-math.
- this->Options.Reciprocals.setDefaults("all", false, 1);
-
initAsmInfo();
}
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est | FileCheck %s --check-prefix=RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-recip-est -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx,use-sqrt-est | FileCheck %s --check-prefix=ESTIMATE
declare double @__sqrt_finite(double) #0
declare float @__sqrtf_finite(float) #0