/// make the right decision when generating code for different targets.
const X86Subtarget *Subtarget;
- /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
- /// floating point ops.
- /// When SSE is available, use it for f32 operations.
- /// When SSE2 is available, use it for f64 operations.
- bool X86ScalarSSEf64;
- bool X86ScalarSSEf32;
- bool X86ScalarSSEf16;
-
public:
explicit X86FastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo) {
Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
- X86ScalarSSEf64 = Subtarget->hasSSE2();
- X86ScalarSSEf32 = Subtarget->hasSSE1();
- X86ScalarSSEf16 = Subtarget->hasFP16();
}
bool fastSelectInstruction(const Instruction *I) override;
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
- return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
- (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
- (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16
+ return (VT == MVT::f64 && Subtarget->hasSSE2()) ||
+ (VT == MVT::f32 && Subtarget->hasSSE1()) ||
+ (VT == MVT::f16 && Subtarget->hasFP16());
}
bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
VT = evt.getSimpleVT();
// For now, require SSE/SSE2 for performing floating-point operations,
// since x87 requires additional work.
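// (Returning false here makes fast-isel bail on the instruction, so x87-only
// scalar FP is left to the regular SelectionDAG path instead.)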
- if (VT == MVT::f64 && !X86ScalarSSEf64)
+ if (VT == MVT::f64 && !Subtarget->hasSSE2())
return false;
- if (VT == MVT::f32 && !X86ScalarSSEf32)
+ if (VT == MVT::f32 && !Subtarget->hasSSE1())
return false;
// Similarly, no f80 support yet.
if (VT == MVT::f80)
bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
MachineMemOperand *MMO, unsigned &ResultReg,
unsigned Alignment) {
+ bool HasSSE1 = Subtarget->hasSSE1();
+ bool HasSSE2 = Subtarget->hasSSE2();
bool HasSSE41 = Subtarget->hasSSE41();
bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
Opc = X86::MOV64rm;
break;
case MVT::f32:
- if (X86ScalarSSEf32)
- Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
- HasAVX ? X86::VMOVSSrm_alt :
- X86::MOVSSrm_alt;
- else
- Opc = X86::LD_Fp32m;
+ Opc = HasAVX512 ? X86::VMOVSSZrm_alt
+ : HasAVX ? X86::VMOVSSrm_alt
+ : HasSSE1 ? X86::MOVSSrm_alt
+ : X86::LD_Fp32m;
break;
case MVT::f64:
- if (X86ScalarSSEf64)
- Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
- HasAVX ? X86::VMOVSDrm_alt :
- X86::MOVSDrm_alt;
- else
- Opc = X86::LD_Fp64m;
+ Opc = HasAVX512 ? X86::VMOVSDZrm_alt
+ : HasAVX ? X86::VMOVSDrm_alt
+ : HasSSE2 ? X86::MOVSDrm_alt
+ : X86::LD_Fp64m;
break;
case MVT::f80:
// No f80 support yet.
Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr;
break;
case MVT::f32:
- if (X86ScalarSSEf32) {
+ if (HasSSE1) {
if (IsNonTemporal && HasSSE4A)
Opc = X86::MOVNTSS;
else
Opc = X86::ST_Fp32m;
break;
case MVT::f64:
- if (X86ScalarSSEf32) {
+ if (HasSSE2) {
if (IsNonTemporal && HasSSE4A)
Opc = X86::MOVNTSD;
else
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
bool HasAVX512 = Subtarget->hasAVX512();
bool HasAVX = Subtarget->hasAVX();
- bool X86ScalarSSEf32 = Subtarget->hasSSE1();
- bool X86ScalarSSEf64 = Subtarget->hasSSE2();
+ bool HasSSE1 = Subtarget->hasSSE1();
+ bool HasSSE2 = Subtarget->hasSSE2();
switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
case MVT::i32: return X86::CMP32rr;
case MVT::i64: return X86::CMP64rr;
case MVT::f32:
- return X86ScalarSSEf32
- ? (HasAVX512 ? X86::VUCOMISSZrr
- : HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr)
- : 0;
+ return HasAVX512 ? X86::VUCOMISSZrr
+ : HasAVX ? X86::VUCOMISSrr
+ : HasSSE1 ? X86::UCOMISSrr
+ : 0;
case MVT::f64:
- return X86ScalarSSEf64
- ? (HasAVX512 ? X86::VUCOMISDZrr
- : HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr)
- : 0;
+ return HasAVX512 ? X86::VUCOMISDZrr
+ : HasAVX ? X86::VUCOMISDrr
+ : HasSSE2 ? X86::UCOMISDrr
+ : 0;
}
}
}
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
- if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
+ if (Subtarget->hasSSE2() && I->getType()->isDoubleTy() &&
I->getOperand(0)->getType()->isFloatTy()) {
bool HasAVX512 = Subtarget->hasAVX512();
// fpext from float to double.
}
bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
- if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
+ if (Subtarget->hasSSE2() && I->getType()->isFloatTy() &&
I->getOperand(0)->getType()->isDoubleTy()) {
bool HasAVX512 = Subtarget->hasAVX512();
// fptrunc from double to float.
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
+ bool HasSSE1 = Subtarget->hasSSE1();
+ bool HasSSE2 = Subtarget->hasSSE2();
bool HasAVX = Subtarget->hasAVX();
bool HasAVX512 = Subtarget->hasAVX512();
switch (VT.SimpleTy) {
default: return 0;
case MVT::f32:
- if (X86ScalarSSEf32)
- Opc = HasAVX512 ? X86::VMOVSSZrm_alt :
- HasAVX ? X86::VMOVSSrm_alt :
- X86::MOVSSrm_alt;
- else
- Opc = X86::LD_Fp32m;
+ Opc = HasAVX512 ? X86::VMOVSSZrm_alt
+ : HasAVX ? X86::VMOVSSrm_alt
+ : HasSSE1 ? X86::MOVSSrm_alt
+ : X86::LD_Fp32m;
break;
case MVT::f64:
- if (X86ScalarSSEf64)
- Opc = HasAVX512 ? X86::VMOVSDZrm_alt :
- HasAVX ? X86::VMOVSDrm_alt :
- X86::MOVSDrm_alt;
- else
- Opc = X86::LD_Fp64m;
+ Opc = HasAVX512 ? X86::VMOVSDZrm_alt
+ : HasAVX ? X86::VMOVSDrm_alt
+ : HasSSE2 ? X86::MOVSDrm_alt
+ : X86::LD_Fp64m;
break;
case MVT::f80:
// No f80 support yet.
default:
break;
case MVT::f32:
- if (!X86ScalarSSEf32)
+ if (!Subtarget->hasSSE1())
Opc = X86::LD_Fp032;
break;
case MVT::f64:
- if (!X86ScalarSSEf64)
+ if (!Subtarget->hasSSE2())
Opc = X86::LD_Fp064;
break;
case MVT::f80:
return 0;
// Get opcode and regclass for the given zero.
+ bool HasSSE1 = Subtarget->hasSSE1();
+ bool HasSSE2 = Subtarget->hasSSE2();
bool HasAVX512 = Subtarget->hasAVX512();
unsigned Opc = 0;
switch (VT.SimpleTy) {
default: return 0;
case MVT::f32:
- if (X86ScalarSSEf32)
- Opc = HasAVX512 ? X86::AVX512_FsFLD0SS : X86::FsFLD0SS;
- else
- Opc = X86::LD_Fp032;
+ Opc = HasAVX512 ? X86::AVX512_FsFLD0SS
+ : HasSSE1 ? X86::FsFLD0SS
+ : X86::LD_Fp032;
break;
case MVT::f64:
- if (X86ScalarSSEf64)
- Opc = HasAVX512 ? X86::AVX512_FsFLD0SD : X86::FsFLD0SD;
- else
- Opc = X86::LD_Fp064;
+ Opc = HasAVX512 ? X86::AVX512_FsFLD0SD
+ : HasSSE2 ? X86::FsFLD0SD
+ : X86::LD_Fp064;
break;
case MVT::f80:
// No f80 support yet.
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
- X86ScalarSSEf64 = Subtarget.hasSSE2();
- X86ScalarSSEf32 = Subtarget.hasSSE1();
- X86ScalarSSEf16 = Subtarget.hasFP16();
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
// Set up the TargetLowering object.
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
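// (movd/movq move raw bits directly between a GPR and an XMM register, so an
// i32<->f32 or i64<->f64 bitcast would not need to go through memory.)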
- if (!X86ScalarSSEf64) {
+ if (!Subtarget.hasSSE2()) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
if (Subtarget.is64Bit()) {
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
- if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
+ if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
- } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
+ } else if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1() &&
(UseX87 || Is64Bit)) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
if (VT == MVT::f32)
- return X86ScalarSSEf32;
+ return Subtarget.hasSSE1();
if (VT == MVT::f64)
- return X86ScalarSSEf64;
+ return Subtarget.hasSSE2();
return true;
}
return Subtarget.hasLZCNT();
}
+bool X86TargetLowering::hasBitPreservingFPLogic(EVT VT) const {
+ return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() ||
+ (VT == MVT::f16 && Subtarget.hasFP16());
+}
+
+bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+ // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
+ // expensive than a straight movsd. On the other hand, it's important to
+ // shrink long double fp constant since fldt is very slow.
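+ // (The legalizer consults this hook when it expands an FP constant into a
+ // constant-pool load; returning false keeps f64 constants at full width.)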
+ return !Subtarget.hasSSE2() || VT == MVT::f80;
+}
+
+bool X86TargetLowering::isScalarFPTypeInSSEReg(EVT VT) const {
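+ // When this returns false, the value is lowered via the x87 RFP register
+ // classes and the FP stackifier rather than in an XMM register.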
+ return (VT == MVT::f64 && Subtarget.hasSSE2()) ||
+ (VT == MVT::f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::f16 && Subtarget.hasFP16());
+}
+
bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
const SelectionDAG &DAG,
const MachineMemOperand &MMO) const {
// The transform for i64->f64 isn't correct for 0 when rounding to negative
// infinity. It produces -0.0, so disable under strictfp.
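// (For an input of 0 the magic-constant subtraction in that expansion cancels
// exactly, and an exact zero difference rounds to -0.0 when rounding toward
// negative infinity.)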
- if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64 && !IsStrict)
+ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && Subtarget.hasSSE2() &&
+ !IsStrict)
return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
- if (SrcVT == MVT::i32 && X86ScalarSSEf64 && DstVT != MVT::f80)
+ if (SrcVT == MVT::i32 && Subtarget.hasSSE2() && DstVT != MVT::f80)
return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
if (Subtarget.is64Bit() && SrcVT == MVT::i64 &&
(DstVT == MVT::f32 || DstVT == MVT::f64))
bool isCtlzFast() const override;
- bool hasBitPreservingFPLogic(EVT VT) const override {
- return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() ||
- (VT == MVT::f16 && X86ScalarSSEf16);
- }
+ bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
// If the pair to store is a mixture of float and int values, we will
/// If true, then instruction selection should
/// seek to shrink the FP constant of the specified type to a smaller type
/// in order to save space and/or reduce runtime.
- bool ShouldShrinkFPConstant(EVT VT) const override {
- // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
- // expensive than a straight movsd. On the other hand, it's important to
- // shrink long double fp constant since fldt is very slow.
- return !X86ScalarSSEf64 || VT == MVT::f80;
- }
+ bool ShouldShrinkFPConstant(EVT VT) const override;
/// Return true if we believe it is correct and profitable to reduce the
/// load node to a smaller type.
/// Return true if the specified scalar FP type is computed in an SSE
/// register, not on the X87 floating point stack.
- bool isScalarFPTypeInSSEReg(EVT VT) const {
- return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
- (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
- (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16
- }
+ bool isScalarFPTypeInSSEReg(EVT VT) const;
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
/// make the right decision when generating code for different targets.
const X86Subtarget &Subtarget;
- /// Select between SSE or x87 floating point ops.
- /// When SSE is available, use it for f32 operations.
- /// When SSE2 is available, use it for f64 operations.
- bool X86ScalarSSEf32;
- bool X86ScalarSSEf64;
- bool X86ScalarSSEf16;
-
/// A list of legal FP immediates.
std::vector<APFloat> LegalFPImmediates;