setOperationAction(ISD::FREM , MVT::f32 , Expand);
setOperationAction(ISD::FREM , MVT::f64 , Expand);
setOperationAction(ISD::FREM , MVT::f80 , Expand);
+ setOperationAction(ISD::FREM , MVT::f128 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
// Promote the i8 variants and force them on up to i32 which has a shorter
// There's never any support for operations beyond MVT::f32.
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f80, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f16, Expand);
if (Subtarget.hasPOPCNT()) {
setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
- // Long double always uses X87, except f128 in SSE.
+ // f80 always uses X87.
if (UseX87) {
- if (Subtarget.is64Bit() && Subtarget.hasSSE1()) {
- addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
- : &X86::VR128RegClass);
- ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
- setOperationAction(ISD::FABS , MVT::f128, Custom);
- setOperationAction(ISD::FNEG , MVT::f128, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
-
- addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
- }
-
addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
setOperationAction(ISD::LLRINT, MVT::f80, Expand);
}
+ // f128 uses xmm registers, but most operations require libcalls.
+ if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
+ addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
+
+ addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
+
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+
+ setOperationAction(ISD::FABS, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
+
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ // We need to custom handle any FP_ROUND with an f128 input, but
+ // LegalizeDAG uses the result type to know when to run a custom handler.
+ // So we have to list all legal floating point result types here.
+ if (isTypeLegal(MVT::f32)) {
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
+ }
+ if (isTypeLegal(MVT::f64)) {
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
+ }
+ if (isTypeLegal(MVT::f80)) {
+ setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
+ }
+
+ setOperationAction(ISD::SETCC, MVT::f128, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f80, Expand);
+ }
+
// Always use a library call for pow.
setOperationAction(ISD::FPOW , MVT::f32 , Expand);
setOperationAction(ISD::FPOW , MVT::f64 , Expand);
setOperationAction(ISD::FPOW , MVT::f80 , Expand);
+ setOperationAction(ISD::FPOW , MVT::f128 , Expand);
setOperationAction(ISD::FLOG, MVT::f80, Expand);
setOperationAction(ISD::FLOG2, MVT::f80, Expand);
setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
setOperationAction(ISD::STORE, MVT::v2f32, Custom);
+
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Custom);
+
if (!Subtarget.hasAVX512())
setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f32, Custom);
+
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
}
+ if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
+ // X >= 0 -> X == 0, jump on !sign.
+ return X86::COND_NS;
+ }
if (SetCCOpcode == ISD::SETLT && RHSC->getAPIntValue() == 1) {
// X < 1 -> X <= 0
RHS = DAG.getConstant(0, DL, RHS.getValueType());
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
+ if (VT == MVT::f128)
+ return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
+
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
SDValue N0 = Op.getOperand(0);
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
+ MVT SrcVT = N0.getSimpleValueType();
+ MVT DstVT = Op.getSimpleValueType();
+
+ if (DstVT == MVT::f128)
+ return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
- if (Op.getSimpleValueType().isVector())
+ if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
- MVT SrcVT = N0.getSimpleValueType();
- MVT DstVT = Op.getSimpleValueType();
-
if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
(SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
// Conversions from unsigned i32 to f32/f64 are legal,
MVT SrcVT = Src.getSimpleValueType();
SDLoc dl(Op);
+ if (SrcVT == MVT::f128) {
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(SrcVT, VT);
+ else
+ LC = RTLIB::getFPTOUINT(SrcVT, VT);
+
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, LC, VT, Src, CallOptions, SDLoc(Op)).first;
+ }
+
if (VT.isVector()) {
if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
-static SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT SVT = In.getSimpleValueType();
+ if (VT == MVT::f128) {
+ RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
+ return LowerF128Call(Op, DAG, LC);
+ }
+
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
return DAG.getNode(X86ISD::VFPEXT, DL, VT,
In, DAG.getUNDEF(SVT)));
}
+SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+ SDValue In = Op.getOperand(0);
+ MVT SVT = In.getSimpleValueType();
+
+ // It's legal except when f128 is involved
+ if (SVT != MVT::f128)
+ return Op;
+
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, VT);
+
+ // FP_ROUND node has a second operand indicating whether it is known to be
+ // precise. That doesn't take part in the LibCall so we can't directly use
+ // LowerF128Call.
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first;
+}
+
+// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking
+// the default expansion of STRICT_FP_ROUND.
+static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) {
+ // FIXME: Need to form a libcall with an input chain for f128.
+ assert(Op.getOperand(0).getValueType() != MVT::f128 &&
+ "Don't know how to handle f128 yet!");
+ return Op;
+}
+
/// Horizontal vector math instructions may be slower than normal math with
/// shuffles. Limit horizontal op codegen based on size/speed trade-offs, uarch
/// implementation, and likely shuffle complexity of the alternate sequence.
/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
-static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const {
+ if (Op.getValueType() == MVT::f128) {
+ RTLIB::Libcall LC = Op.getOpcode() == ISD::FADD ? RTLIB::ADD_F128
+ : RTLIB::SUB_F128;
+ return LowerF128Call(Op, DAG, LC);
+ }
+
assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) &&
"Only expecting float/double");
return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
SDLoc dl(Op);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // Handle f128 first, since one possible outcome is a normal integer
+ // comparison which gets handled by emitFlagsForSetcc.
+ if (Op0.getValueType() == MVT::f128) {
+ softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1);
+
+ // If softenSetCCOperands returned a scalar, use it.
+ if (!Op1.getNode()) {
+ assert(Op0.getValueType() == Op.getValueType() &&
+ "Unexpected setcc expansion!");
+ return Op0;
+ }
+ }
+
SDValue X86CC;
SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
if (!EFLAGS)
return NOOP;
}
+SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const {
+ SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
+ MakeLibCallOptions CallOptions;
+ return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
+}
+
/// Provide custom lowering hooks for some operations.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
+ case ISD::STRICT_FP_ROUND: return LowerSTRICT_FP_ROUND(Op, DAG);
case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
case ISD::FADD:
- case ISD::FSUB: return lowerFaddFsub(Op, DAG, Subtarget);
+ case ISD::FSUB: return lowerFaddFsub(Op, DAG);
+ case ISD::FMUL: return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
+ case ISD::FDIV: return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
case ISD::FABS:
case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);