From 29f1a1f83467bd3647023cc6e855a64c955b49f0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 21 Sep 2016 06:37:54 +0000 Subject: [PATCH] [AVX-512] Split the 3 different usages of the X86ISD::FSETCC opcode into 3 different opcodes. It turns out isel is really not robust against having different type profiles for the same opcode. It turns out that if you put an illegal rounding mode(i.e. not CUR_DIRECTION or NO_EXC) on a comiss intrinsic we would generate the FSETCC form with the rounding mode added, but then pattern match to an instruction with ROUND_CUR_DIRECTION. We can probably get away with just one FSETCCM opcode that always contains the rounding mode and explicitly put ROUND_CUR_DIRECTION in the pattern, but I'll leave that for future work. With this change the clang tests for the comiss intrinsics that used an incorrect rounding mode of 3 properly fail isel instead of silently doing the wrong thing. Those clang tests will be fixed in a follow up commit and I also plan to add rounding mode checking to clang. llvm-svn: 282055 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 15 +++++++++------ llvm/lib/Target/X86/X86ISelLowering.h | 4 ++++ llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 4 ++-- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 8 ++++---- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 19e010a..9477f4d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16014,8 +16014,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (SSECC != 8) { if (Subtarget.hasAVX512()) { - SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1, - DAG.getConstant(SSECC, DL, MVT::i8)); + SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CondOp0, + CondOp1, DAG.getConstant(SSECC, DL, MVT::i8)); return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2); } @@ -17541,7 +17541,8 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, // The mask should be of type MVT::i1 SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask); - if (Op.getOpcode() == X86ISD::FSETCC) + if (Op.getOpcode() == X86ISD::FSETCCM || + Op.getOpcode() == X86ISD::FSETCCM_RND) return DAG.getNode(ISD::AND, dl, VT, Op, IMask); if (Op.getOpcode() == X86ISD::VFPCLASS || Op.getOpcode() == X86ISD::VFPCLASSS) @@ -18108,10 +18109,10 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue FCmp; if (cast(Sae)->getZExtValue() == X86::STATIC_ROUNDING::CUR_DIRECTION) - FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS, + FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::i1, LHS, RHS, DAG.getConstant(CondVal, dl, MVT::i8)); else - FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS, + FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::i1, LHS, RHS, DAG.getConstant(CondVal, dl, MVT::i8), Sae); // AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg" return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp); @@ -22437,6 +22438,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::SETCC: return "X86ISD::SETCC"; case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; case X86ISD::FSETCC: return "X86ISD::FSETCC"; + case X86ISD::FSETCCM: return "X86ISD::FSETCCM"; + case X86ISD::FSETCCM_RND: return "X86ISD::FSETCCM_RND"; case X86ISD::CMOV: return "X86ISD::CMOV"; case X86ISD::BRCOND: return "X86ISD::BRCOND"; case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; @@ -28483,7 +28486,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, // See X86ATTInstPrinter.cpp:printSSECC(). unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; if (Subtarget.hasAVX512()) { - SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00, + SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CMP00, CMP01, DAG.getConstant(x86cc, DL, MVT::i8)); if (N->getValueType(0) != MVT::i1) diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3750281..ab32487f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -106,6 +106,10 @@ namespace llvm { /// 0s or 1s. Generally DTRT for C/C++ with NaNs. FSETCC, + /// X86 FP SETCC, similar to above, but with output as an i1 mask and + /// with optional rounding mode. + FSETCCM, FSETCCM_RND, + /// X86 conditional moves. Operand 0 and operand 1 are the two values /// to select from. Operand 2 is the condition code, and operand 3 is the /// flag operand produced by a CMP or TEST instruction. It also writes a diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index f8001fb..17c7e35 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -199,8 +199,8 @@ def X86CmpMaskCCScalarRound : def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>; def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; -def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>; -def X86cmpmsRnd : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalarRound>; +def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>; +def X86cmpmsRnd : SDNode<"X86ISD::FSETCCM_RND", X86CmpMaskCCScalarRound>; def X86vshl : SDNode<"X86ISD::VSHL", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 6ab562c..ac0e1e8 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -436,10 +436,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0), - X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC, X86ISD::FSETCC, - X86ISD::FSETCC), - X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC, X86ISD::FSETCC, - X86ISD::FSETCC), + X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC, + X86ISD::FSETCCM, X86ISD::FSETCCM_RND), + X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC, + X86ISD::FSETCCM, X86ISD::FSETCCM_RND), X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0), X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0), -- 2.7.4