From af5ee86bc92928c43c339ae1418274b124f04198 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 18 Sep 2016 21:49:32 +0000 Subject: [PATCH] [AVX-512] Don't lower CVTPD2PS intrinsics to ISD::FP_ROUND with an X86 rounding mode encoding in the second operand. This immediate should only be 0 or 1 and indicates if the truncation loses precision. Also enhance an assert in SelectionDAG::getNode to flag this sort of problem in the future. llvm-svn: 281868 --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 ++- llvm/lib/Target/X86/X86ISelLowering.cpp | 25 +++++++++++++++++++++++++ llvm/lib/Target/X86/X86IntrinsicsInfo.h | 6 +++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 570c26a..78334fb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3603,7 +3603,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && VT.bitsLE(N1.getValueType()) && - N2C && "Invalid FP_ROUND!"); + N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) && + "Invalid FP_ROUND!"); if (N1.getValueType() == VT) return N1; // noop conversion. break; case ISD::AssertSext: diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 70ab194..a4beb83 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -17931,6 +17931,31 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget Src1, Src2, Src3, Src4), Mask, PassThru, Subtarget, DAG); } + case CVTPD2PS: { + SDValue Src = Op.getOperand(1); + SDValue PassThru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + // We add rounding mode to the Node when + // - RM Opcode is specified and + // - RM is not "current direction". 
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + SDValue Rnd = Op.getOperand(4); + unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue(); + if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, + dl, Op.getValueType(), + Src, Rnd), + Mask, PassThru, Subtarget, DAG); + } + } + assert(IntrData->Opc0 == ISD::FP_ROUND && "Unexpected opcode!"); + // ISD::FP_ROUND has a second argument that indicates if the truncation + // does not change the value. Set it to 0 since it can change. + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, + DAG.getIntPtrConstant(0, dl)), + Mask, PassThru, Subtarget, DAG); + } case FPCLASS: { // FPclass intrinsics with mask SDValue Src1 = Op.getOperand(1); diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index ae51274..b5c0299 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -23,7 +23,7 @@ enum IntrinsicType : uint16_t { INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASS, FPCLASSS, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, - CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, + CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, CVTPD2PS, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK, INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK, @@ -499,9 +499,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND), X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK, X86ISD::VFPROUND, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, INTR_TYPE_1OP_MASK_RM, + X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS, ISD::FP_ROUND, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, INTR_TYPE_1OP_MASK_RM, + 
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS, ISD::FP_ROUND, X86ISD::VFPROUND), X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK, X86ISD::FP_TO_SINT_RND, 0), -- 2.7.4