From 1162857ec4cfe0ca32ca78d0f57dfbb593192998 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 6 Nov 2016 04:12:46 +0000 Subject: [PATCH] [AVX-512] Lower AVX cvtpd2ps intrinsic to ISD::FP_ROUND so it can use EVEX instruction when available. llvm-svn: 286057 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 ++++++- llvm/lib/Target/X86/X86InstrSSE.td | 12 ++++-------- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 8 +++++--- llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index dd5d195..a8f2a20 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18062,7 +18062,12 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget Src1, Src2, Src3, Src4), Mask, PassThru, Subtarget, DAG); } - case CVTPD2PS: { + case CVTPD2PS: + // ISD::FP_ROUND has a second argument that indicates if the truncation + // does not change the value. Set it to 0 since it can change. + return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1), + DAG.getIntPtrConstant(0, dl)); + case CVTPD2PS_MASK: { SDValue Src = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index dbcf0d7..3e83f69 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2228,16 +2228,16 @@ def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [], IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>; // YMM only +let Predicates = [HasAVX, NoVLX] in { def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], + [(set VR128:$dst, (fpround VR256:$src))], IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))], + [(set VR128:$dst, (fpround (loadv4f64 addr:$src)))], IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; +} def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>; @@ -2261,10 +2261,6 @@ let Predicates = [HasAVX, NoVLX] in { (VCVTPD2PSrr VR128:$src)>; def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))), (VCVTPD2PSXrm addr:$src)>; - def : Pat<(v4f32 (fpround (v4f64 VR256:$src))), - (VCVTPD2PSYrr VR256:$src)>; - def : Pat<(v4f32 (fpround (loadv4f64 addr:$src))), - (VCVTPD2PSYrm addr:$src)>; def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), (VCVTPS2PDrr VR128:$src)>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 7565471..fd8e397 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -23,7 +23,8 @@ enum IntrinsicType : uint16_t { INTR_NO_TYPE, GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASS, FPCLASSS, INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP, - CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, CVTPD2PS, + CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, + CVTPD2PS, CVTPD2PS_MASK, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK, INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK, @@ -251,6 +252,7 @@ static const IntrinsicData* getIntrinsicWithChain(uint16_t IntNo) { * the alphabetical order. */ static const IntrinsicData IntrinsicsWithoutChain[] = { + X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0), X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), @@ -503,9 +505,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND), X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK, X86ISD::VFPROUND, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS, + X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS_MASK, ISD::FP_ROUND, 0), - X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS, + X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_MASK, ISD::FP_ROUND, X86ISD::VFPROUND_RND), X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK, X86ISD::CVTP2SI, 0), diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index a6835ad..da1df98 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -2824,7 +2824,7 @@ define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) { ; ; AVX512VL-LABEL: test_x86_avx_cvt_pd2_ps_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtpd2psy %ymm0, %xmm0 ## encoding: [0xc5,0xfd,0x5a,0xc0] +; AVX512VL-NEXT: vcvtpd2ps %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x5a,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res -- 2.7.4