From: Craig Topper Date: Thu, 10 Nov 2016 07:47:17 +0000 (+0000) Subject: [AVX-512] Allow legacy cvtpd2dq intrinsics to select EVEX encoded instruction when... X-Git-Tag: llvmorg-4.0.0-rc1~5058 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bd298c37d1a8a9b952b235a8866a35753739baff;p=platform%2Fupstream%2Fllvm.git [AVX-512] Allow legacy cvtpd2dq intrinsics to select EVEX encoded instruction when available. llvm-svn: 286435 --- diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index fbe7712..d958056 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1957,13 +1957,14 @@ def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // Convert Packed Double FP to Packed DW Integers -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + [(set VR128:$dst, + (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>, VEX, Sched<[WriteCvtF2I]>; // XMM only @@ -1972,19 +1973,19 @@ def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtpd2dqx\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtpd2dq (loadv2f64 addr:$src)))]>, VEX, + (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX, Sched<[WriteCvtF2ILd]>; // YMM only def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L, - Sched<[WriteCvtF2I]>; + (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>, + VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>, + (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>; @@ -1993,11 +1994,12 @@ def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], + (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))], IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>; def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], + [(set VR128:$dst, + (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))], IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; // Convert with truncation packed single/double fp to doubleword diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 25c5a89..f023739 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -253,6 +253,7 @@ static const IntrinsicData* getIntrinsicWithChain(uint16_t IntNo) { */ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0), + X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), @@ -1637,6 +1638,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_comilt_sd, COMI, X86ISD::COMI, ISD::SETLT), X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE), X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), + X86_INTRINSIC_DATA(sse2_cvtpd2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0), X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTPD2DQ, 0), X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index 10b7117..ce40345 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -246,10 +246,15 @@ declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) { -; CHECK-LABEL: test_x86_sse2_cvtpd2dq: -; CHECK: ## BB#0: -; CHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0] -; CHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_sse2_cvtpd2dq: +; AVX: ## BB#0: +; AVX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0] +; AVX-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_sse2_cvtpd2dq: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0] +; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -2851,7 +2856,7 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) { ; ; AVX512VL-LABEL: test_x86_avx_cvt_pd2dq_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vcvtpd2dqy %ymm0, %xmm0 ## encoding: [0xc5,0xff,0xe6,0xc0] +; AVX512VL-NEXT: vcvtpd2dq %ymm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x28,0xe6,0xc0] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index f09bcfa..123a3ea 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -252,10 +252,15 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) { ; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0xe6,0xc0] ; SSE-NEXT: retl ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse2_cvtpd2dq: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX2-LABEL: test_x86_sse2_cvtpd2dq: +; AVX2: ## BB#0: +; AVX2-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_cvtpd2dq: +; SKX: ## BB#0: +; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0] +; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -279,7 +284,7 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind { ; ; SKX-LABEL: test_mm_cvtpd_epi32_zext: ; SKX: ## BB#0: -; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xe6,0xc0] +; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0xe6,0xc0] ; SKX-NEXT: vmovq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x7e,0xc0] ; SKX-NEXT: ## xmm0 = xmm0[0],zero ; SKX-NEXT: retl ## encoding: [0xc3]