From e0845d8e8c204240190a2566d85ac2f97f0fc535 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 10 Nov 2016 07:24:52 +0000 Subject: [PATCH] [AVX-512][X86] Convert avx_cvtt_ps2dq_256 and sse2_cvttps2dq intrinsics to ISD::FP_TO_SINT in the intrinsics table and delete patterns. While nearby also move CVTDQ2PS patterns into their instructions. This allows these intrinsics to also use EVEX instructions. llvm-svn: 286434 --- llvm/lib/Target/X86/X86InstrSSE.td | 80 +++++++++------------------- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 2 + llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 26 ++++++--- llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 13 +++-- 4 files changed, 55 insertions(+), 66 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 8cdd1bc..fbe7712 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1441,15 +1441,18 @@ multiclass sse12_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, itins.rm>, Sched<[itins.Sched.Folded]>; } -multiclass sse12_cvt_p opc, RegisterClass SrcRC, RegisterClass DstRC, - X86MemOperand x86memop, string asm, Domain d, - OpndItins itins> { +multiclass sse12_cvt_p opc, RegisterClass RC, X86MemOperand x86memop, + ValueType DstTy, ValueType SrcTy, PatFrag ld_frag, + string asm, Domain d, OpndItins itins> { let hasSideEffects = 0 in { - def rr : I, Sched<[itins.Sched]>; + def rr : I, Sched<[itins.Sched]>; let mayLoad = 1 in - def rm : I, Sched<[itins.Sched.Folded]>; + def rm : I, Sched<[itins.Sched.Folded]>; } } @@ -1722,16 +1725,16 @@ defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, ssmem, sse_load_f32, "cvtss2si", SSE_CVT_SS2SI_64>, XS, REX_W; -defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, +defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64, "vcvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, SSE_CVT_PS>, - PS, VEX, Requires<[HasAVX]>; -defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, 
VR256, i256mem, + PS, VEX, Requires<[HasAVX, NoVLX]>; +defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, loadv4i64, "vcvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, SSE_CVT_PS>, - PS, VEX, VEX_L, Requires<[HasAVX]>; + PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>; -defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, +defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64, "cvtdq2ps\t{$src, $dst|$dst, $src}", SSEPackedSingle, SSE_CVT_PS>, PS, Requires<[UseSSE2]>; @@ -1999,72 +2002,41 @@ def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // Convert with truncation packed single/double fp to doubleword // SSE2 packed instructions with XS prefix +let Predicates = [HasAVX, NoVLX] in { def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvttps2dq VR128:$src))], + (v4i32 (fp_to_sint (v4f32 VR128:$src))))], IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttps2dq - (loadv4f32 addr:$src)))], + [(set VR128:$dst, + (v4i32 (fp_to_sint (loadv4f32 addr:$src))))], IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], + (v8i32 (fp_to_sint (v8f32 VR256:$src))))], IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 - (loadv8f32 addr:$src)))], + [(set VR256:$dst, + (v8i32 (fp_to_sint (loadv8f32 addr:$src))))], IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; +} def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins 
VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], + [(set VR128:$dst, + (v4i32 (fp_to_sint (v4f32 VR128:$src))))], IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], + (v4i32 (fp_to_sint (memopv4f32 addr:$src))))], IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; -let Predicates = [HasAVX, NoVLX] in { - def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (VCVTDQ2PSrr VR128:$src)>; - def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))), - (VCVTDQ2PSrm addr:$src)>; - - def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (VCVTTPS2DQrr VR128:$src)>; - def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))), - (VCVTTPS2DQrm addr:$src)>; - - def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))), - (VCVTDQ2PSYrr VR256:$src)>; - def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))), - (VCVTDQ2PSYrm addr:$src)>; - - def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))), - (VCVTTPS2DQYrr VR256:$src)>; - def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))), - (VCVTTPS2DQYrm addr:$src)>; -} - -let Predicates = [UseSSE2] in { - def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (CVTDQ2PSrr VR128:$src)>; - def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), - (CVTDQ2PSrm addr:$src)>; - - def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (CVTTPS2DQrr VR128:$src)>; - def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))), - (CVTTPS2DQrm addr:$src)>; -} - let Predicates = [HasAVX, NoVLX] in def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index efd03b0..25c5a89 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ 
-255,6 +255,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0), X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), + X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0), X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0), @@ -1638,6 +1639,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0), X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTPD2DQ, 0), + X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0), X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index 2dd6310..10b7117 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -354,10 +354,15 @@ declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) { -; CHECK-LABEL: test_x86_sse2_cvttps2dq: -; CHECK: ## BB#0: -; CHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0] -; CHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_sse2_cvttps2dq: +; AVX: ## BB#0: +; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0] +; AVX-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_sse2_cvttps2dq: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: 
[0x62,0xf1,0x7e,0x08,0x5b,0xc0] +; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -2899,10 +2904,15 @@ declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) { -; CHECK-LABEL: test_x86_avx_cvtt_ps2dq_256: -; CHECK: ## BB#0: -; CHECK-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0xc5,0xfe,0x5b,0xc0] -; CHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_avx_cvtt_ps2dq_256: +; AVX: ## BB#0: +; AVX-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0xc5,0xfe,0x5b,0xc0] +; AVX-NEXT: retl ## encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx_cvtt_ps2dq_256: +; AVX512VL: ## BB#0: +; AVX512VL-NEXT: vcvttps2dq %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x5b,0xc0] +; AVX512VL-NEXT: retl ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res } diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index 2e5f8a5..f09bcfa 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -543,10 +543,15 @@ define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) { ; SSE-NEXT: cvttps2dq %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x5b,0xc0] ; SSE-NEXT: retl ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse2_cvttps2dq: -; VCHECK: ## BB#0: -; VCHECK-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX2-LABEL: test_x86_sse2_cvttps2dq: +; AVX2: ## BB#0: +; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5b,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_cvttps2dq: +; SKX: ## BB#0: +; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x5b,0xc0] +; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> 
@llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } -- 2.7.4