From 00afa193f1e8596fd2fa7a340f6cabcbb6e8eb3a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 11 Mar 2019 06:01:04 +0000 Subject: [PATCH] [X86] Enable sse2_cvtsd2ss intrinsic to use an EVEX encoded instruction. llvm-svn: 355810 --- llvm/lib/Target/X86/X86InstrSSE.td | 16 ++--- llvm/lib/Target/X86/X86IntrinsicsInfo.h | 1 + llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll | 42 +++++++++---- llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 71 +++++++++++++++------- 4 files changed, 87 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 784aa17..7f1cc8f 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1215,28 +1215,28 @@ def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>, - XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>, + (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, + XD, VEX_4V, VEX_WIG, Requires<[UseAVX]>, Sched<[WriteCvtSD2SS]>; def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsd2ss - VR128:$src1, sse_load_f64:$src2))]>, - XD, VEX_4V, VEX_WIG, Requires<[HasAVX]>, + [(set VR128:$dst, + (v4f32 (X86frounds VR128:$src1, sse_load_f64:$src2)))]>, + XD, VEX_4V, VEX_WIG, Requires<[UseAVX]>, Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; let Constraints = "$src1 = $dst" in { def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "cvtsd2ss\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))]>, + (v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>, XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS]>; def CVTSD2SSrm_Int: I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2), "cvtsd2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsd2ss - VR128:$src1, sse_load_f64:$src2))]>, + [(set VR128:$dst, + (v4f32 (X86frounds VR128:$src1,sse_load_f64:$src2)))]>, XD, Requires<[UseSSE2]>, Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>; } diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 63b6620..41ef56b 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1017,6 +1017,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_cvtps2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(sse2_cvtsd2si, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0), X86_INTRINSIC_DATA(sse2_cvtsd2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, 0), + X86_INTRINSIC_DATA(sse2_cvtsd2ss, INTR_TYPE_2OP, X86ISD::VFPROUNDS, 0), X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(sse2_cvttsd2si, INTR_TYPE_1OP, X86ISD::CVTTS2SI, 0), diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index 477ce13..beb8955 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -1553,10 +1553,15 @@ define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) { ; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1] ; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvtsd_ss: -; AVX: # %bb.0: -; AVX-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1] -; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; AVX1-LABEL: test_mm_cvtsd_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvtsd_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ret <4 x float> %res } @@ -1569,21 +1574,32 @@ define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) { ; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00] ; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_cvtsd_ss_load: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00] -; X86-AVX-NEXT: retl # encoding: [0xc3] +; X86-AVX1-LABEL: test_mm_cvtsd_ss_load: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_cvtsd_ss_load: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_cvtsd_ss_load: ; X64-SSE: # %bb.0: ; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07] ; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_cvtsd_ss_load: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07] -; X64-AVX-NEXT: retq # encoding: [0xc3] +; X64-AVX1-LABEL: test_mm_cvtsd_ss_load: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_cvtsd_ss_load: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %a1 = load <2 x double>, <2 x double>* %p1 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ret <4 x float> %res diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index f8a9074..44585b7 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -423,10 +423,15 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { ; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5a,0xc1] ; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; AVX-LABEL: test_x86_sse2_cvtsd2ss: -; AVX: ## %bb.0: -; AVX-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0xc1] -; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; AVX1-LABEL: test_x86_sse2_cvtsd2ss: +; AVX1: ## %bb.0: +; AVX1-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0xc1] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_sse2_cvtsd2ss: +; AVX512: ## %bb.0: +; AVX512-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0xc1] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -440,21 +445,32 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, <2 x double>* % ; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00] ; X86-SSE-NEXT: retl ## encoding: [0xc3] ; -; X86-AVX-LABEL: test_x86_sse2_cvtsd2ss_load: -; X86-AVX: ## %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00] -; X86-AVX-NEXT: retl ## encoding: [0xc3] +; X86-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load: +; X86-AVX1: ## %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00] +; X86-AVX1-NEXT: retl ## encoding: [0xc3] +; +; X86-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load: +; X86-AVX512: ## %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00] +; X86-AVX512-NEXT: retl ## encoding: [0xc3] ; ; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load: ; X64-SSE: ## %bb.0: ; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07] ; X64-SSE-NEXT: retq ## encoding: [0xc3] ; -; X64-AVX-LABEL: test_x86_sse2_cvtsd2ss_load: -; X64-AVX: ## %bb.0: -; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07] -; X64-AVX-NEXT: retq ## encoding: [0xc3] +; X64-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load: +; X64-AVX1: ## %bb.0: +; X64-AVX1-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07] +; X64-AVX1-NEXT: retq ## encoding: [0xc3] +; +; X64-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load: +; X64-AVX512: ## %bb.0: +; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07] +; X64-AVX512-NEXT: retq ## encoding: [0xc3] %a1 = load <2 x double>, <2 x double>* %p1 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res @@ -468,21 +484,32 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, <2 x do ; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x00] ; X86-SSE-NEXT: retl ## encoding: [0xc3] ; -; X86-AVX-LABEL: test_x86_sse2_cvtsd2ss_load_optsize: -; X86-AVX: ## %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00] -; X86-AVX-NEXT: retl ## encoding: [0xc3] +; X86-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load_optsize: +; X86-AVX1: ## %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x00] +; X86-AVX1-NEXT: retl ## encoding: [0xc3] +; +; X86-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load_optsize: +; X86-AVX512: ## %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x00] +; X86-AVX512-NEXT: retl ## encoding: [0xc3] ; ; X64-SSE-LABEL: test_x86_sse2_cvtsd2ss_load_optsize: ; X64-SSE: ## %bb.0: ; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0x5a,0x07] ; X64-SSE-NEXT: retq ## encoding: [0xc3] ; -; X64-AVX-LABEL: test_x86_sse2_cvtsd2ss_load_optsize: -; X64-AVX: ## %bb.0: -; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07] -; X64-AVX-NEXT: retq ## encoding: [0xc3] +; X64-AVX1-LABEL: test_x86_sse2_cvtsd2ss_load_optsize: +; X64-AVX1: ## %bb.0: +; X64-AVX1-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5a,0x07] +; X64-AVX1-NEXT: retq ## encoding: [0xc3] +; +; X64-AVX512-LABEL: test_x86_sse2_cvtsd2ss_load_optsize: +; X64-AVX512: ## %bb.0: +; X64-AVX512-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5a,0x07] +; X64-AVX512-NEXT: retq ## encoding: [0xc3] %a1 = load <2 x double>, <2 x double>* %p1 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res -- 2.7.4