From: Craig Topper
Date: Tue, 7 Nov 2017 04:44:22 +0000 (+0000)
Subject: [X86] Use IMPLICIT_DEF in VEX/EVEX vcvtss2sd/vcvtsd2ss patterns instead of a COPY_TO_...
X-Git-Tag: llvmorg-6.0.0-rc1~4015
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=afc3c8206e54df16d446f59e9f06ff431ff622ef;p=platform%2Fupstream%2Fllvm.git

[X86] Use IMPLICIT_DEF in VEX/EVEX vcvtss2sd/vcvtsd2ss patterns instead of a COPY_TO_REGCLASS.

ExeDepsFix pass should take care of making the registers match.

llvm-svn: 317542
---

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index c2693ea..ac2ab1e 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6557,7 +6557,7 @@ defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
                                            NotMemoryFoldable;
 
 def : Pat<(f64 (fpextend FR32X:$src)),
-          (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, FR64X), FR32X:$src)>,
+          (VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
           Requires<[HasAVX512]>;
 def : Pat<(f64 (fpextend (loadf32 addr:$src))),
           (VCVTSS2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
@@ -6572,7 +6572,7 @@ def : Pat<(f64 (extloadf32 addr:$src)),
           Requires<[HasAVX512, OptForSpeed]>;
 
 def : Pat<(f32 (fpround FR64X:$src)),
-          (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, FR32X), FR64X:$src)>,
+          (VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
           Requires<[HasAVX512]>;
 
 def : Pat<(v4f32 (X86Movss
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index c06b281..e47935a 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1519,7 +1519,7 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
 }
 
 def : Pat<(f32 (fpround FR64:$src)),
-          (VCVTSD2SSrr (COPY_TO_REGCLASS FR64:$src, FR32), FR64:$src)>,
+          (VCVTSD2SSrr (f32 (IMPLICIT_DEF)), FR64:$src)>,
           Requires<[UseAVX]>;
 
 def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
@@ -1584,7 +1584,7 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
 }
 
 def : Pat<(f64 (fpextend FR32:$src)),
-          (VCVTSS2SDrr (COPY_TO_REGCLASS FR32:$src, FR64), FR32:$src)>, Requires<[UseAVX]>;
+          (VCVTSS2SDrr (f64 (IMPLICIT_DEF)), FR32:$src)>, Requires<[UseAVX]>;
 
 def : Pat<(fpextend (loadf32 addr:$src)),
           (VCVTSS2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index 8ee56a4..9feff88 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -1564,25 +1564,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX1-LABEL: cvt_8i16_to_8f64:
 ; AVX1:       # BB#0:
 ; AVX1-NEXT:    vmovq %xmm0, %rdx
-; AVX1-NEXT:    movq %rdx, %r8
+; AVX1-NEXT:    movq %rdx, %r9
 ; AVX1-NEXT:    movl %edx, %r10d
-; AVX1-NEXT:    movswl %dx, %r9d
+; AVX1-NEXT:    movswl %dx, %r8d
 ; AVX1-NEXT:    shrq $48, %rdx
-; AVX1-NEXT:    shrq $32, %r8
+; AVX1-NEXT:    shrq $32, %r9
 ; AVX1-NEXT:    shrl $16, %r10d
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rdi
-; AVX1-NEXT:    movq %rdi, %rax
-; AVX1-NEXT:    movl %edi, %esi
+; AVX1-NEXT:    movq %rdi, %rsi
+; AVX1-NEXT:    movl %edi, %eax
 ; AVX1-NEXT:    movswl %di, %ecx
 ; AVX1-NEXT:    shrq $48, %rdi
-; AVX1-NEXT:    shrq $32, %rax
-; AVX1-NEXT:    shrl $16, %esi
-; AVX1-NEXT:    movswl %si, %esi
-; AVX1-NEXT:    vmovd %esi, %xmm0
+; AVX1-NEXT:    shrq $32, %rsi
+; AVX1-NEXT:    shrl $16, %eax
+; AVX1-NEXT:    cwtl
+; AVX1-NEXT:    vmovd %eax, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm1
 ; AVX1-NEXT:    vmovd %ecx, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm2
-; AVX1-NEXT:    cwtl
+; AVX1-NEXT:    movswl %si, %eax
 ; AVX1-NEXT:    vmovd %eax, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm3
 ; AVX1-NEXT:    movswl %di, %eax
@@ -1591,9 +1591,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX1-NEXT:    movswl %r10w, %eax
 ; AVX1-NEXT:    vmovd %eax, %xmm0
 ; AVX1-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX1-NEXT:    vmovd %r9d, %xmm5
+; AVX1-NEXT:    vmovd %r8d, %xmm5
 ; AVX1-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX1-NEXT:    movswl %r8w, %eax
+; AVX1-NEXT:    movswl %r9w, %eax
 ; AVX1-NEXT:    vmovd %eax, %xmm6
 ; AVX1-NEXT:    vcvtph2ps %xmm6, %xmm6
 ; AVX1-NEXT:    movswl %dx, %eax
@@ -1618,25 +1618,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX2-LABEL: cvt_8i16_to_8f64:
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vmovq %xmm0, %rdx
-; AVX2-NEXT:    movq %rdx, %r8
+; AVX2-NEXT:    movq %rdx, %r9
 ; AVX2-NEXT:    movl %edx, %r10d
-; AVX2-NEXT:    movswl %dx, %r9d
+; AVX2-NEXT:    movswl %dx, %r8d
 ; AVX2-NEXT:    shrq $48, %rdx
-; AVX2-NEXT:    shrq $32, %r8
+; AVX2-NEXT:    shrq $32, %r9
 ; AVX2-NEXT:    shrl $16, %r10d
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rdi
-; AVX2-NEXT:    movq %rdi, %rax
-; AVX2-NEXT:    movl %edi, %esi
+; AVX2-NEXT:    movq %rdi, %rsi
+; AVX2-NEXT:    movl %edi, %eax
 ; AVX2-NEXT:    movswl %di, %ecx
 ; AVX2-NEXT:    shrq $48, %rdi
-; AVX2-NEXT:    shrq $32, %rax
-; AVX2-NEXT:    shrl $16, %esi
-; AVX2-NEXT:    movswl %si, %esi
-; AVX2-NEXT:    vmovd %esi, %xmm0
+; AVX2-NEXT:    shrq $32, %rsi
+; AVX2-NEXT:    shrl $16, %eax
+; AVX2-NEXT:    cwtl
+; AVX2-NEXT:    vmovd %eax, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm1
 ; AVX2-NEXT:    vmovd %ecx, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm2
-; AVX2-NEXT:    cwtl
+; AVX2-NEXT:    movswl %si, %eax
 ; AVX2-NEXT:    vmovd %eax, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm3
 ; AVX2-NEXT:    movswl %di, %eax
@@ -1645,9 +1645,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX2-NEXT:    movswl %r10w, %eax
 ; AVX2-NEXT:    vmovd %eax, %xmm0
 ; AVX2-NEXT:    vcvtph2ps %xmm0, %xmm0
-; AVX2-NEXT:    vmovd %r9d, %xmm5
+; AVX2-NEXT:    vmovd %r8d, %xmm5
 ; AVX2-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX2-NEXT:    movswl %r8w, %eax
+; AVX2-NEXT:    movswl %r9w, %eax
 ; AVX2-NEXT:    vmovd %eax, %xmm6
 ; AVX2-NEXT:    vcvtph2ps %xmm6, %xmm6
 ; AVX2-NEXT:    movswl %dx, %eax
@@ -1672,25 +1672,25 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX512-LABEL: cvt_8i16_to_8f64:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vpextrq $1, %xmm0, %rdx
-; AVX512-NEXT:    movq %rdx, %r8
+; AVX512-NEXT:    movq %rdx, %r9
 ; AVX512-NEXT:    movl %edx, %r10d
-; AVX512-NEXT:    movswl %dx, %r9d
+; AVX512-NEXT:    movswl %dx, %r8d
 ; AVX512-NEXT:    shrq $48, %rdx
-; AVX512-NEXT:    shrq $32, %r8
+; AVX512-NEXT:    shrq $32, %r9
 ; AVX512-NEXT:    shrl $16, %r10d
 ; AVX512-NEXT:    vmovq %xmm0, %rdi
-; AVX512-NEXT:    movq %rdi, %rax
-; AVX512-NEXT:    movl %edi, %esi
+; AVX512-NEXT:    movq %rdi, %rsi
+; AVX512-NEXT:    movl %edi, %eax
 ; AVX512-NEXT:    movswl %di, %ecx
 ; AVX512-NEXT:    shrq $48, %rdi
-; AVX512-NEXT:    shrq $32, %rax
-; AVX512-NEXT:    shrl $16, %esi
-; AVX512-NEXT:    movswl %si, %esi
-; AVX512-NEXT:    vmovd %esi, %xmm0
+; AVX512-NEXT:    shrq $32, %rsi
+; AVX512-NEXT:    shrl $16, %eax
+; AVX512-NEXT:    cwtl
+; AVX512-NEXT:    vmovd %eax, %xmm0
 ; AVX512-NEXT:    vcvtph2ps %xmm0, %xmm0
 ; AVX512-NEXT:    vmovd %ecx, %xmm1
 ; AVX512-NEXT:    vcvtph2ps %xmm1, %xmm1
-; AVX512-NEXT:    cwtl
+; AVX512-NEXT:    movswl %si, %eax
 ; AVX512-NEXT:    vmovd %eax, %xmm2
 ; AVX512-NEXT:    vcvtph2ps %xmm2, %xmm2
 ; AVX512-NEXT:    movswl %di, %eax
@@ -1699,9 +1699,9 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
 ; AVX512-NEXT:    movswl %r10w, %eax
 ; AVX512-NEXT:    vmovd %eax, %xmm4
 ; AVX512-NEXT:    vcvtph2ps %xmm4, %xmm4
-; AVX512-NEXT:    vmovd %r9d, %xmm5
+; AVX512-NEXT:    vmovd %r8d, %xmm5
 ; AVX512-NEXT:    vcvtph2ps %xmm5, %xmm5
-; AVX512-NEXT:    movswl %r8w, %eax
+; AVX512-NEXT:    movswl %r9w, %eax
 ; AVX512-NEXT:    vmovd %eax, %xmm6
 ; AVX512-NEXT:    vcvtph2ps %xmm6, %xmm6
 ; AVX512-NEXT:    movswl %dx, %eax