From 04682939eb7e5ff06ec7e50cbe205b5c0069285f Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 21 Sep 2019 06:44:29 +0000
Subject: [PATCH] [X86] Use sse_load_f32/f64 and timm in patterns for memory
 form of vgetmantss/sd.

Previously we only matched scalar_to_vector and scalar load, but
we should be able to narrow a vector load or match vzload.

Also need to match TargetConstant instead of Constant. The register
patterns were previously updated, but not the memory patterns.

llvm-svn: 372458
---
 llvm/lib/Target/X86/X86InstrAVX512.td      | 7 +++----
 llvm/test/CodeGen/X86/avx512-intrinsics.ll | 6 ++----
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index cd833b7..cb0d009 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10280,12 +10280,11 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               (i32 timm:$src3))>,
                       Sched<[sched]>;
   defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                      (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
+                      (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3),
                       OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
                       (OpNode (_.VT _.RC:$src1),
-                              (_.VT (scalar_to_vector
-                                        (_.ScalarLdFrag addr:$src2))),
-                              (i32 imm:$src3))>,
+                              (_.VT _.ScalarIntMemCPat:$src2),
+                              (i32 timm:$src3))>,
                       Sched<[sched.Folded, sched.ReadAfterFold]>;
   }
 }
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 90b211f..e94f16a 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -4801,15 +4801,13 @@ define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x flo
 define <4 x float> @test_int_x86_avx512_mask_getmant_ss_load(<4 x float> %x0, <4 x float>* %x1p) {
 ; X64-LABEL: test_int_x86_avx512_mask_getmant_ss_load:
 ; X64:       # %bb.0:
-; X64-NEXT:    vmovaps (%rdi), %xmm1
-; X64-NEXT:    vgetmantss $11, %xmm1, %xmm0, %xmm0
+; X64-NEXT:    vgetmantss $11, (%rdi), %xmm0, %xmm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: test_int_x86_avx512_mask_getmant_ss_load:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    vmovaps (%eax), %xmm1
-; X86-NEXT:    vgetmantss $11, %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vgetmantss $11, (%eax), %xmm0, %xmm0
 ; X86-NEXT:    retl
   %x1 = load <4 x float>, <4 x float>* %x1p
   %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> undef, i8 -1, i32 4)
-- 
2.7.4
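
For reference, a minimal standalone reproducer of the new load folding, modeled
on the test updated above. This is only an illustrative sketch: the RUN line,
triple, attribute, and function name are assumptions and are not part of this
patch; the intrinsic signature and expected asm are taken from the test diff.

  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s

  declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)

  ; With this change the full-width <4 x float> load is narrowed and folded
  ; into the instruction's memory operand, so a single vgetmantss with a
  ; memory source is emitted instead of vmovaps plus the register form.
  ; CHECK-LABEL: getmant_ss_fold_load:
  ; CHECK: vgetmantss $11, (%rdi), %xmm0, %xmm0
  define <4 x float> @getmant_ss_fold_load(<4 x float> %x0, <4 x float>* %x1p) {
    %x1 = load <4 x float>, <4 x float>* %x1p
    %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> undef, i8 -1, i32 4)
    ret <4 x float> %res
  }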