From 52c02d70e276aa1e138b9f43988de60838aacb8c Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Sat, 22 Dec 2018 16:59:02 +0000
Subject: [PATCH] [x86] add load fold patterns for movddup with vzext_load

The missed load folding noticed in D55898 is visible independent of that
change either with an adjusted IR pattern to start or with AVX2/AVX512
(where the build vector becomes a broadcast first; movddup is not produced
until we get into isel via tablegen patterns).

Differential Revision: https://reviews.llvm.org/D55936

llvm-svn: 350005
---
 llvm/lib/Target/X86/X86InstrAVX512.td                 | 2 ++
 llvm/lib/Target/X86/X86InstrSSE.td                    | 6 ++++++
 llvm/test/CodeGen/X86/build-vector-128.ll             | 6 ++----
 llvm/test/CodeGen/X86/movddup-load-fold.ll            | 6 ++----
 llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll | 3 +--
 5 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 7e60b9c..105ca2e 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -11217,6 +11217,8 @@ def : Pat<(v2f64 (X86VBroadcast f64:$src)),
           (VMOVDDUPZ128rr (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
 def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
           (VMOVDDUPZ128rm addr:$src)>;
+def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+          (VMOVDDUPZ128rm addr:$src)>;
 
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
                    (v2f64 VR128X:$src0)),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 94cd5a6..e2bcd18 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -4669,12 +4669,16 @@ defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
 let Predicates = [HasAVX, NoVLX] in {
   def : Pat<(X86Movddup (loadv2f64 addr:$src)),
             (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
 }
 
 let Predicates = [UseSSE3] in {
   // No need for aligned memory as this only loads 64-bits.
   def : Pat<(X86Movddup (loadv2f64 addr:$src)),
             (MOVDDUPrm addr:$src)>;
+  def : Pat<(X86Movddup (v2f64 (X86vzload addr:$src))),
+            (MOVDDUPrm addr:$src)>;
 }
 
 //===---------------------------------------------------------------------===//
@@ -8034,6 +8038,8 @@ let Predicates = [HasAVX, NoVLX] in {
             (VMOVDDUPrr VR128:$src)>;
   def : Pat<(v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
             (VMOVDDUPrm addr:$src)>;
+  def : Pat<(v2f64 (X86VBroadcast (v2f64 (X86vzload addr:$src)))),
+            (VMOVDDUPrm addr:$src)>;
 }
 
 let Predicates = [HasAVX1Only] in {
diff --git a/llvm/test/CodeGen/X86/build-vector-128.ll b/llvm/test/CodeGen/X86/build-vector-128.ll
index 1c1dab6..b80f6fa 100644
--- a/llvm/test/CodeGen/X86/build-vector-128.ll
+++ b/llvm/test/CodeGen/X86/build-vector-128.ll
@@ -526,8 +526,7 @@ define <4 x float> @PR37502(float %x, float %y) {
 ;
 ; SSE41-32-LABEL: PR37502:
 ; SSE41-32:       # %bb.0:
-; SSE41-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE41-32-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-32-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
 ; SSE41-32-NEXT:    retl
 ;
 ; SSE41-64-LABEL: PR37502:
@@ -538,8 +537,7 @@ define <4 x float> @PR37502(float %x, float %y) {
 ;
 ; AVX-32-LABEL: PR37502:
 ; AVX-32:       # %bb.0:
-; AVX-32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX-32-NEXT:    retl
 ;
 ; AVX1-64-LABEL: PR37502:
diff --git a/llvm/test/CodeGen/X86/movddup-load-fold.ll b/llvm/test/CodeGen/X86/movddup-load-fold.ll
index a0e65fb..f1af6e9 100644
--- a/llvm/test/CodeGen/X86/movddup-load-fold.ll
+++ b/llvm/test/CodeGen/X86/movddup-load-fold.ll
@@ -9,14 +9,12 @@
 define <4 x float> @movddup_load_fold(float %x, float %y) {
 ; SSE-LABEL: movddup_load_fold:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSE-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
 ; SSE-NEXT:    retl
 ;
 ; AVX-LABEL: movddup_load_fold:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX-NEXT:    retl
   %i0 = insertelement <4 x float> zeroinitializer, float %x, i32 0
   %i1 = insertelement <4 x float> %i0, float %y, i32 1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index 5fe0a2b..37d13d1 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -332,8 +332,7 @@ define void @buildvector_v4f32_0404(float %a, float %b, <4 x float>* %ptr) {
 ; X86AVX2-LABEL: buildvector_v4f32_0404:
 ; X86AVX2:       # %bb.0:
 ; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; X86AVX2-NEXT:    vmovapd %xmm0, (%eax)
 ; X86AVX2-NEXT:    retl
 ;
-- 
2.7.4
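
For reference, a minimal sketch (not part of the patch) of the kind of function that
benefits, modeled on the PR37502 and movddup_load_fold tests updated above; the
function name and exact IR below are illustrative, not the actual test content:

  define <4 x float> @splat_xy(float %x, float %y) {
    %i0 = insertelement <4 x float> undef, float %x, i32 0
    %i1 = insertelement <4 x float> %i0, float %y, i32 1
    %r  = shufflevector <4 x float> %i1, <4 x float> undef,
                        <4 x i32> <i32 0, i32 1, i32 0, i32 1>
    ret <4 x float> %r
  }

On a 32-bit target the two float arguments are adjacent on the stack, so the
<x, y, x, y> result is one 64-bit memory value broadcast to both halves of the
register. Per the test updates above, this used to compile to a scalar load plus a
register-to-register movddup (movsd giving xmm0 = mem[0],zero, then movddup giving
xmm0 = xmm0[0,0]); with the new X86vzload patterns the load is folded and a single
movddup (xmm0 = mem[0,0]) is emitted.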