From e37cdbeeabfb17821b9ff5d2f42e9f440882dab8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 24 Jan 2020 11:16:16 +0000
Subject: [PATCH] [X86][SSE] Add shufps+shufps test for fold through commutation

As mentioned on D73023, lowerShuffleWithSHUFPS should be able to commute
the shufps inputs to fold the second arg as it will then permute the
shufps result anyway.
---
 llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll | 28 ++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 4b012e7..0462caf 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -2467,3 +2467,31 @@ define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   ret <4 x float> %shuffle
 }
+
+define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, <4 x float>* %a1) {
+; SSE-LABEL: shuffle_mem_v4f32_0624:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps (%rdi), %xmm1
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: shuffle_mem_v4f32_0624:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vmovaps (%rdi), %xmm1
+; AVX1OR2-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
+; AVX1OR2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1OR2-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_mem_v4f32_0624:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovaps (%rdi), %xmm2
+; AVX512VL-NEXT:    vmovaps {{.*#+}} xmm1 = [0,6,2,4]
+; AVX512VL-NEXT:    vpermi2ps %xmm0, %xmm2, %xmm1
+; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512VL-NEXT:    retq
+  %1 = load <4 x float>, <4 x float>* %a1
+  %2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
+  ret <4 x float> %2
+}
-- 
2.7.4