From 3def9e11e2652aff5e084c764320981d9c5e786a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 28 Nov 2016 15:50:39 +0000
Subject: [PATCH] [X86][SSE] Added tests showing missed combines of shifts with shuffles.

llvm-svn: 288037
---
 .../CodeGen/X86/vector-shuffle-combining-avx2.ll  | 54 ++++++++++++++++++++++
 .../CodeGen/X86/vector-shuffle-combining-ssse3.ll | 54 ++++++++++++++++++++++
 2 files changed, 108 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 89194c3..2be4a06 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -596,6 +596,60 @@ define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
   ret <32 x i8> %res1
 }
 
+define <32 x i8> @combine_psrlw_pshufb(<16 x i16> %a0) {
+; X32-LABEL: combine_psrlw_pshufb:
+; X32:       # BB#0:
+; X32-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_psrlw_pshufb:
+; X64:       # BB#0:
+; X64-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14,17,16,19,18,21,20,23,22,25,24,27,26,29,28,31,30]
+; X64-NEXT:    retq
+  %1 = lshr <16 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %2 = bitcast <16 x i16> %1 to <32 x i8>
+  %3 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14, i8 17, i8 16, i8 19, i8 18, i8 21, i8 20, i8 23, i8 22, i8 25, i8 24, i8 27, i8 26, i8 29, i8 28, i8 31, i8 30>)
+  ret <32 x i8> %3
+}
+
+define <32 x i8> @combine_pslld_pshufb(<8 x i32> %a0) {
+; X32-LABEL: combine_pslld_pshufb:
+; X32:       # BB#0:
+; X32-NEXT:    vpslld $24, %ymm0, %ymm0
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_pslld_pshufb:
+; X64:       # BB#0:
+; X64-NEXT:    vpslld $24, %ymm0, %ymm0
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12,19,18,17,16,23,22,21,20,27,26,25,24,31,30,29,28]
+; X64-NEXT:    retq
+  %1 = shl <8 x i32> %a0, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  %2 = bitcast <8 x i32> %1 to <32 x i8>
+  %3 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12, i8 19, i8 18, i8 17, i8 16, i8 23, i8 22, i8 21, i8 20, i8 27, i8 26, i8 25, i8 24, i8 31, i8 30, i8 29, i8 28>)
+  ret <32 x i8> %3
+}
+
+define <32 x i8> @combine_psrlq_pshufb(<4 x i64> %a0) {
+; X32-LABEL: combine_psrlq_pshufb:
+; X32:       # BB#0:
+; X32-NEXT:    vpsrlq $32, %ymm0, %ymm0
+; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,31,30,29,28,27,26,25,24,23]
+; X32-NEXT:    retl
+;
+; X64-LABEL: combine_psrlq_pshufb:
+; X64:       # BB#0:
+; X64-NEXT:    vpsrlq $32, %ymm0, %ymm0
+; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,23,22,21,20,19,18,17,31,30,29,28,27,26,25,24,23]
+; X64-NEXT:    retq
+  %1 = lshr <4 x i64> %a0, <i64 32, i64 32, i64 32, i64 32>
+  %2 = bitcast <4 x i64> %1 to <32 x i8>
+  %3 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23>)
+  ret <32 x i8> %3
+}
+
 define <8 x i32> @constant_fold_permd() {
 ; X32-LABEL: constant_fold_permd:
 ; X32:       # BB#0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index 7676e83..277d541 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -409,6 +409,60 @@ define <16 x i8> @combine_pshufb_as_unary_unpckhwd(<16 x i8> %a0) {
   ret <16 x i8> %1
 }
 
+define <16 x i8> @combine_psrlw_pshufb(<8 x i16> %a0) {
+; SSE-LABEL: combine_psrlw_pshufb:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrlw $8, %xmm0
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_psrlw_pshufb:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero,xmm0[0],zero,zero,zero
+; AVX-NEXT:    retq
+  %1 = lshr <8 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %2 = bitcast <8 x i16> %1 to <16 x i8>
+  %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1>)
+  ret <16 x i8> %3
+}
+
+define <16 x i8> @combine_pslld_pshufb(<4 x i32> %a0) {
+; SSE-LABEL: combine_pslld_pshufb:
+; SSE:       # BB#0:
+; SSE-NEXT:    pslld $8, %xmm0
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pslld_pshufb:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpslld $8, %xmm0, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; AVX-NEXT:    retq
+  %1 = shl <4 x i32> %a0, <i32 8, i32 8, i32 8, i32 8>
+  %2 = bitcast <4 x i32> %1 to <16 x i8>
+  %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>)
+  ret <16 x i8> %3
+}
+
+define <16 x i8> @combine_psrlq_pshufb(<2 x i64> %a0) {
+; SSE-LABEL: combine_psrlq_pshufb:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrlq $48, %xmm0
+; SSE-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_psrlq_pshufb:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrlq $48, %xmm0, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8]
+; AVX-NEXT:    retq
+  %1 = lshr <2 x i64> %a0, <i64 48, i64 48>
+  %2 = bitcast <2 x i64> %1 to <16 x i8>
+  %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>)
+  ret <16 x i8> %3
+}
+
 define <16 x i8> @combine_unpckl_arg0_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
 ; SSE-LABEL: combine_unpckl_arg0_pshufb:
 ; SSE:       # BB#0:
-- 
2.7.4
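Note (illustrative addition, not part of the commit above): these tests pin down shift+shuffle pairs that currently lower to two instructions because the shift is not yet folded into the shuffle. Taking the first AVX2 test as an example, vpsrlw $8 moves the high byte of each 16-bit element into the low byte and zeroes the high byte; the byte-swapping vpshufb then moves it back to the high byte, so the pair is equivalent to masking every element with 0xFF00. A minimal LLVM IR sketch of that equivalence (the function name is made up for illustration):

  ; Illustration only, not taken from the patch.
  define <16 x i16> @psrlw_byteswap_equiv(<16 x i16> %a0) {
    ; lshr by 8 followed by swapping the two bytes of each i16 lane
    ; zeroes the low byte and keeps the high byte in place, i.e. an
    ; AND with 0xFF00 (-256 as i16) per element.
    %r = and <16 x i16> %a0, <i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256, i16 -256>
    ret <16 x i16> %r
  }

Once such a combine is in place, pairs like the vpsrlw+vpshufb above could collapse to a single instruction (a vpand for this case, a single shuffle or shift for the others).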