From 22780731255020ce78cf3671a4af94c4a2e9ac03 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sun, 9 Feb 2020 17:51:53 +0000 Subject: [PATCH] [X86][SSE] Add more tests showing failure to lower shuffles as bit rotations --- llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll | 44 ++++++++++++-- llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll | 76 +++++++++++++++++++++++++ llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll | 72 +++++++++++++++++++++++ llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll | 18 ++++++ llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll | 38 +++++++++++++ 5 files changed, 242 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll index c813409..2049c1f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -1773,32 +1773,64 @@ define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_01_02_03_04_05_06( ret <16 x i8> %shuffle } +define <16 x i8> @shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14(<16 x i8> %a) { +; SSE2-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[3,0,1,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,4,5,6] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,0,1,2,4,5,6,7] +; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6] +; SSE2-NEXT: packuswb %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: +; SSSE3: # %bb.0: +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: +; SSE41: # %bb.0: +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14] +; SSE41-NEXT: retq +; +; AVX-LABEL: shuffle_v16i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: +; AVX: # %bb.0: +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14] +; AVX-NEXT: retq + %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> + ret <16 x i8> %shuffle +} + ; PR44379 -define <16 x i8> @shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_013_14_15_08_09(<16 x i8> %a) { -; SSE-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_013_14_15_08_09: +define <16 x i8> @shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09(<16 x i8> %a) { +; SSE-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: ; SSE: # %bb.0: ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] ; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] ; SSE-NEXT: retq ; -; AVX1-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_013_14_15_08_09: +; AVX1-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: ; AVX1: # %bb.0: ; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] ; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] ; AVX1-NEXT: retq ; -; AVX2-SLOW-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_013_14_15_08_09: +; AVX2-SLOW-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: ; AVX2-SLOW: # %bb.0: ; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] ; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] ; AVX2-SLOW-NEXT: retq ; -; AVX2-FAST-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_013_14_15_08_09: +; AVX2-FAST-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: ; AVX2-FAST: # %bb.0: ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9] ; AVX2-FAST-NEXT: retq ; -; AVX512VL-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_013_14_15_08_09: +; AVX512VL-LABEL: shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9] ; AVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll index 95669b3..28c19e3 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll @@ -2294,6 +2294,82 @@ define <8 x i16> @shuffle_v8i16_467uu675(<8 x i16> %a) { ret <8 x i16> %shuffle } +define <8 x i16> @shuffle_v8i16_10325476(<8 x i16> %a) { +; SSE-LABEL: shuffle_v8i16_10325476: +; SSE: # %bb.0: +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] +; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] +; SSE-NEXT: retq +; +; AVX1-LABEL: shuffle_v8i16_10325476: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] +; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: shuffle_v8i16_10325476: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] +; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v8i16_10325476: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13] +; AVX2-FAST-NEXT: retq +; +; AVX512VL-SLOW-LABEL: shuffle_v8i16_10325476: +; AVX512VL-SLOW: # %bb.0: +; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] +; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] +; AVX512VL-SLOW-NEXT: retq +; +; AVX512VL-FAST-LABEL: shuffle_v8i16_10325476: +; AVX512VL-FAST: # %bb.0: +; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13] +; AVX512VL-FAST-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> + ret <8 x i16> %shuffle +} + +define <8 x i16> @shuffle_v8i16_12305674(<8 x i16> %a) { +; SSE-LABEL: shuffle_v8i16_12305674: +; SSE: # %bb.0: +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] +; SSE-NEXT: retq +; +; AVX1-LABEL: shuffle_v8i16_12305674: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: shuffle_v8i16_12305674: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v8i16_12305674: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9] +; AVX2-FAST-NEXT: retq +; +; AVX512VL-SLOW-LABEL: shuffle_v8i16_12305674: +; AVX512VL-SLOW: # %bb.0: +; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7] +; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,7,4] +; AVX512VL-SLOW-NEXT: retq +; +; AVX512VL-FAST-LABEL: shuffle_v8i16_12305674: +; AVX512VL-FAST: # %bb.0: +; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9] +; AVX512VL-FAST-NEXT: retq + %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> + ret <8 x i16> %shuffle +} + ; ; Shuffle to logical bit shifts ; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll index 001288f..087f38d 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -4508,6 +4508,78 @@ define <16 x i16> @shuffle_v16i16_00_01_02_04_07_06_05_04_08_09_10_11_15_14_13_1 ret <16 x i16> %3 } +define <16 x i16> @shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14(<16 x i16> %a) { +; AVX1-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] +; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,4,7,6] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] +; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29] +; AVX2-FAST-NEXT: retq +; +; AVX512VL-SLOW-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: +; AVX512VL-SLOW: # %bb.0: +; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] +; AVX512VL-SLOW-NEXT: retq +; +; AVX512VL-FAST-LABEL: shuffle_v16i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14: +; AVX512VL-FAST: # %bb.0: +; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29] +; AVX512VL-FAST-NEXT: retq + %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> + ret <16 x i16> %shuffle +} + +define <16 x i16> @shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14(<16 x i16> %a) { +; AVX1-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[3,0,1,2,4,5,6,7] +; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,4,5,6] +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,0,1,2,4,5,6,7] +; AVX1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6] +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: +; AVX2-SLOW: # %bb.0: +; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,0,1,2,4,5,6,7,11,8,9,10,12,13,14,15] +; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: +; AVX2-FAST: # %bb.0: +; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,2,3,4,5,14,15,8,9,10,11,12,13,22,23,16,17,18,19,20,21,30,31,24,25,26,27,28,29] +; AVX2-FAST-NEXT: retq +; +; AVX512VL-SLOW-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: +; AVX512VL-SLOW: # %bb.0: +; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,0,1,2,4,5,6,7,11,8,9,10,12,13,14,15] +; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14] +; AVX512VL-SLOW-NEXT: retq +; +; AVX512VL-FAST-LABEL: shuffle_v16i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14: +; AVX512VL-FAST: # %bb.0: +; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,0,1,2,3,4,5,14,15,8,9,10,11,12,13,22,23,16,17,18,19,20,21,30,31,24,25,26,27,28,29] +; AVX512VL-FAST-NEXT: retq + %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> + ret <16 x i16> %shuffle +} + define <16 x i16> @insert_v16i16_0elt_into_zero_vector(i16* %ptr) { ; ALL-LABEL: insert_v16i16_0elt_into_zero_vector: ; ALL: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 6b04bad..5bd9e2d 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -3058,6 +3058,24 @@ define <32 x i8> @shuffle_v32i8_31_32_33_34_35_36_37_38_39_40_41_42_43_44_45_46_ ret <32 x i8> %shuffle } +define <32 x i8> @shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30(<32 x i8> %a) { +; AVX1-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14] +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2OR512VL-LABEL: shuffle_v32i8_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14,19,16,17,18,23,20,21,22,27,24,25,26,31,28,29,30] +; AVX2OR512VL-NEXT: retq + %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> + ret <32 x i8> %shuffle +} + ; PR44379 define <32 x i8> @shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25(<32 x i8> %a) { ; AVX1-LABEL: shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_18_19_20_21_22_23_16_17_26_27_28_29_30_31_24_25: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index c350e7f..99742b6 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -197,6 +197,44 @@ define <32 x i16> @shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19 ret <32 x i16> %c } +define <32 x i16> @shuffle_v32i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14_17_16_19_18_21_20_23_22_25_24_27_26_29_28_31_30(<32 x i16> %a) { +; KNL-LABEL: shuffle_v32i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14_17_16_19_18_21_20_23_22_25_24_27_26_29_28_31_30: +; KNL: ## %bb.0: +; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; KNL-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; KNL-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] +; KNL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] +; KNL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] +; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: shuffle_v32i16_01_00_03_02_05_04_07_06_09_08_11_10_13_12_15_14_17_16_19_18_21_20_23_22_25_24_27_26_29_28_31_30: +; SKX: ## %bb.0: +; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29,34,35,32,33,38,39,36,37,42,43,40,41,46,47,44,45,50,51,48,49,54,55,52,53,58,59,56,57,62,63,60,61] +; SKX-NEXT: retq + %shuffle = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> + ret <32 x i16> %shuffle +} + +define <32 x i16> @shuffle_v32i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30(<32 x i16> %a) { +; KNL-LABEL: shuffle_v32i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: +; KNL: ## %bb.0: +; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; KNL-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[3,0,1,2,4,5,6,7,11,8,9,10,12,13,14,15] +; KNL-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14] +; KNL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,0,1,2,4,5,6,7,11,8,9,10,12,13,14,15] +; KNL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14] +; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: shuffle_v32i16_03_00_01_02_07_04_05_06_11_08_09_10_15_12_13_14_19_16_17_18_23_20_21_22_27_24_25_26_31_28_29_30: +; SKX: ## %bb.0: +; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[6,7,0,1,2,3,4,5,14,15,8,9,10,11,12,13,22,23,16,17,18,19,20,21,30,31,24,25,26,27,28,29,38,39,32,33,34,35,36,37,46,47,40,41,42,43,44,45,54,55,48,49,50,51,52,53,62,63,56,57,58,59,60,61] +; SKX-NEXT: retq + %shuffle = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> + ret <32 x i16> %shuffle +} + define <32 x i16> @shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<32 x i16> %a) { ; KNL-LABEL: shuffle_v32i16_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz: ; KNL: ## %bb.0: -- 2.7.4