From b4b6e77454b6822e1ba7bdaa1b5bde8654c3e87f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 28 Jul 2020 19:48:35 +0100 Subject: [PATCH] [DAG] isSplatValue - add support for TRUNCATE/SIGN_EXTEND/ZERO_EXTEND These are just pass-throughs to the source operand - we can't assume that ANY_EXTEND(splat) will still be a splat though. --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 + llvm/test/CodeGen/X86/vector-fshl-128.ll | 96 ++++++++++----------- llvm/test/CodeGen/X86/vector-fshl-256.ll | 48 +++++------ llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 38 ++++---- llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 36 ++++---- llvm/test/CodeGen/X86/vector-fshl-rot-512.ll | 64 +++++++------- llvm/test/CodeGen/X86/vector-fshr-128.ll | 100 +++++++++++----------- llvm/test/CodeGen/X86/vector-fshr-256.ll | 48 +++++------ llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 38 ++++---- llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 36 ++++---- llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 64 +++++++------- llvm/test/CodeGen/X86/vector-rotate-128.ll | 36 ++++---- llvm/test/CodeGen/X86/vector-rotate-256.ll | 32 +++---- llvm/test/CodeGen/X86/vector-shift-ashr-128.ll | 20 ++--- llvm/test/CodeGen/X86/vector-shift-ashr-256.ll | 10 +-- llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll | 60 ++++++------- llvm/test/CodeGen/X86/vector-shift-lshr-128.ll | 20 ++--- llvm/test/CodeGen/X86/vector-shift-lshr-256.ll | 10 +-- llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll | 60 ++++++------- llvm/test/CodeGen/X86/vector-shift-shl-128.ll | 20 ++--- llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 10 +-- llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll | 60 ++++++------- 22 files changed, 424 insertions(+), 486 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 592c09c..87d2fa1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2323,6 +2323,10 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, } break; } + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + return isSplatValue(V.getOperand(0), DemandedElts, UndefElts); } // We don't support other cases than those above for scalable vectors at diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index 0192d1e..81c291f 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -1947,16 +1947,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512F-LABEL: splatvar_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512F-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero -; AVX512F-NEXT: vpsllvd %zmm4, %zmm3, %zmm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero,xmm4[8],zero,zero,zero,xmm4[9],zero,zero,zero,xmm4[10],zero,zero,zero,xmm4[11],zero,zero,zero,xmm4[12],zero,zero,zero,xmm4[13],zero,zero,zero,xmm4[14],zero,zero,zero,xmm4[15],zero,zero,zero +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512F-NEXT: vpsubb %xmm2, %xmm3, %xmm3 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512F-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1 +; AVX512F-NEXT: vpsrld %xmm3, %zmm1, %zmm1 +; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512F-NEXT: vpbroadcastb %xmm2, %xmm2 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpslld %xmm4, %zmm3, %zmm3 ; AVX512F-NEXT: vpord %zmm1, %zmm3, %zmm1 ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 @@ -1967,16 +1967,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512VL-LABEL: splatvar_funnnel_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VL-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero -; AVX512VL-NEXT: vpsllvd %zmm4, %zmm3, %zmm3 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero,xmm4[8],zero,zero,zero,xmm4[9],zero,zero,zero,xmm4[10],zero,zero,zero,xmm4[11],zero,zero,zero,xmm4[12],zero,zero,zero,xmm4[13],zero,zero,zero,xmm4[14],zero,zero,zero,xmm4[15],zero,zero,zero +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VL-NEXT: vpsubb %xmm2, %xmm3, %xmm3 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm1, %zmm1 +; AVX512VL-NEXT: vpsrld %xmm3, %zmm1, %zmm1 +; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; AVX512VL-NEXT: vpbroadcastb %xmm2, %xmm2 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpslld %xmm4, %zmm3, %zmm3 ; AVX512VL-NEXT: vpord %zmm1, %zmm3, %zmm1 ; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 @@ -1988,16 +1988,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512BW-LABEL: splatvar_funnnel_v16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512BW-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512BW-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512BW-NEXT: vpsubb %xmm2, %xmm3, %xmm3 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512BW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero -; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlw %xmm3, %ymm1, %ymm1 +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512BW-NEXT: vpbroadcastb %xmm2, %xmm2 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm4, %ymm3, %ymm3 ; AVX512BW-NEXT: vpor %ymm1, %ymm3, %ymm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 @@ -2009,16 +2009,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8: ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512VBMI2-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm3, %xmm3 +; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero -; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 +; AVX512VBMI2-NEXT: vpsrlw %xmm3, %ymm1, %ymm1 +; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 +; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VBMI2-NEXT: vpsllw %xmm4, %ymm3, %ymm3 ; AVX512VBMI2-NEXT: vpor %ymm1, %ymm3, %ymm1 ; AVX512VBMI2-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512VBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 @@ -2029,16 +2029,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512VLBW-NEXT: vpsllvw %ymm4, %ymm3, %ymm3 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm3, %xmm3 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm1, %ymm1 +; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %xmm2 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm4, %ymm3, %ymm3 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm3, %ymm1 ; AVX512VLBW-NEXT: vpmovwb %ymm1, %xmm1 ; AVX512VLBW-NEXT: vptestnmb %xmm2, %xmm2, %k1 @@ -2049,16 +2049,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; ; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8: ; AVX512VLVBMI2: # %bb.0: -; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512VLVBMI2-NEXT: vpsllvw %ymm4, %ymm3, %ymm3 -; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero +; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm3, %xmm3 +; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512VLVBMI2-NEXT: vpsrlvw %ymm4, %ymm1, %ymm1 +; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %ymm1, %ymm1 +; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 +; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %ymm3, %ymm3 ; AVX512VLVBMI2-NEXT: vpor %ymm1, %ymm3, %ymm1 ; AVX512VLVBMI2-NEXT: vpmovwb %ymm1, %xmm1 ; AVX512VLVBMI2-NEXT: vptestnmb %xmm2, %xmm2, %k1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index 0cf4c17..afe7716 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -1526,13 +1526,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512BW-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512BW-NEXT: vpsubb %ymm2, %ymm4, %ymm4 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm4, %zmm3, %zmm3 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512BW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 @@ -1546,13 +1546,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512VBMI2-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VBMI2-NEXT: vpsubb %ymm2, %ymm4, %ymm4 -; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero +; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm3, %zmm3 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 +; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512VBMI2-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512VBMI2-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512VBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 @@ -1565,13 +1565,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512VLBW-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLBW-NEXT: vpsubb %ymm2, %ymm4, %ymm4 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm3, %zmm3 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512VLBW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512VLBW-NEXT: vptestnmb %ymm2, %ymm2, %k1 @@ -1584,13 +1584,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512VLVBMI2-NEXT: vpsllvw %zmm4, %zmm3, %zmm3 -; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLVBMI2-NEXT: vpsubb %ymm2, %ymm4, %ymm4 -; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero +; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm3, %zmm3 +; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512VLVBMI2-NEXT: vpsrlvw %zmm4, %zmm1, %zmm1 +; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 ; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512VLVBMI2-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512VLVBMI2-NEXT: vptestnmb %ymm2, %ymm2, %k1 diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll index 59bef3a..b1eb2fd 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -1056,17 +1056,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; AVX512F-LABEL: splatvar_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512F-NEXT: vpsllvd %zmm3, %zmm0, %zmm3 +; AVX512F-NEXT: vpslld %xmm3, %zmm0, %zmm3 ; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512F-NEXT: vpsubb %xmm1, %xmm4, %xmm1 ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm0, %zmm3, %zmm0 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -1074,17 +1073,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; AVX512VL-LABEL: splatvar_funnnel_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VL-NEXT: vpsllvd %zmm3, %zmm0, %zmm3 +; AVX512VL-NEXT: vpslld %xmm3, %zmm0, %zmm3 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm4, %xmm1 ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm0, %zmm3, %zmm0 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512VL-NEXT: vzeroupper @@ -1092,17 +1090,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; AVX512BW-LABEL: splatvar_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero -; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512BW-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm0, %ymm3, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 @@ -1111,17 +1108,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512VLBW-NEXT: vpsllvw %ymm3, %ymm0, %ymm3 +; AVX512VLBW-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm3, %ymm0 ; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512VLBW-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index fd0e1c7..7db474f 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -841,34 +841,34 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; ; AVX512BW-LABEL: splatvar_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero +; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3 +; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubb %ymm1, %ymm4, %ymm1 -; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %ymm1 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero +; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VLBW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3 +; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubb %ymm1, %ymm4, %ymm1 -; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll index ff17767..2481b8e 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -366,50 +366,50 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastw %xmm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1 -; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm2 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm4 -; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 -; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512F-NEXT: vpsubw %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512F-NEXT: vpsllw %xmm3, %ymm4, %ymm5 +; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT: vpsubw %xmm1, %xmm5, %xmm1 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512F-NEXT: vpsrlvd %zmm1, %zmm3, %zmm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero +; AVX512F-NEXT: vpsrld %xmm1, %zmm2, %zmm1 ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastw %xmm1, %ymm1 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm2 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 -; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm4, %ymm1 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512VL-NEXT: vpsllw %xmm3, %ymm4, %ymm5 +; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512VL-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsubw %xmm1, %xmm5, %xmm1 +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm3, %zmm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero +; AVX512VL-NEXT: vpsrld %xmm1, %zmm2, %zmm1 ; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index 817bca0..db76d3e 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -1967,18 +1967,18 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512F-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero -; AVX512F-NEXT: vpsrlvd %zmm4, %zmm3, %zmm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero,xmm4[8],zero,zero,zero,xmm4[9],zero,zero,zero,xmm4[10],zero,zero,zero,xmm4[11],zero,zero,zero,xmm4[12],zero,zero,zero,xmm4[13],zero,zero,zero,xmm4[14],zero,zero,zero,xmm4[15],zero,zero,zero +; AVX512F-NEXT: vpbroadcastb %xmm2, %xmm4 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpsrld %xmm5, %zmm3, %zmm3 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512F-NEXT: vpsubb %xmm2, %xmm5, %xmm2 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512F-NEXT: vpsllvd %zmm4, %zmm0, %zmm0 +; AVX512F-NEXT: vpslld %xmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm3, %zmm0, %zmm0 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512F-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 +; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512F-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm2 ; AVX512F-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1987,18 +1987,18 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VL-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero -; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm3, %zmm3 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero,xmm4[4],zero,zero,zero,xmm4[5],zero,zero,zero,xmm4[6],zero,zero,zero,xmm4[7],zero,zero,zero,xmm4[8],zero,zero,zero,xmm4[9],zero,zero,zero,xmm4[10],zero,zero,zero,xmm4[11],zero,zero,zero,xmm4[12],zero,zero,zero,xmm4[13],zero,zero,zero,xmm4[14],zero,zero,zero,xmm4[15],zero,zero,zero +; AVX512VL-NEXT: vpbroadcastb %xmm2, %xmm4 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsrld %xmm5, %zmm3, %zmm3 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VL-NEXT: vpsubb %xmm2, %xmm5, %xmm2 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VL-NEXT: vpsllvd %zmm4, %zmm0, %zmm0 +; AVX512VL-NEXT: vpslld %xmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm3, %zmm0, %zmm0 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 -; AVX512VL-NEXT: vpcmpeqb %xmm3, %xmm2, %xmm2 +; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX512VL-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm2 ; AVX512VL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq @@ -2008,17 +2008,17 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512BW-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 +; AVX512BW-NEXT: vpbroadcastb %xmm2, %xmm4 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm5, %ymm3, %ymm3 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512BW-NEXT: vpsubb %xmm2, %xmm5, %xmm2 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512BW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero -; AVX512BW-NEXT: vpsllvw %zmm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; AVX512BW-NEXT: vptestnmb %zmm4, %zmm4, %k1 ; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512BW-NEXT: vzeroupper @@ -2029,17 +2029,17 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512VBMI2-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 +; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %xmm4 +; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VBMI2-NEXT: vpsrlw %xmm5, %ymm3, %ymm3 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm5, %xmm2 +; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero -; AVX512VBMI2-NEXT: vpsllvw %zmm4, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VBMI2-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0 -; AVX512VBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 +; AVX512VBMI2-NEXT: vptestnmb %zmm4, %zmm4, %k1 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} ; AVX512VBMI2-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512VBMI2-NEXT: vzeroupper @@ -2049,17 +2049,17 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512VLBW-NEXT: vpsrlvw %ymm4, %ymm3, %ymm3 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero +; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %xmm4 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm5, %ymm3, %ymm3 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm5, %xmm2 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512VLBW-NEXT: vpsllvw %ymm4, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0 -; AVX512VLBW-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; AVX512VLBW-NEXT: vptestnmb %xmm4, %xmm4, %k1 ; AVX512VLBW-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} ; AVX512VLBW-NEXT: vzeroupper ; AVX512VLBW-NEXT: retq @@ -2068,17 +2068,17 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> % ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 -; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %xmm2 -; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512VLVBMI2-NEXT: vpsrlvw %ymm4, %ymm3, %ymm3 -; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 -; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero +; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %xmm4 +; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLVBMI2-NEXT: vpsrlw %xmm5, %ymm3, %ymm3 +; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm5, %xmm2 +; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512VLVBMI2-NEXT: vpsllvw %ymm4, %ymm0, %ymm0 +; AVX512VLVBMI2-NEXT: vpsllw %xmm2, %ymm0, %ymm0 ; AVX512VLVBMI2-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VLVBMI2-NEXT: vpmovwb %ymm0, %xmm0 -; AVX512VLVBMI2-NEXT: vptestnmb %xmm2, %xmm2, %k1 +; AVX512VLVBMI2-NEXT: vptestnmb %xmm4, %xmm4, %k1 ; AVX512VLVBMI2-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} ; AVX512VLVBMI2-NEXT: vzeroupper ; AVX512VLVBMI2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index f0848cf..6449d58 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -1528,13 +1528,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512BW-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512BW-NEXT: vpsubb %ymm2, %ymm4, %ymm4 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm4, %zmm3, %zmm3 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512BW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpsllvw %zmm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 @@ -1548,13 +1548,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512VBMI2-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VBMI2-NEXT: vpsubb %ymm2, %ymm4, %ymm4 -; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero +; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm3, %zmm3 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VBMI2-NEXT: vpsllvw %zmm4, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0 ; AVX512VBMI2-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VBMI2-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 @@ -1567,13 +1567,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512VLBW-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLBW-NEXT: vpsubb %ymm2, %ymm4, %ymm4 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm3, %zmm3 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VLBW-NEXT: vpsllvw %zmm4, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VLBW-NEXT: vptestnmb %ymm2, %ymm2, %k1 @@ -1585,13 +1585,13 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512VLVBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2 ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %ymm2 -; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512VLVBMI2-NEXT: vpsrlvw %zmm4, %zmm3, %zmm3 -; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} ymm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLVBMI2-NEXT: vpsubb %ymm2, %ymm4, %ymm4 -; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm4 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero,ymm4[16],zero,ymm4[17],zero,ymm4[18],zero,ymm4[19],zero,ymm4[20],zero,ymm4[21],zero,ymm4[22],zero,ymm4[23],zero,ymm4[24],zero,ymm4[25],zero,ymm4[26],zero,ymm4[27],zero,ymm4[28],zero,ymm4[29],zero,ymm4[30],zero,ymm4[31],zero +; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm3, %zmm3 +; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLVBMI2-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VLVBMI2-NEXT: vpsllvw %zmm4, %zmm0, %zmm0 +; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0 ; AVX512VLVBMI2-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLVBMI2-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VLVBMI2-NEXT: vptestnmb %ymm2, %ymm2, %k1 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index 8fe7ba9..6cda97e 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -1132,17 +1132,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; AVX512F-LABEL: splatvar_funnnel_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512F-NEXT: vpsrlvd %zmm3, %zmm0, %zmm3 +; AVX512F-NEXT: vpsrld %xmm3, %zmm0, %zmm3 ; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512F-NEXT: vpsubb %xmm1, %xmm4, %xmm1 ; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm3, %zmm0, %zmm0 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -1150,17 +1149,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; AVX512VL-LABEL: splatvar_funnnel_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VL-NEXT: vpsrlvd %zmm3, %zmm0, %zmm3 +; AVX512VL-NEXT: vpsrld %xmm3, %zmm0, %zmm3 ; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm4, %xmm1 ; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm3, %zmm0, %zmm0 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512VL-NEXT: vzeroupper @@ -1168,17 +1166,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; AVX512BW-LABEL: splatvar_funnnel_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero -; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512BW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 @@ -1187,17 +1184,16 @@ define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512VLBW-NEXT: vpsrlvw %ymm3, %ymm0, %ymm3 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm3, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512VLBW-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index 4e92bfc..fffa286 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -916,34 +916,34 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; ; AVX512BW-LABEL: splatvar_funnnel_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero +; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3 +; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 ; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubb %ymm1, %ymm4, %ymm1 -; AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %ymm1 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm3 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero,ymm3[16],zero,ymm3[17],zero,ymm3[18],zero,ymm3[19],zero,ymm3[20],zero,ymm3[21],zero,ymm3[22],zero,ymm3[23],zero,ymm3[24],zero,ymm3[25],zero,ymm3[26],zero,ymm3[27],zero,ymm3[28],zero,ymm3[29],zero,ymm3[30],zero,ymm3[31],zero +; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 ; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubb %ymm1, %ymm4, %ymm1 -; AVX512VLBW-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll index fda0dac..04a8831 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -366,50 +366,50 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastw %xmm1, %ymm1 -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1 -; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm2 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpsrlw %xmm2, %ymm3, %ymm4 -; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 -; AVX512F-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512F-NEXT: vpsubw %ymm1, %ymm4, %ymm1 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm4, %ymm5 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512F-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX512F-NEXT: vpsubw %xmm1, %xmm5, %xmm1 +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512F-NEXT: vpsllvd %zmm1, %zmm3, %zmm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero +; AVX512F-NEXT: vpslld %xmm1, %zmm2, %zmm1 ; AVX512F-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastw %xmm1, %ymm1 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm1, %zmm1 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm2 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512VL-NEXT: vpsrlw %xmm2, %ymm3, %ymm4 -; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 -; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm4, %ymm1 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpbroadcastw %xmm1, %xmm1 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm4, %ymm5 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm3, %zmm3 +; AVX512VL-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsubw %xmm1, %xmm5, %xmm1 +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512VL-NEXT: vpsllvd %zmm1, %zmm3, %zmm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm4[0],zero,ymm4[1],zero,ymm4[2],zero,ymm4[3],zero,ymm4[4],zero,ymm4[5],zero,ymm4[6],zero,ymm4[7],zero,ymm4[8],zero,ymm4[9],zero,ymm4[10],zero,ymm4[11],zero,ymm4[12],zero,ymm4[13],zero,ymm4[14],zero,ymm4[15],zero +; AVX512VL-NEXT: vpslld %xmm1, %zmm2, %zmm1 ; AVX512VL-NEXT: vpmovdw %zmm1, %ymm1 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll index d140fb5..6f11c5c 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -1032,14 +1032,13 @@ define <16 x i8> @splatvar_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512F-LABEL: splatvar_rotate_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512F-NEXT: vpsubb %xmm1, %xmm2, %xmm2 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm1 -; AVX512F-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero -; AVX512F-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0 +; AVX512F-NEXT: vpslld %xmm1, %zmm0, %zmm1 +; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512F-NEXT: vpsrld %xmm2, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm0, %zmm1, %zmm0 ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -1047,14 +1046,13 @@ define <16 x i8> @splatvar_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512VL-LABEL: splatvar_rotate_v16i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm1, %xmm2, %xmm2 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm1 -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero,xmm2[4],zero,zero,zero,xmm2[5],zero,zero,zero,xmm2[6],zero,zero,zero,xmm2[7],zero,zero,zero,xmm2[8],zero,zero,zero,xmm2[9],zero,zero,zero,xmm2[10],zero,zero,zero,xmm2[11],zero,zero,zero,xmm2[12],zero,zero,zero,xmm2[13],zero,zero,zero,xmm2[14],zero,zero,zero,xmm2[15],zero,zero,zero -; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vpslld %xmm1, %zmm0, %zmm1 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsrld %xmm2, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm0, %zmm1, %zmm0 ; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512VL-NEXT: vzeroupper @@ -1062,14 +1060,13 @@ define <16 x i8> @splatvar_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512BW-LABEL: splatvar_rotate_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512BW-NEXT: vpsubb %xmm1, %xmm2, %xmm2 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 @@ -1078,14 +1075,13 @@ define <16 x i8> @splatvar_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512VLBW-LABEL: splatvar_rotate_v16i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm2, %xmm2 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm1 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512VLBW-NEXT: vpsrlvw %ymm2, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm1 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm2, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX512VLBW-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512VLBW-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll index a850ab5..f54aea8 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -831,29 +831,29 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; ; AVX512BW-LABEL: splatvar_rotate_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512BW-NEXT: vpsubb %ymm1, %ymm2, %ymm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512BW-NEXT: vpsubb %xmm1, %xmm3, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_rotate_v32i8: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %ymm1 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLBW-NEXT: vpsubb %ymm1, %ymm2, %ymm2 ; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpmovzxbw {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512VLBW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm3, %xmm1 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512VLBW-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll index 9b1fb29..b4bcca5 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -863,20 +863,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v16i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -884,20 +882,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v16i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v16i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll index ae9c375..141b68d 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -933,10 +933,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; ; AVX512BW-LABEL: splatvar_shift_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: retq ; @@ -956,10 +955,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; ; AVX512BWVL-LABEL: splatvar_shift_v32i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX512BWVL-NEXT: vpmovsxbw %ymm0, %zmm0 -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BWVL-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BWVL-NEXT: vpsraw %xmm1, %zmm0, %zmm0 ; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BWVL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll index a994d661..bac8f61 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll @@ -1385,20 +1385,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v8i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v8i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -1406,20 +1404,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v8i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v8i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq @@ -1527,20 +1523,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v4i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v4i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -1548,20 +1542,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v4i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v4i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq @@ -1660,20 +1652,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v2i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v2i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -1681,20 +1671,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v2i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsravd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpsrad %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v2i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsravw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll index 2e19f75..7dab12f 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -713,20 +713,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v16i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -734,20 +732,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v16i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v16i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll index 9119e32..9ba3e4a 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll @@ -762,10 +762,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; ; AVX512BW-LABEL: splatvar_shift_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: retq ; @@ -782,10 +781,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; ; AVX512BWVL-LABEL: splatvar_shift_v32i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BWVL-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BWVL-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BWVL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll index 8a843ef..6b9ac09 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll @@ -1135,20 +1135,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v8i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v8i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -1156,20 +1154,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v8i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v8i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq @@ -1257,20 +1253,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v4i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v4i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -1278,20 +1272,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v4i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v4i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq @@ -1370,20 +1362,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v2i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v2i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -1391,20 +1381,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v2i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpsrld %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v2i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll index 5f1325a..4b83700e 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -623,20 +623,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v16i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -644,20 +642,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v16i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v16i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll index 1296fcf..8ed95ba 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll @@ -688,10 +688,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; ; AVX512BW-LABEL: splatvar_shift_v32i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: retq ; @@ -707,10 +706,9 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; ; AVX512BWVL-LABEL: splatvar_shift_v32i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %ymm1 ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BWVL-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BWVL-NEXT: vpsllw %xmm1, %zmm0, %zmm0 ; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BWVL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll index 808033e..1c68808 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll @@ -988,20 +988,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v8i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v8i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -1009,20 +1007,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v8i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v8i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq @@ -1105,20 +1101,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v4i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v4i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -1126,20 +1120,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v4i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v4i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq @@ -1215,20 +1207,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind { ; ; AVX512DQ-LABEL: splatvar_shift_v2i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQ-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQ-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQ-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: splatvar_shift_v2i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -1236,20 +1226,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind { ; ; AVX512DQVL-LABEL: splatvar_shift_v2i8: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512DQVL-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero -; AVX512DQVL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 +; AVX512DQVL-NEXT: vpslld %xmm1, %zmm0, %zmm0 ; AVX512DQVL-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQVL-NEXT: vzeroupper ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: splatvar_shift_v2i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpbroadcastb %xmm1, %xmm1 +; AVX512BWVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BWVL-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 ; AVX512BWVL-NEXT: vzeroupper ; AVX512BWVL-NEXT: retq -- 2.7.4