From 27e01e675c1acd9f648847d96f296b611f22628f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 26 Apr 2019 18:02:14 +0000
Subject: [PATCH] [X86][AVX] Fold extract_subvector(broadcast(x)) ->
 broadcast(x) iff x has one use

llvm-svn: 359332
---
 llvm/lib/Target/X86/X86ISelLowering.cpp                  |  7 +++++++
 llvm/test/CodeGen/X86/avx512-hadd-hsub.ll                |  6 ++----
 llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll |  2 +-
 llvm/test/CodeGen/X86/vector-fshl-rot-256.ll             | 14 +++++++-------
 llvm/test/CodeGen/X86/vector-fshl-rot-512.ll             | 16 ++++++++--------
 llvm/test/CodeGen/X86/vector-fshr-rot-256.ll             | 14 +++++++-------
 llvm/test/CodeGen/X86/vector-fshr-rot-512.ll             | 16 ++++++++--------
 llvm/test/CodeGen/X86/vector-rotate-256.ll               | 12 ++++++------
 llvm/test/CodeGen/X86/vector-rotate-512.ll               |  8 ++++----
 9 files changed, 50 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7aaf71a..23eec37 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42590,6 +42590,13 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
         VT, SDLoc(N),
         InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements()));
 
+  // If we're extracting from a broadcast then we're better off just
+  // broadcasting to the smaller type directly, assuming this is the only use.
+  // As it's a broadcast we don't care about the extraction index.
+  if (InVec.getOpcode() == X86ISD::VBROADCAST && InVec.hasOneUse() &&
+      InVec.getOperand(0).getValueSizeInBits() <= VT.getSizeInBits())
+    return DAG.getNode(X86ISD::VBROADCAST, SDLoc(N), VT, InVec.getOperand(0));
+
   // If we're extracting the lowest subvector and we're the only user,
   // we may be able to perform this with a smaller vector width.
   if (IdxVal == 0 && InVec.hasOneUse()) {
diff --git a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
index f16d3bb..283f950 100644
--- a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
+++ b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
@@ -249,8 +249,7 @@ define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) {
 ; KNL-LABEL: fsub_noundef_ee:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    vextractf32x4 $2, %zmm1, %xmm0
-; KNL-NEXT:    vbroadcastsd %xmm0, %zmm1
-; KNL-NEXT:    vextractf32x4 $2, %zmm1, %xmm1
+; KNL-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
 ; KNL-NEXT:    vsubpd %xmm0, %xmm1, %xmm0
 ; KNL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; KNL-NEXT:    retq
@@ -258,8 +257,7 @@ define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) {
 ; SKX-LABEL: fsub_noundef_ee:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vextractf32x4 $2, %zmm1, %xmm0
-; SKX-NEXT:    vbroadcastsd %xmm0, %zmm1
-; SKX-NEXT:    vextractf32x4 $2, %zmm1, %xmm1
+; SKX-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
 ; SKX-NEXT:    vsubpd %xmm0, %xmm1, %xmm0
 ; SKX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; SKX-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index eca9506..abbeff1 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -1819,7 +1819,7 @@ define <4 x i32> @test_16xi32_to_4xi32_perm_mask9(<16 x i32> %vec) {
 ; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
 ; CHECK-NEXT:    vpermps %ymm2, %ymm1, %ymm1
 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
+; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
 ; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
index 75fb60f..b56b555 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll
@@ -542,7 +542,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
 ;
 ; AVX2-LABEL: splatvar_funnnel_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
 ; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpsllq %xmm3, %ymm0, %ymm3
@@ -625,7 +625,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
 ;
 ; AVX2-LABEL: splatvar_funnnel_v8i32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
 ; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
 ; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
@@ -710,7 +710,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
 ;
 ; AVX2-LABEL: splatvar_funnnel_v16i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX2-NEXT:    vpsllw %xmm2, %ymm0, %ymm2
@@ -723,7 +723,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
 ;
 ; AVX512-LABEL: splatvar_funnnel_v16i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastw %xmm1, %ymm1
+; AVX512-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512-NEXT:    vpsllw %xmm2, %ymm0, %ymm2
@@ -789,7 +789,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
 ;
 ; AVX2-LABEL: splatvar_funnnel_v32i8:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpsllw %xmm2, %ymm0, %ymm3
@@ -810,7 +810,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
 ;
 ; AVX512F-LABEL: splatvar_funnnel_v32i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX512F-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT:    vpsllw %xmm2, %ymm0, %ymm3
@@ -831,7 +831,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
 ;
 ; AVX512VL-LABEL: splatvar_funnnel_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX512VL-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT:    vpsllw %xmm2, %ymm0, %ymm3
diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
index 70dddd45..c096a6c 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll
@@ -316,7 +316,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw
 define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
 ; AVX512F-LABEL: splatvar_funnnel_v32i16:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpbroadcastw %xmm2, %ymm2
+; AVX512F-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512F-NEXT:    vpsllw %xmm3, %ymm0, %ymm4
@@ -332,7 +332,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 ;
 ; AVX512VL-LABEL: splatvar_funnnel_v32i16:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastw %xmm2, %ymm2
+; AVX512VL-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512VL-NEXT:    vpsllw %xmm3, %ymm0, %ymm4
@@ -348,7 +348,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 ;
 ; AVX512BW-LABEL: splatvar_funnnel_v32i16:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
 ; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
@@ -363,7 +363,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 ;
 ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpbroadcastw %xmm1, %zmm1
+; AVX512VLBW-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
 ; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
@@ -383,7 +383,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
 ; AVX512F-LABEL: splatvar_funnnel_v64i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpbroadcastb %xmm2, %ymm2
+; AVX512F-NEXT:    vpbroadcastb %xmm2, %xmm2
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT:    vpsllw %xmm3, %ymm0, %ymm4
@@ -409,7 +409,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
 ;
 ; AVX512VL-LABEL: splatvar_funnnel_v64i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastb %xmm2, %ymm2
+; AVX512VL-NEXT:    vpbroadcastb %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT:    vpsllw %xmm3, %ymm0, %ymm4
@@ -435,7 +435,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
 ;
 ; AVX512BW-LABEL: splatvar_funnnel_v64i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
+; AVX512BW-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
 ; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
@@ -458,7 +458,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
 ;
 ; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpbroadcastb %xmm1, %zmm1
+; AVX512VLBW-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
 ; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
index f77423b..bc477c2e 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll
@@ -587,7 +587,7 @@ define <4 x i64> @splatvar_funnnel_v4i64(<4 x i64> %x, <4 x i64> %amt) nounwind
 ;
 ; AVX2-LABEL: splatvar_funnnel_v4i64:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
 ; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
 ; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX2-NEXT:    vpsrlq %xmm3, %ymm0, %ymm3
@@ -676,7 +676,7 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind
 ;
 ; AVX2-LABEL: splatvar_funnnel_v8i32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
 ; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
@@ -769,7 +769,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
 ;
 ; AVX2-LABEL: splatvar_funnnel_v16i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
@@ -784,7 +784,7 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounw
 ;
 ; AVX512-LABEL: splatvar_funnnel_v16i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastw %xmm1, %ymm1
+; AVX512-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX512-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512-NEXT:    vpsubw %xmm1, %xmm2, %xmm1
 ; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
@@ -857,7 +857,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
 ;
 ; AVX2-LABEL: splatvar_funnnel_v32i8:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
@@ -880,7 +880,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
 ;
 ; AVX512F-LABEL: splatvar_funnnel_v32i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX512F-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512F-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
@@ -903,7 +903,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind
 ;
 ; AVX512VL-LABEL: splatvar_funnnel_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX512VL-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpsubb %xmm1, %xmm2, %xmm1
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
index 7746e70..ee8e2a9 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll
@@ -328,7 +328,7 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw
 define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind {
 ; AVX512F-LABEL: splatvar_funnnel_v32i16:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpbroadcastw %xmm2, %ymm2
+; AVX512F-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX512F-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
@@ -346,7 +346,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 ;
 ; AVX512VL-LABEL: splatvar_funnnel_v32i16:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastw %xmm2, %ymm2
+; AVX512VL-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
@@ -364,7 +364,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 ;
 ; AVX512BW-LABEL: splatvar_funnnel_v32i16:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpbroadcastw %xmm1, %zmm1
+; AVX512BW-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
 ; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
@@ -379,7 +379,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 ;
 ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpbroadcastw %xmm1, %zmm1
+; AVX512VLBW-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15]
 ; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
@@ -399,7 +399,7 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw
 define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind {
 ; AVX512F-LABEL: splatvar_funnnel_v64i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpbroadcastb %xmm2, %ymm2
+; AVX512F-NEXT:    vpbroadcastb %xmm2, %xmm2
 ; AVX512F-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX512F-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
@@ -427,7 +427,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
 ;
 ; AVX512VL-LABEL: splatvar_funnnel_v64i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastb %xmm2, %ymm2
+; AVX512VL-NEXT:    vpbroadcastb %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
@@ -455,7 +455,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
 ;
 ; AVX512BW-LABEL: splatvar_funnnel_v64i8:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
+; AVX512BW-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
 ; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
@@ -478,7 +478,7 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind
 ;
 ; AVX512VLBW-LABEL: splatvar_funnnel_v64i8:
 ; AVX512VLBW:       # %bb.0:
-; AVX512VLBW-NEXT:    vpbroadcastb %xmm1, %zmm1
+; AVX512VLBW-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
 ; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm3
 ; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll
index ac50c36..d7ca744 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-256.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll
@@ -609,7 +609,7 @@ define <8 x i32> @splatvar_rotate_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ;
 ; AVX2-LABEL: splatvar_rotate_v8i32:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
 ; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
 ; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
@@ -697,7 +697,7 @@ define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; AVX2-LABEL: splatvar_rotate_v16i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX2-NEXT:    vpsllw %xmm2, %ymm0, %ymm2
@@ -710,7 +710,7 @@ define <16 x i16> @splatvar_rotate_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ;
 ; AVX512-LABEL: splatvar_rotate_v16i16:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpbroadcastw %xmm1, %ymm1
+; AVX512-NEXT:    vpbroadcastw %xmm1, %xmm1
 ; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
 ; AVX512-NEXT:    vpsllw %xmm2, %ymm0, %ymm2
@@ -779,7 +779,7 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ;
 ; AVX2-LABEL: splatvar_rotate_v32i8:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX2-NEXT:    vpsllw %xmm2, %ymm0, %ymm3
@@ -800,7 +800,7 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ;
 ; AVX512F-LABEL: splatvar_rotate_v32i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX512F-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT:    vpsllw %xmm2, %ymm0, %ymm3
@@ -821,7 +821,7 @@ define <32 x i8> @splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ;
 ; AVX512VL-LABEL: splatvar_rotate_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX512VL-NEXT:    vpbroadcastb %xmm1, %xmm1
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT:    vpsllw %xmm2, %ymm0, %ymm3
diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll
index 01b4676..54725e3 100644
--- a/llvm/test/CodeGen/X86/vector-rotate-512.ll
+++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll
@@ -311,7 +311,7 @@ define <16 x i32> @splatvar_rotate_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind
 define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
 ; AVX512F-LABEL: splatvar_rotate_v32i16:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpbroadcastw %xmm2, %ymm2
+; AVX512F-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512F-NEXT:    vpsllw %xmm3, %ymm0, %ymm4
@@ -327,7 +327,7 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
 ;
 ; AVX512VL-LABEL: splatvar_rotate_v32i16:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastw %xmm2, %ymm2
+; AVX512VL-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512VL-NEXT:    vpsllw %xmm3, %ymm0, %ymm4
@@ -373,7 +373,7 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind
 define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512F-LABEL: splatvar_rotate_v64i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpbroadcastb %xmm2, %ymm2
+; AVX512F-NEXT:    vpbroadcastb %xmm2, %xmm2
 ; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512F-NEXT:    vpsllw %xmm3, %ymm0, %ymm4
@@ -399,7 +399,7 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ;
 ; AVX512VL-LABEL: splatvar_rotate_v64i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpbroadcastb %xmm2, %ymm2
+; AVX512VL-NEXT:    vpbroadcastb %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512VL-NEXT:    vpsllw %xmm3, %ymm0, %ymm4
-- 
2.7.4
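For illustration, here is a minimal standalone LLVM IR sketch of the pattern the
new combine targets: a broadcast whose only use is an extraction to a narrower
vector type. The function name and the expected-codegen notes are hypothetical
(they are not taken from this patch's test files), and earlier shuffle combines
may simplify this particular input before the new DAG fold ever fires.

; The v8i32 splat of %x's low element would lower to an X86ISD::VBROADCAST of
; a ymm register, and its only use is the extraction of the low 128-bit
; subvector. With the fold above, the broadcast can be re-emitted directly at
; xmm width (e.g. a single "vpbroadcastd %xmm0, %xmm0" on AVX2) instead of a
; ymm broadcast followed by an extract.
define <4 x i32> @extract_low_of_splat(<4 x i32> %x) {
  %splat = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> zeroinitializer
  %low = shufflevector <8 x i32> %splat, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %low
}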