From 9b04d69ccefb57ec202a0ecdb36a84ed0568aead Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 29 Jun 2020 16:26:53 -0700
Subject: [PATCH] [X86] Prefer AND over PSHUFB for v64i8 when possible

If the shuffle is a blend and one input is a 0 vector, we should prefer
AND over PSHUFB since it's available on more execution ports.

Differential Revision: https://reviews.llvm.org/D82798
---
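Not part of the patch, but for context: below is a minimal standalone
sketch of the equivalence the new lowering relies on, written with
AVX-512BW intrinsics. The helper names and the even/odd-byte mask are
invented for illustration and do not appear in LLVM. A v64i8 blend with
a zero vector can be expressed either as VPSHUFB, where a control byte
with its high bit set zeroes that lane, or as a plain bitwise AND with a
0xFF/0x00 byte mask; both produce identical results, and the AND form is
preferred because it can issue on more execution ports.

    // Illustrative only: both functions keep the even bytes of x and
    // zero the odd bytes of a 64-byte vector. Build with -mavx512bw.
    #include <immintrin.h>
    #include <cstdint>

    // VPSHUFB form: a control byte of 0x80 zeroes that result byte;
    // otherwise the low nibble indexes within the byte's 16-byte lane.
    static __m512i blend_zero_pshufb(__m512i x) {
      alignas(64) int8_t ctrl[64];
      for (int i = 0; i < 64; ++i)
        ctrl[i] = (i % 2 == 0) ? (int8_t)(i % 16) // keep: in-lane identity
                               : (int8_t)0x80;    // zero this byte
      return _mm512_shuffle_epi8(x, _mm512_load_si512(ctrl));
    }

    // AND form: 0xFF keeps a byte, 0x00 clears it. Same result, but AND
    // runs on more ports than the shuffle unit; on Skylake-AVX512, for
    // example, 512-bit VPSHUFB is limited to port 5 while VPANDQ can
    // also use port 0.
    static __m512i blend_zero_and(__m512i x) {
      return _mm512_and_si512(x, _mm512_set1_epi16(0x00FF));
    }

Either routine returns the same vector for any x; the patch simply
teaches lowerV64I8Shuffle to try the AND form (lowerShuffleAsBitMask)
before falling back to lowerShuffleWithPSHUFB.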
 llvm/lib/Target/X86/X86ISelLowering.cpp         | 5 +++++
 llvm/test/CodeGen/X86/avx512-mask-op.ll         | 6 +++---
 llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll | 6 ++++--
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f24dfb0..a1a2c2f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17418,6 +17418,11 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
           lowerShuffleAsBitRotate(DL, MVT::v64i8, V1, Mask, Subtarget, DAG))
     return Rotate;
 
+  // Lower as AND if possible.
+  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v64i8, V1, V2, Mask,
+                                             Zeroable, Subtarget, DAG))
+    return Masked;
+
   if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1, V2,
                                               Zeroable, Subtarget, DAG))
     return PSHUFB;
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 4b3c52d..98b2f30 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1992,12 +1992,12 @@ define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
 ;
 ; SKX-LABEL: test_build_vec_v64i1:
 ; SKX: ## %bb.0:
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
+; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0
 ; SKX-NEXT: retq
 ;
 ; AVX512BW-LABEL: test_build_vec_v64i1:
 ; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512DQ-LABEL: test_build_vec_v64i1:
@@ -2010,7 +2010,7 @@ define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
 ;
 ; X86-LABEL: test_build_vec_v64i1:
 ; X86: ## %bb.0:
-; X86-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero
+; X86-NEXT: vandps LCPI43_0, %zmm0, %zmm0
 ; X86-NEXT: retl
   %ret = select <64 x i1> <i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0>, <64 x i8> %x, <64 x i8> zeroinitializer
   ret <64 x i8> %ret
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
index 2cf8ab1..fb40e96 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -115,7 +115,8 @@ define <64 x i8> @shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<64 x i8> %a) {
 ;
 ; AVX512BW-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
 ; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,0,0,0]
+; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512DQ-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
@@ -126,7 +127,8 @@ define <64 x i8> @shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz(<64 x i8> %a) {
 ;
 ; AVX512VBMI-LABEL: shuffle_v64i8_0zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VBMI-NEXT: vmovdqa {{.*#+}} xmm1 = [255,0,0,0]
+; AVX512VBMI-NEXT: vpandq %zmm1, %zmm0, %zmm0
 ; AVX512VBMI-NEXT: retq
   %shuffle = shufflevector <64 x i8> %a, <64 x i8> zeroinitializer, <64 x i32> <i32 0, i32 65, i32 66, i32 67, i32 68, i32 69, i32 70, i32 71, i32 72, i32 73, i32 74, i32 75, i32 76, i32 77, i32 78, i32 79, i32 80, i32 81, i32 82, i32 83, i32 84, i32 85, i32 86, i32 87, i32 88, i32 89, i32 90, i32 91, i32 92, i32 93, i32 94, i32 95, i32 96, i32 97, i32 98, i32 99, i32 100, i32 101, i32 102, i32 103, i32 104, i32 105, i32 106, i32 107, i32 108, i32 109, i32 110, i32 111, i32 112, i32 113, i32 114, i32 115, i32 116, i32 117, i32 118, i32 119, i32 120, i32 121, i32 122, i32 123, i32 124, i32 125, i32 126, i32 127>
   ret <64 x i8> %shuffle
-- 
2.7.4