From 8191307d0987ad1c18ae1cf391a43c4b54c2e41e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 30 Nov 2018 18:43:15 +0000 Subject: [PATCH] [X86] Prefer lowerVectorShuffleAsBitMask over using an avx512 masked operation when avx512bw/avx512vl is enabled. This does require a constant pool load instead of loading an immediate into a gpr, moving to a k register and masking. But it's fewer instructions and more consistent with previous ISAs. It probably opens up more combine opportunities as one of the test cases demonstrates. llvm-svn: 348018 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +++++----- llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll | 19 +++++-------------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0035335..890103d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -10175,6 +10175,11 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, assert((VT.is128BitVector() || Subtarget.hasAVX2()) && "256-bit byte-blends require AVX2 support!"); + // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB. + if (SDValue Masked = + lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG)) + return Masked; + if (Subtarget.hasBWI() && Subtarget.hasVLX()) { MVT IntegerType = MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); @@ -10182,11 +10187,6 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG); } - // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB. - if (SDValue Masked = - lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG)) - return Masked; - // Scale the blend by the number of bytes per element. 
int Scale = VT.getScalarSizeInBits() / 8; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 0ae34e0..ba0e208 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -1690,17 +1690,10 @@ define <32 x i8> @load_fold_pblendvb_commute(<32 x i8>* %px, <32 x i8> %y) { } define <32 x i8> @shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31(<32 x i8> %a) { -; AVX1OR2-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31: -; AVX1OR2: # %bb.0: -; AVX1OR2-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 -; AVX1OR2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA -; AVX512VL-NEXT: kmovd %eax, %k1 -; AVX512VL-NEXT: vmovdqu8 %ymm0, %ymm0 {%k1} {z} -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v32i8_zz_01_zz_03_zz_05_zz_07_zz_09_zz_11_zz_13_zz_15_zz_17_zz_19_zz_21_zz_23_zz_25_zz_27_zz_29_zz_31: +; ALL: # %bb.0: +; ALL-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; ALL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> ret <32 x i8> %shuffle } @@ -2781,9 +2774,7 @@ define <32 x i8> @shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz ; AVX512VLBW-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX512VLBW-NEXT: movl $286331153, %eax # imm = 0x11111111 -; AVX512VLBW-NEXT: kmovd %eax, %k1 -; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,u,u,u,9,u,u,u,10,u,u,u,11,u,u,u,28,u,u,u,29,u,u,u,30,u,u,u,31,u,u,u] +; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[8],zero,zero,zero,ymm0[9],zero,zero,zero,ymm0[10],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[28],zero,zero,zero,ymm0[29],zero,zero,zero,ymm0[30],zero,zero,zero,ymm0[31],zero,zero,zero ; AVX512VLBW-NEXT: retq ; ; AVX512VLVBMI-LABEL: shuffle_v32i8_56_zz_zz_zz_57_zz_zz_zz_58_zz_zz_zz__zz_59_zz_zz_zz_60_zz_zz_zz_61_zz_zz_zz_62_zz_zz_zz_63_zz_zz_zz: -- 2.7.4