From: Simon Pilgrim
Date: Thu, 1 Dec 2016 11:52:37 +0000 (+0000)
Subject: [X86][SSE] Add support for combining ISD::AND with shuffles.
X-Git-Tag: llvmorg-4.0.0-rc1~3283
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=947650e99d60452259cc3b9601a11bca588972ec;p=platform%2Fupstream%2Fllvm.git

[X86][SSE] Add support for combining ISD::AND with shuffles.

Attempts to convert an AND with a vector of 255 or 0 values into a
shuffle (blend) mask.

llvm-svn: 288333
---

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3e2f5f1..63f191f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5558,6 +5558,25 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
 
   unsigned Opcode = N.getOpcode();
   switch (Opcode) {
+  case ISD::AND: {
+    // Attempt to decode as a per-byte mask.
+    SmallBitVector UndefElts;
+    SmallVector<APInt, 64> EltBits;
+    if (!getTargetConstantBitsFromNode(N.getOperand(1), 8, UndefElts, EltBits))
+      return false;
+    for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
+      if (UndefElts[i]) {
+        Mask.push_back(SM_SentinelUndef);
+        continue;
+      }
+      uint64_t ByteBits = EltBits[i].getZExtValue();
+      if (ByteBits != 0 && ByteBits != 255)
+        return false;
+      Mask.push_back(ByteBits == 0 ? SM_SentinelZero : i);
+    }
+    Ops.push_back(N.getOperand(0));
+    return true;
+  }
   case X86ISD::VSHLI:
   case X86ISD::VSRLI: {
     uint64_t ShiftVal = N.getConstantOperandVal(1);
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index 02a33a70..4fddbac 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -72,13 +72,11 @@ define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
 define <32 x i8> @combine_and_pshufb(<32 x i8> %a0) {
 ; X32-LABEL: combine_and_pshufb:
 ; X32:       # BB#0:
-; X32-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
 ; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1],zero,zero,zero,zero,zero,zero,ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17],zero,zero,zero,zero,zero,zero,ymm0[24,25],zero,zero,zero,zero,zero,zero
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_and_pshufb:
 ; X64:       # BB#0:
-; X64-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1],zero,zero,zero,zero,zero,zero,ymm0[8,9],zero,zero,zero,zero,zero,zero,ymm0[16,17],zero,zero,zero,zero,zero,zero,ymm0[24,25],zero,zero,zero,zero,zero,zero
 ; X64-NEXT:    retq
   %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
index 95e86d2..b7aa819 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
@@ -277,8 +277,7 @@ define <16 x i8> @combine_pshufb_psrldq(<16 x i8> %a0) {
 define <16 x i8> @combine_and_pshufb(<16 x i8> %a0) {
 ; SSSE3-LABEL: combine_and_pshufb:
 ; SSSE3:       # BB#0:
-; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: combine_and_pshufb:
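
For illustration, a minimal IR sketch of the pattern the new ISD::AND case
targets; it is not taken from the commit's test files, and the function name
and constants are illustrative. The AND constant uses only 0 and 255 (i8 -1)
bytes, so it can be decoded as a per-byte blend with zero and folded into the
adjacent shuffle:

  define <16 x i8> @and_shuffle_sketch(<16 x i8> %a0) {
    ; Keep bytes 0,1 and 8,9 (255 = keep, 0 = clear), then swap the halves.
    %m = and <16 x i8> %a0, <i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
    %s = shufflevector <16 x i8> %m, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    ret <16 x i8> %s
  }

With this patch, llc -mtriple=x86_64-unknown-unknown -mattr=+ssse3 should be
able to emit a single pshufb for such a function rather than a pand plus a
shuffle, in the same way as the updated CHECK lines above.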
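
For each constant byte, the new getFauxShuffleMask case emits one shuffle-mask
entry: an undef byte becomes SM_SentinelUndef, 0 becomes SM_SentinelZero, and
255 becomes the identity index i; any other value rejects the combine. A
hedged counter-example (again illustrative, not from the test files) that the
decode refuses, because byte 2 is 15 rather than 0 or 255:

  define <16 x i8> @and_not_a_blend(<16 x i8> %a0) {
    ; Byte 2 of the constant is 0x0f, which cannot be expressed as a
    ; byte-granularity blend with zero.
    %m = and <16 x i8> %a0, <i8 -1, i8 -1, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
    ret <16 x i8> %m
  }

Here getTargetConstantBitsFromNode still succeeds in splitting the constant
into 8-bit elements, but the 0/255 check fails on element 2, so
getFauxShuffleMask returns false and the AND is left to the normal lowering.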