From 66e7dce714fabd3ddb1aed635e4b826476d4f1a2 Mon Sep 17 00:00:00 2001
From: Mitch Phillips <31459023+hctim@users.noreply.github.com>
Date: Mon, 3 Aug 2020 13:48:30 -0700
Subject: [PATCH] Revert "[X86][SSE] Shuffle combine blends to OR(X,Y) if the
 relevant elements are known zero."

This reverts commit 219f32f4b68679563443cdaae7b8174c9976409a.

Commit contains unsigned comparisons that break bots that build with
-Wsign-compare.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp         | 65 +++++++------------------
 llvm/test/CodeGen/X86/insertelement-ones.ll     | 12 +++--
 llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll   |  8 +--
 llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll  |  5 +-
 llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll |  6 +--
 5 files changed, 35 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b2bfcc2..e9bb50a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7401,8 +7401,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
 
     // Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other
     // is a valid shuffle index.
-    SDValue N0 = peekThroughBitcasts(N.getOperand(0));
-    SDValue N1 = peekThroughBitcasts(N.getOperand(1));
+    SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+    SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
     if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
       return false;
     SmallVector<int, 64> SrcMask0, SrcMask1;
@@ -7413,24 +7413,34 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
                                 true))
       return false;
 
+    // Shuffle inputs must be the same size as the result.
+    if (llvm::any_of(SrcInputs0, [VT](SDValue Op) {
+          return VT.getSizeInBits() != Op.getValueSizeInBits();
+        }))
+      return false;
+    if (llvm::any_of(SrcInputs1, [VT](SDValue Op) {
+          return VT.getSizeInBits() != Op.getValueSizeInBits();
+        }))
+      return false;
+
     size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
     SmallVector<int, 64> Mask0, Mask1;
     narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
     narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
-    for (int i = 0; i != (int)MaskSize; ++i) {
+    for (size_t i = 0; i != MaskSize; ++i) {
       if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
         Mask.push_back(SM_SentinelUndef);
       else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
         Mask.push_back(SM_SentinelZero);
       else if (Mask1[i] == SM_SentinelZero)
-        Mask.push_back(i);
+        Mask.push_back(Mask0[i]);
       else if (Mask0[i] == SM_SentinelZero)
-        Mask.push_back(i + MaskSize);
+        Mask.push_back(Mask1[i] + (int)(MaskSize * SrcInputs0.size()));
       else
         return false;
     }
-    Ops.push_back(N0);
-    Ops.push_back(N1);
+    Ops.append(SrcInputs0.begin(), SrcInputs0.end());
+    Ops.append(SrcInputs1.begin(), SrcInputs1.end());
     return true;
   }
   case ISD::INSERT_SUBVECTOR: {
@@ -34209,7 +34219,6 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
                                SelectionDAG &DAG, const X86Subtarget &Subtarget,
                                unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
                                bool IsUnary) {
-  unsigned NumMaskElts = Mask.size();
   unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
 
   if (MaskVT.is128BitVector()) {
@@ -34267,46 +34276,6 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
     }
   }
 
-  // Attempt to match against a OR if we're performing a blend shuffle and the
-  // non-blended source element is zero in each case.
-  if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
-      (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) {
-    bool IsBlend = true;
-    unsigned NumV1Elts = V1.getValueType().getVectorNumElements();
-    unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
-    unsigned Scale1 = NumV1Elts / NumMaskElts;
-    unsigned Scale2 = NumV2Elts / NumMaskElts;
-    APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts);
-    APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts);
-    for (unsigned i = 0; i != NumMaskElts; ++i) {
-      int M = Mask[i];
-      if (M == SM_SentinelUndef)
-        continue;
-      if (M == SM_SentinelZero) {
-        DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
-        DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
-        continue;
-      }
-      if (M == i) {
-        DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
-        continue;
-      }
-      if (M == (i + NumMaskElts)) {
-        DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
-        continue;
-      }
-      IsBlend = false;
-      break;
-    }
-    if (IsBlend &&
-        DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
-        DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
-      Shuffle = ISD::OR;
-      SrcVT = DstVT = EVT(MaskVT).changeTypeToInteger().getSimpleVT();
-      return true;
-    }
-  }
-
   return false;
 }
 
diff --git a/llvm/test/CodeGen/X86/insertelement-ones.ll b/llvm/test/CodeGen/X86/insertelement-ones.ll
index 6a9a401..3d8e42b 100644
--- a/llvm/test/CodeGen/X86/insertelement-ones.ll
+++ b/llvm/test/CodeGen/X86/insertelement-ones.ll
@@ -389,9 +389,11 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
 ; SSE2-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
 ; SSE2-NEXT:    por %xmm4, %xmm0
-; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
+; SSE2-NEXT:    pand %xmm5, %xmm1
 ; SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
-; SSE2-NEXT:    por %xmm3, %xmm1
+; SSE2-NEXT:    pandn %xmm3, %xmm5
+; SSE2-NEXT:    por %xmm5, %xmm1
 ; SSE2-NEXT:    pand %xmm2, %xmm1
 ; SSE2-NEXT:    por %xmm4, %xmm1
 ; SSE2-NEXT:    retq
@@ -409,9 +411,11 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
 ; SSE3-NEXT:    movdqa %xmm3, %xmm4
 ; SSE3-NEXT:    pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
 ; SSE3-NEXT:    por %xmm4, %xmm0
-; SSE3-NEXT:    pand {{.*}}(%rip), %xmm1
+; SSE3-NEXT:    movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
+; SSE3-NEXT:    pand %xmm5, %xmm1
 ; SSE3-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
-; SSE3-NEXT:    por %xmm3, %xmm1
+; SSE3-NEXT:    pandn %xmm3, %xmm5
+; SSE3-NEXT:    por %xmm5, %xmm1
 ; SSE3-NEXT:    pand %xmm2, %xmm1
 ; SSE3-NEXT:    por %xmm4, %xmm1
 ; SSE3-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
index 9256a43..6b49f22 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
@@ -1314,10 +1314,10 @@ define void @trunc_v4i64_to_v4i8(<32 x i8>* %L, <4 x i8>* %S) nounwind {
 define <16 x i8> @negative(<32 x i8> %v, <32 x i8> %w) nounwind {
 ; AVX1-LABEL: negative:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14]
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm0[u,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u],zero,zero,zero,zero,zero,zero,zero,xmm0[0,2,4,6,8,10,12,14]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
 ; AVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 86423ce..f448f41 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1713,8 +1713,9 @@ define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
 ;
 ; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
 ; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,u,u]
-; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
+; SSSE3-NEXT:    por %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index e5285ae..82df05e 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -3358,9 +3358,9 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_
 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[u,u],zero,zero,xmm2[12],zero,xmm2[u,u,u],zero,zero,xmm2[u,0,3]
 ; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero
 ; AVX1-NEXT:    vpor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm4[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u]
-; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm4[u,u],zero,zero,xmm4[u,u,u,u,1,6,13,u,u],zero,xmm4[u,u]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255]
 ; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-- 
2.7.4
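
Note for readers on the failure mode: the matchBinaryShuffle block removed
above iterates with an unsigned induction variable but compares it against
the signed mask element ('int M' vs. 'unsigned i' in 'M == i' and
'M == (i + NumMaskElts)'), which is exactly the pattern -Wsign-compare
flags. A minimal standalone illustration of that warning class (a
hypothetical example, not code from the tree; assumes a build with
-Wsign-compare promoted to an error, as on the affected bots):

    // sign-compare.cpp -- compile with: clang++ -c -Wsign-compare -Werror sign-compare.cpp
    int main() {
      int M = -1;     // stands in for the signed mask element 'M'
      unsigned i = 1; // stands in for the unsigned loop counter 'i'
      // warning: comparison of integers of different signedness
      // ('M' is converted to unsigned, so -1 compares as a huge value).
      return (M == i) ? 1 : 0;
    }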
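For context on what the reverted combine matched: a blend-style shuffle can
be lowered to a single OR when, in every lane, the element not selected from
a source is known to be zero in that source, since OR-ing the two inputs
then reproduces the blend exactly (the AVX1 test diffs above show the two
equivalent lowerings, vpshufb+vpor versus vpblendw). A small scalar sketch
of that equivalence (illustrative only, not the DAG combine itself):

    // blend-as-or.cpp -- scalar model of "blend == OR when the
    // unselected lanes are known zero".
    #include <array>
    #include <cstdint>

    int main() {
      // A has data only in even lanes, B only in odd lanes.
      std::array<uint8_t, 4> A = {1, 0, 3, 0};
      std::array<uint8_t, 4> B = {0, 2, 0, 4};
      std::array<uint8_t, 4> Blend = {}, Or = {};
      for (size_t i = 0; i != A.size(); ++i) {
        Blend[i] = (i % 2 == 0) ? A[i] : B[i]; // even lanes from A, odd from B
        Or[i] = A[i] | B[i];                   // identical result, one cheap OR
      }
      return (Blend == Or) ? 0 : 1; // both are {1, 2, 3, 4}
    }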