From b446356bf33ef81f27ab84b9a804f153c2328217 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 21 Oct 2019 22:36:31 +0000
Subject: [PATCH] [X86][SSE] Add OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1))) ->
 MOVMSK+CMP reduction combine

llvm-svn: 375463
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++++++++++
 llvm/test/CodeGen/X86/movmsk-cmp.ll     | 30 ++++++++++++------------------
 2 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8905b89..ed975e9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39527,6 +39527,24 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
                                      DAG.getBitcast(MVT::v4f32, N1)));
   }
 
+  // Match any-of bool scalar reductions into a bitcast/movmsk + cmp.
+  // TODO: Support multiple SrcOps.
+  if (VT == MVT::i1) {
+    SmallVector<SDValue, 2> SrcOps;
+    if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps) &&
+        SrcOps.size() == 1) {
+      SDLoc dl(N);
+      unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
+      EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+      SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
+      if (Mask) {
+        APInt AllBits = APInt::getNullValue(NumElts);
+        return DAG.getSetCC(dl, MVT::i1, Mask,
+                            DAG.getConstant(AllBits, dl, MaskVT), ISD::SETNE);
+      }
+    }
+  }
+
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 1d3d100..1cceb21 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4513,21 +4513,17 @@ define i1 @movmsk_or_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
 ; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    movmskpd %xmm1, %ecx
-; SSE2-NEXT:    xorl $3, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    shrb %al
-; SSE2-NEXT:    orb %cl, %al
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    xorb $3, %al
+; SSE2-NEXT:    setne %al
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_or_v2i64:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %ecx
-; AVX-NEXT:    xorl $3, %ecx
-; AVX-NEXT:    movl %ecx, %eax
-; AVX-NEXT:    shrb %al
-; AVX-NEXT:    orb %cl, %al
+; AVX-NEXT:    vmovmskpd %xmm0, %eax
+; AVX-NEXT:    xorb $3, %al
+; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
 ; KNL-LABEL: movmsk_or_v2i64:
@@ -4668,19 +4664,17 @@ define i1 @movmsk_or_v2f64(<2 x double> %x, <2 x double> %y) {
 ; SSE2-LABEL: movmsk_or_v2f64:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    cmplepd %xmm0, %xmm1
-; SSE2-NEXT:    movmskpd %xmm1, %ecx
-; SSE2-NEXT:    movl %ecx, %eax
-; SSE2-NEXT:    shrb %al
-; SSE2-NEXT:    orb %cl, %al
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    testb %al, %al
+; SSE2-NEXT:    setne %al
 ; SSE2-NEXT:    retq
 ;
 ; AVX-LABEL: movmsk_or_v2f64:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %ecx
-; AVX-NEXT:    movl %ecx, %eax
-; AVX-NEXT:    shrb %al
-; AVX-NEXT:    orb %cl, %al
+; AVX-NEXT:    vmovmskpd %xmm0, %eax
+; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
 ; KNL-LABEL: movmsk_or_v2f64:
-- 
2.7.4