From 29c3a2c6dbce4b8232ea21944abc18a3e165d26a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 4 Feb 2023 15:06:12 +0000 Subject: [PATCH] [X86] combinePredicateReduction - fold any_of(setcc(x,y,ne)) -> pmovmskb(not(pcmpeqb())) Improves codegen for v2i64 cases, similar to what we already do for all_of(setcc(x,y,eq)) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++------ llvm/test/CodeGen/X86/vector-compare-any_of.ll | 12 +++++------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fc60ae7..47e6bd7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -44299,20 +44299,20 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG, Movmsk = DAG.getBitcast(MovmskVT, Match); } else { // For all_of(setcc(x,y,eq)) - use PMOVMSKB(PCMPEQB()). - // TODO: any_of(setcc(x,y,ne)) - use PMOVMSKB(NOT(PCMPEQB())). + // For any_of(setcc(x,y,ne)) - use PMOVMSKB(NOT(PCMPEQB())). 
if (Match.getOpcode() == ISD::SETCC) { ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get(); - if (BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) { + if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) || + (BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) { EVT VecVT = Match.getOperand(0).getValueType(); EVT VecSVT = VecVT.getScalarType(); if (VecSVT != MVT::i8 && (VecSVT.getSizeInBits() % 8) == 0) { NumElts *= VecSVT.getSizeInBits() / 8; EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, NumElts); MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts); - Match = DAG.getSetCC(DL, MatchVT, - DAG.getBitcast(CmpVT, Match.getOperand(0)), - DAG.getBitcast(CmpVT, Match.getOperand(1)), - ISD::CondCode::SETEQ); + Match = DAG.getSetCC( + DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)), + DAG.getBitcast(CmpVT, Match.getOperand(1)), CC); } } } diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll index 730bf30..53773ef 100644 --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -1050,24 +1050,22 @@ define i1 @bool_reduction_v8f32(<8 x float> %x, <8 x float> %y) { define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-LABEL: bool_reduction_v2i64: ; SSE2: # %bb.0: -; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: movmskpd %xmm1, %eax -; SSE2-NEXT: cmpl $3, %eax +; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; ; SSE42-LABEL: bool_reduction_v2i64: ; SSE42: # %bb.0: -; SSE42-NEXT: psubq %xmm1, %xmm0 +; SSE42-NEXT: psubb %xmm1, %xmm0 ; SSE42-NEXT: ptest %xmm0, %xmm0 ; SSE42-NEXT: setne %al ; SSE42-NEXT: retq ; ; AVX-LABEL: bool_reduction_v2i64: ; AVX: # %bb.0: -; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; 
AVX-NEXT: vptest %xmm0, %xmm0 ; AVX-NEXT: setne %al ; AVX-NEXT: retq -- 2.7.4