From c143db3b1032042193c152790bcefe34365e6d6c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 18 Jun 2020 17:41:15 +0100 Subject: [PATCH] [X86][SSE] combineHorizontalPredicateResult - improve all_of(X == 0) for vXi64 on pre-SSE41 targets Without SSE41 we don't have the PCMPEQQ instruction, making cmp-with-zero reductions more complicated than necessary. We can compare as vXi32 (PCMPEQD) and tweak the MOVMSK comparison to test upper/lower DWORD comparisons. This pre-fixes something that occurs with null tests for vectors of (64-bit) pointers such as in PR35129. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 19 +++++++++++++ llvm/test/CodeGen/X86/vector-reduce-and-bool.ll | 36 ++++++++----------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cd58423..e2a9231 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38504,6 +38504,25 @@ static SDValue combineHorizontalPredicateResult(SDNode *Extract, EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); Movmsk = DAG.getBitcast(MovmskVT, Match); } else { + // For all_of(setcc(vec,0,eq)) - avoid vXi64 comparisons if we don't have + // PCMPEQQ (SSE41+), use PCMPEQD instead. 
+ if (BinOp == ISD::AND && !Subtarget.hasSSE41() && + Match.getOpcode() == ISD::SETCC && + ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) && + cast<CondCodeSDNode>(Match.getOperand(2))->get() == + ISD::CondCode::SETEQ) { + SDValue Vec = Match.getOperand(0); + if (Vec.getValueType().getScalarType() == MVT::i64 && + (2 * NumElts) <= MaxElts) { + NumElts *= 2; + EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts); + Match = DAG.getSetCC( + DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)), + DAG.getBitcast(CmpVT, Match.getOperand(1)), ISD::CondCode::SETEQ); + } + } + // Use combineBitcastvxi1 to create the MOVMSK. while (NumElts > MaxElts) { SDValue Lo, Hi; diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll index ce6a424..4c34ee8 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll @@ -858,10 +858,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) { ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2] -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: movmskpd %xmm0, %eax -; SSE2-NEXT: cmpb $3, %al +; SSE2-NEXT: movmskps %xmm1, %eax +; SSE2-NEXT: cmpb $15, %al ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -1088,14 +1086,11 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) { ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2] -; SSE2-NEXT: pand %xmm1, %xmm3 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: packssdw %xmm3, %xmm1 -; SSE2-NEXT: movmskps %xmm1, %eax -; SSE2-NEXT: cmpb $15, %al +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: packsswb %xmm0, %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpb $-1, %al ; SSE2-NEXT: sete %al ; 
SSE2-NEXT: retq ; @@ -1383,23 +1378,14 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) { ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm4, %xmm4 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,0,3,2] -; SSE2-NEXT: pand %xmm3, %xmm5 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2] -; SSE2-NEXT: pand %xmm2, %xmm3 -; SSE2-NEXT: packssdw %xmm5, %xmm3 +; SSE2-NEXT: packssdw %xmm3, %xmm2 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] -; SSE2-NEXT: pand %xmm1, %xmm2 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: packssdw %xmm2, %xmm1 -; SSE2-NEXT: packssdw %xmm3, %xmm1 -; SSE2-NEXT: packsswb %xmm1, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpb $-1, %al +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: packsswb %xmm2, %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpw $-1, %ax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; -- 2.7.4