From c430f0f532de3802ca76ed9f7f2d889987eb2749 Mon Sep 17 00:00:00 2001
From: Xiang1 Zhang <xiang1.zhang@intel.com>
Date: Fri, 22 Apr 2022 14:59:53 +0800
Subject: [PATCH] [X86] Add use condition for combineSetCCMOVMSK

Reviewed By: RKSimon, LuoYuanke

Differential Revision: https://reviews.llvm.org/D123652
---
 llvm/lib/Target/X86/X86ISelLowering.cpp        | 10 ++++++++--
 llvm/test/CodeGen/X86/vector-compare-all_of.ll |  9 ++++-----
 llvm/test/CodeGen/X86/vector-compare-any_of.ll |  8 +++-----
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index de0eb3b..8cc6a28 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45012,6 +45012,12 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
   if (!IsAnyOf && !IsAllOf)
     return SDValue();
 
+  // TODO: Extend this one-use check to more of the combines below.
+  // We use the compare result's use count to decide whether to combine.
+  // Currently only the "MOVMSK(CONCAT(..))" and "MOVMSK(PCMPEQ(..))"
+  // combines are guarded, as those are the patterns covered by tests.
+  bool IsOneUse = CmpOp.getNode()->hasOneUse();
+
   // See if we can peek through to a vector with a wider element type, if the
   // signbits extend down to all the sub-elements as well.
   // Calling MOVMSK with the wider type, avoiding the bitcast, helps expose
@@ -45040,7 +45046,7 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
   // MOVMSK(CONCAT(X,Y)) != 0 ->  MOVMSK(OR(X,Y)).
   // MOVMSK(CONCAT(X,Y)) == -1 ->  MOVMSK(AND(X,Y)).
   // MOVMSK(CONCAT(X,Y)) != -1 ->  MOVMSK(AND(X,Y)).
-  if (VecVT.is256BitVector() && NumElts <= CmpBits) {
+  if (VecVT.is256BitVector() && NumElts <= CmpBits && IsOneUse) {
     SmallVector<SDValue> Ops;
     if (collectConcatOps(peekThroughBitcasts(Vec).getNode(), Ops) &&
         Ops.size() == 2) {
@@ -45061,7 +45067,7 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
   // MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).
   // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)).
   // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(SUB(X,Y),SUB(X,Y)).
-  if (IsAllOf && Subtarget.hasSSE41()) {
+  if (IsAllOf && Subtarget.hasSSE41() && IsOneUse) {
     MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
     SDValue BC = peekThroughBitcasts(Vec);
     // Ensure MOVMSK was testing every signbit of BC.
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index 7eb156f..bbc6757 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -881,7 +881,7 @@ define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
   ret i8 %11
 }
 
-; FIXME: Should not "MOVMSK(PCMPEQ(..)) -> PTESTZ(..)" when cmp result has muti-uses.
+; Should not fold "MOVMSK(PCMPEQ(..)) -> PTESTZ(..)" when the cmp result has multiple uses.
 define i32 @test_v32i8_muti_uses(<32 x i8> %x, <32 x i8>%y, i32 %z) {
 ; SSE-LABEL: test_v32i8_muti_uses:
 ; SSE:       # %bb.0:
@@ -914,10 +914,9 @@ define i32 @test_v32i8_muti_uses(<32 x i8> %x, <32 x i8>%y, i32 %z) {
 ;
 ; AVX2-LABEL: test_v32i8_muti_uses:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm2
-; AVX2-NEXT:    vpmovmskb %ymm2, %ecx
-; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vptest %ymm0, %ymm0
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
+; AVX2-NEXT:    cmpl $-1, %ecx
 ; AVX2-NEXT:    movl $16, %eax
 ; AVX2-NEXT:    cmovnel %ecx, %eax
 ; AVX2-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index 1665844..aee0fa8 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -1358,11 +1358,9 @@ define {i32, i1} @test_v16i8_muti_uses(<16 x i8> %x, <16 x i8>%y, <16 x i8> %z)
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm2, %xmm1
-; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm2
-; AVX2-NEXT:    vpmovmskb %ymm2, %eax
-; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
-; AVX2-NEXT:    testl %ecx, %ecx
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
 ; AVX2-NEXT:    sete %dl
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
-- 
2.7.4
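
A minimal LLVM IR sketch of the multi-use pattern the new IsOneUse guard
leaves alone. This is not the test body from the patch: the function name
and exact IR are assumptions reconstructed from the AVX2 CHECK lines in
vector-compare-all_of.ll (movl $16, %eax + cmovnel %ecx, %eax encode
"mask == -1 ? 16 : mask").

    declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)

    define i32 @movmsk_multi_use_sketch(<32 x i8> %x, <32 x i8> %y) {
      ; Byte-wise equality compare, then materialize its 32-bit sign mask.
      %cmp  = icmp eq <32 x i8> %x, %y
      %sext = sext <32 x i1> %cmp to <32 x i8>
      %mask = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %sext)
      ; %mask has two uses (the all-of test and the select operand), so
      ; folding MOVMSK(PCMPEQ(..)) -> PTESTZ(..) could not delete the
      ; MOVMSK; it would only add a PTEST. The IsOneUse guard skips it.
      %allof = icmp eq i32 %mask, -1
      %sel = select i1 %allof, i32 16, i32 %mask
      ret i32 %sel
    }

With the guard, the AVX2 lowering keeps a single vpcmpeqb + vpmovmskb
followed by cmpl $-1, %ecx, matching the updated CHECK lines above.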