From d47f056cd2843341de61c36683a44bc803500675 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 28 Oct 2022 12:11:00 +0100 Subject: [PATCH] [DAG] visitXOR - fold XOR(A,B) -> OR(A,B) iff A and B have no common bits Alive2: https://alive2.llvm.org/ce/z/7wvfns Part of Issue #58624 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++ .../unfold-masked-merge-scalar-variablemask.ll | 7 +--- ...nfold-masked-merge-vector-variablemask-const.ll | 7 +--- .../unfold-masked-merge-scalar-variablemask.ll | 24 +++-------- .../X86/unfold-masked-merge-scalar-variablemask.ll | 20 ++++----- ...nfold-masked-merge-vector-variablemask-const.ll | 47 ++++++++-------------- 6 files changed, 40 insertions(+), 70 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b4d2b21..beed155 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8618,6 +8618,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) return RXOR; + // fold (a^b) -> (a|b) iff a and b share no bits. + if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && + DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, DL, VT, N0, N1); + // look for 'add-like' folds: // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE) if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll index b7148ed..79f299d 100644 --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll @@ -452,8 +452,7 @@ define i32 @out_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { define i32 @in_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_mone_vary: ; CHECK: // %bb.0: -; CHECK-NEXT: bic w8, w2, w1 -; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: orr w0, w2, w1 ; CHECK-NEXT: ret %n0 = xor i32 -1, %y ; %x %n1 = and i32 %n0, %mask @@ -477,9 +476,7 @@ define i32 @out_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) { define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_mone_vary_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn w8, w1 -; CHECK-NEXT: bic w8, w8, w2 -; CHECK-NEXT: eor w0, w8, w1 +; CHECK-NEXT: orn w0, w1, w2 ; CHECK-NEXT: ret %notmask = xor i32 %mask, -1 %n0 = xor i32 -1, %y ; %x diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll index aa0b7e1..90ccd6d 100644 --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll @@ -126,8 +126,7 @@ define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> % define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { ; CHECK-LABEL: in_constant_mone_vary: ; CHECK: // %bb.0: -; CHECK-NEXT: bic v0.16b, v2.16b, v1.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret %n0 = xor <4 x i32> , %y ; %x %n1 = and <4 x i32> %n0, %mask @@ -153,9 +152,7 @@ define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) { ; CHECK-LABEL: in_constant_mone_vary_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v0.16b, v1.16b -; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b +; CHECK-NEXT: orn v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %notmask = xor <4 x i32> %mask, %n0 = xor <4 x i32> , %y ; %x diff --git a/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll index 645de97..6a68157 100644 --- a/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll +++ b/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll @@ -759,18 +759,10 @@ define i32 @out_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { } define i32 @in_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { -; CHECK-I-LABEL: in_constant_mone_vary: -; CHECK-I: # %bb.0: -; CHECK-I-NEXT: not a0, a1 -; CHECK-I-NEXT: and a0, a0, a2 -; CHECK-I-NEXT: xor a0, a0, a1 -; CHECK-I-NEXT: ret -; -; CHECK-ZBB-LABEL: in_constant_mone_vary: -; CHECK-ZBB: # %bb.0: -; CHECK-ZBB-NEXT: andn a0, a2, a1 -; CHECK-ZBB-NEXT: xor a0, a0, a1 -; CHECK-ZBB-NEXT: ret +; CHECK-LABEL: in_constant_mone_vary: +; CHECK: # %bb.0: +; CHECK-NEXT: or a0, a2, a1 +; CHECK-NEXT: ret %n0 = xor i32 -1, %y ; %x %n1 = and i32 %n0, %mask %r = xor i32 %n1, %y @@ -803,16 +795,12 @@ define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-I-LABEL: in_constant_mone_vary_invmask: ; CHECK-I: # %bb.0: ; CHECK-I-NEXT: not a0, a2 -; CHECK-I-NEXT: not a2, a1 -; CHECK-I-NEXT: and a0, a2, a0 -; CHECK-I-NEXT: xor a0, a0, a1 +; CHECK-I-NEXT: or a0, a0, a1 ; CHECK-I-NEXT: ret ; ; CHECK-ZBB-LABEL: in_constant_mone_vary_invmask: ; CHECK-ZBB: # %bb.0: -; CHECK-ZBB-NEXT: not a0, a1 -; CHECK-ZBB-NEXT: andn a0, a0, a2 -; CHECK-ZBB-NEXT: xor a0, a0, a1 +; CHECK-ZBB-NEXT: orn a0, a1, a2 ; CHECK-ZBB-NEXT: ret %notmask = xor i32 %mask, -1 %n0 = xor i32 -1, %y ; %x diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll index 7d3047f..9c9d069 100644 --- a/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll +++ b/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll @@ -733,15 +733,13 @@ define i32 @in_constant_mone_vary(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_constant_mone_vary: ; CHECK-NOBMI: # %bb.0: ; CHECK-NOBMI-NEXT: movl %esi, %eax -; CHECK-NOBMI-NEXT: notl %eax -; CHECK-NOBMI-NEXT: andl %edx, %eax -; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: orl %edx, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constant_mone_vary: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: andnl %edx, %esi, %eax -; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: movl %esi, %eax +; CHECK-BMI-NEXT: orl %edx, %eax ; CHECK-BMI-NEXT: retq %n0 = xor i32 -1, %y ; %x %n1 = and i32 %n0, %mask @@ -775,18 +773,16 @@ define i32 @out_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) { define i32 @in_constant_mone_vary_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-NOBMI-LABEL: in_constant_mone_vary_invmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: notl %edx -; CHECK-NOBMI-NEXT: movl %esi, %eax +; CHECK-NOBMI-NEXT: movl %edx, %eax ; CHECK-NOBMI-NEXT: notl %eax -; CHECK-NOBMI-NEXT: andl %edx, %eax -; CHECK-NOBMI-NEXT: xorl %esi, %eax +; CHECK-NOBMI-NEXT: orl %esi, %eax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: in_constant_mone_vary_invmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: notl %edx -; CHECK-BMI-NEXT: andnl %edx, %esi, %eax -; CHECK-BMI-NEXT: xorl %esi, %eax +; CHECK-BMI-NEXT: movl %edx, %eax +; CHECK-BMI-NEXT: notl %eax +; CHECK-BMI-NEXT: orl %esi, %eax ; CHECK-BMI-NEXT: retq %notmask = xor i32 %mask, -1 %n0 = xor i32 -1, %y ; %x diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll index 41f0381..cca56dc 100644 --- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll +++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll @@ -336,26 +336,21 @@ define <4 x i32> @in_constant_mone_vary(ptr%px, ptr%py, ptr%pmask) { ; CHECK-SSE1-LABEL: in_constant_mone_vary: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax -; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0 -; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1 -; CHECK-SSE1-NEXT: andnps (%rcx), %xmm1 -; CHECK-SSE1-NEXT: xorps %xmm0, %xmm1 -; CHECK-SSE1-NEXT: movaps %xmm1, (%rdi) +; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 +; CHECK-SSE1-NEXT: orps (%rdx), %xmm0 +; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_mone_vary: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movaps (%rsi), %xmm1 -; CHECK-SSE2-NEXT: movaps %xmm1, %xmm0 -; CHECK-SSE2-NEXT: andnps (%rdx), %xmm0 -; CHECK-SSE2-NEXT: xorps %xmm1, %xmm0 +; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 +; CHECK-SSE2-NEXT: orps (%rsi), %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_constant_mone_vary: ; CHECK-XOP: # %bb.0: -; CHECK-XOP-NEXT: vmovaps (%rsi), %xmm0 -; CHECK-XOP-NEXT: vandnps (%rdx), %xmm0, %xmm1 -; CHECK-XOP-NEXT: vxorps %xmm0, %xmm1, %xmm0 +; CHECK-XOP-NEXT: vmovaps (%rdx), %xmm0 +; CHECK-XOP-NEXT: vorps (%rsi), %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, ptr%px, align 16 %y = load <4 x i32>, ptr%py, align 16 @@ -411,32 +406,24 @@ define <4 x i32> @in_constant_mone_vary_invmask(ptr%px, ptr%py, ptr%pmask) { ; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax -; CHECK-SSE1-NEXT: movaps (%rdx), %xmm0 -; CHECK-SSE1-NEXT: movaps (%rcx), %xmm1 -; CHECK-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; CHECK-SSE1-NEXT: movaps %xmm0, %xmm2 -; CHECK-SSE1-NEXT: andnps %xmm1, %xmm2 -; CHECK-SSE1-NEXT: xorps %xmm0, %xmm2 -; CHECK-SSE1-NEXT: movaps %xmm2, (%rdi) +; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 +; CHECK-SSE1-NEXT: xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE1-NEXT: orps (%rdx), %xmm0 +; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: movdqa (%rsi), %xmm1 -; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-SSE2-NEXT: pxor (%rdx), %xmm2 -; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm0 -; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0 -; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0 +; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-SSE2-NEXT: pxor (%rdx), %xmm0 +; CHECK-SSE2-NEXT: por (%rsi), %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_constant_mone_vary_invmask: ; CHECK-XOP: # %bb.0: -; CHECK-XOP-NEXT: vmovdqa (%rsi), %xmm0 -; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm1 -; CHECK-XOP-NEXT: vpandn %xmm1, %xmm0, %xmm1 -; CHECK-XOP-NEXT: vpxor %xmm0, %xmm1, %xmm0 +; CHECK-XOP-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 +; CHECK-XOP-NEXT: vpxor (%rdx), %xmm0, %xmm0 +; CHECK-XOP-NEXT: vpor (%rsi), %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, ptr%px, align 16 %y = load <4 x i32>, ptr%py, align 16 -- 2.7.4