From: Simon Pilgrim
Date: Sun, 17 Jul 2022 17:51:41 +0000 (+0100)
Subject: [DAG] Fold (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
X-Git-Tag: upstream/15.0.7~1412
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=53b90dd372c440e26116ff66fd4a514c0055ebb1;p=platform%2Fupstream%2Fllvm.git

[DAG] Fold (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))

Pulled out of D77804

Alive2: https://alive2.llvm.org/ce/z/g61VRe
---

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a4311704..0ca2756 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6876,20 +6876,44 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
 }
 
 /// OR combines for which the commuted variant will be tried as well.
-static SDValue visitORCommutative(
-    SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
+static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
+                                  SDNode *N) {
   EVT VT = N0.getValueType();
   if (N0.getOpcode() == ISD::AND) {
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+
     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
     // TODO: Set AllowUndefs = true.
-    if (getBitwiseNotOperand(N0.getOperand(1), N0.getOperand(0),
+    if (getBitwiseNotOperand(N01, N00,
                              /* AllowUndefs */ false) == N1)
-      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
+      return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
 
     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
-    if (getBitwiseNotOperand(N0.getOperand(0), N0.getOperand(1),
+    if (getBitwiseNotOperand(N00, N01,
                              /* AllowUndefs */ false) == N1)
-      return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
+      return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
+
+    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
+    if (N1.getOpcode() == ISD::AND) {
+      SDValue N10 = N1.getOperand(0);
+      if (N10.getOpcode() == ISD::OR) {
+        SDValue N11 = N1.getOperand(1);
+        SDValue N100 = N10.getOperand(0);
+        SDValue N101 = N10.getOperand(1);
+        if (((N00 == N100) || (N00 == N101)) && N0->hasOneUse() &&
+            N1->hasOneUse()) {
+          SDLoc DL(N);
+          if (SDValue C12 =
+                  DAG.FoldConstantArithmetic(ISD::OR, DL, VT, {N01, N11})) {
+            SDValue Y = (N00 == N100 ? N101 : N100);
+            return DAG.getNode(ISD::OR, DL, VT,
+                               DAG.getNode(ISD::AND, DL, VT, N00, C12),
+                               DAG.getNode(ISD::AND, DL, VT, Y, N11));
+          }
+        }
+      }
+    }
   }
 
   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index d451eed..8d490c7 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -489,17 +489,15 @@ define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
   ret <4 x i32> %2
 }
 
-; FIXME: fold (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
+; fold (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
 define i32 @or_and_and_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: or_and_and_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    orl %edi, %esi
-; CHECK-NEXT:    andl $8, %edi
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    andl $-11, %esi
-; CHECK-NEXT:    leal (%rsi,%rdi), %eax
+; CHECK-NEXT:    andl $-3, %eax
+; CHECK-NEXT:    orl %esi, %eax
 ; CHECK-NEXT:    retq
   %xy = or i32 %x, %y
   %mx = and i32 %x, 8
@@ -511,11 +509,9 @@ define i32 @or_and_and_i32(i32 %x, i32 %y) {
 define i64 @or_and_and_commute_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: or_and_and_commute_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    orq %rdi, %rax
-; CHECK-NEXT:    andl $8, %edi
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    orq %rsi, %rax
 ; CHECK-NEXT:    andq $-3, %rax
-; CHECK-NEXT:    orq %rdi, %rax
 ; CHECK-NEXT:    retq
   %xy = or i64 %x, %y
   %mx = and i64 %x, 8
@@ -527,9 +523,8 @@ define i64 @or_and_and_commute_i64(i64 %x, i64 %y) {
 define <4 x i32> @or_and_and_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: or_and_and_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    orps %xmm0, %xmm1
-; CHECK-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; CHECK-NEXT:    orps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %xy = or <4 x i32> %x, %y
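
The Alive2 link above is the formal proof of the fold. As an informal illustration only, the identity behind it, (X & C1) | ((X | Y) & C2) == (X & (C1 | C2)) | (Y & C2), is purely bitwise, so exhausting every 4-bit combination already covers all integer and vector element widths. The standalone program below (a hypothetical sketch, not part of this commit, with an invented file name) does that brute-force check:

// check_or_and_and_fold.cpp -- hypothetical standalone sanity check, not part of this commit.
// Verifies (X & C1) | ((X | Y) & C2) == (X & (C1 | C2)) | (Y & C2) for all 4-bit
// values; because the identity is bitwise and lane-independent, this generalizes
// to any width.
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned X = 0; X < 16; ++X)
    for (unsigned Y = 0; Y < 16; ++Y)
      for (unsigned C1 = 0; C1 < 16; ++C1)
        for (unsigned C2 = 0; C2 < 16; ++C2) {
          unsigned LHS = (X & C1) | ((X | Y) & C2); // original pattern
          unsigned RHS = (X & (C1 | C2)) | (Y & C2); // folded pattern
          if (LHS != RHS) {
            std::printf("mismatch: X=%u Y=%u C1=%u C2=%u\n", X, Y, C1, C2);
            return 1;
          }
        }
  std::puts("identity holds for all 4-bit X, Y, C1, C2");
  return 0;
}

Note that the one-use checks in the DAG combine are a profitability restriction only; the rewrite itself is unconditionally correct, as the check above and the Alive2 proof show.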