From 9ab23101a82440c91d630596e0e8264103a34f74 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 30 Jan 2019 22:41:35 +0000 Subject: [PATCH] [DAGCombiner] sub X, 0/1 --> add X, 0/-1 This extends the existing transform for: add X, 0/1 --> sub X, 0/-1 ...to allow the sibling subtraction fold. This pattern could regress with the proposed change in D57401. llvm-svn: 352680 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 32 ++++++++++++++++++--------- llvm/test/CodeGen/AArch64/bool-ext-inc.ll | 7 ++---- llvm/test/CodeGen/X86/bool-ext-inc.ll | 9 ++++---- llvm/test/CodeGen/X86/select_const.ll | 5 ++--- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9837f5b..d2e2351 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2256,6 +2256,23 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { return SDValue(); } +/// Given the operands of an add/sub operation, see if the 2nd operand is a +/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert +/// the opcode and bypass the mask operation. +static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, + SelectionDAG &DAG, const SDLoc &DL) { + if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1))) + return SDValue(); + + EVT VT = N0.getValueType(); + if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits()) + return SDValue(); + + // add N0, (and (AssertSext X, i1), 1) --> sub N0, X + // sub N0, (and (AssertSext X, i1), 1) --> add N0, X + return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0)); +} + SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) { EVT VT = N0.getValueType(); SDLoc DL(LocReference); @@ -2268,16 +2285,8 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) N1.getOperand(0).getOperand(1), N1.getOperand(1))); - if (N1.getOpcode() == ISD::AND) { - SDValue AndOp0 = N1.getOperand(0); - unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); - unsigned DestBits = VT.getScalarSizeInBits(); - - // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) - // and similar xforms where the inner op is either ~0 or 0. - if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1))) - return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0); - } + if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL)) + return V; // add (sext i1), X -> sub X, (zext i1) if (N0.getOpcode() == ISD::SIGN_EXTEND && @@ -2727,6 +2736,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (SDValue V = foldAddSubOfSignBit(N, DAG)) return V; + if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N))) + return V; + // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X) if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) { diff --git a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll index 2b702f9..8f45e97 100644 --- a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll +++ b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll @@ -17,9 +17,7 @@ define <4 x i32> @zextbool_sub_vector(<4 x i32> %c1, <4 x i32> %c2, <4 x i32> %x ; CHECK-LABEL: zextbool_sub_vector: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s +; CHECK-NEXT: add v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %c = icmp eq <4 x i32> %c1, %c2 %b = zext <4 x i1> %c to <4 x i32> @@ -30,8 +28,7 @@ define <4 x i32> @zextbool_sub_vector(<4 x i32> %c1, <4 x i32> %c2, <4 x i32> %x define i32 @assertsext_sub_1(i1 signext %cond, i32 %y) { ; CHECK-LABEL: assertsext_sub_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 -; CHECK-NEXT: sub w0, w1, w8 +; CHECK-NEXT: add w0, w1, w0 ; CHECK-NEXT: ret %e = zext i1 %cond to i32 %r = sub i32 %y, %e diff --git a/llvm/test/CodeGen/X86/bool-ext-inc.ll b/llvm/test/CodeGen/X86/bool-ext-inc.ll index cfd6dc8..d89893f 100644 --- a/llvm/test/CodeGen/X86/bool-ext-inc.ll +++ b/llvm/test/CodeGen/X86/bool-ext-inc.ll @@ -118,8 +118,7 @@ define <4 x i32> @zextbool_sub_vector(<4 x i32> %cmp1, <4 x i32> %cmp2, <4 x i32 ; CHECK-LABEL: zextbool_sub_vector: ; CHECK: # %bb.0: ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpsrld $31, %xmm0, %xmm0 -; CHECK-NEXT: vpsubd %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 ; CHECK-NEXT: retq %c = icmp eq <4 x i32> %cmp1, %cmp2 %b = zext <4 x i1> %c to <4 x i32> @@ -130,9 +129,9 @@ define <4 x i32> @zextbool_sub_vector(<4 x i32> %cmp1, <4 x i32> %cmp2, <4 x i32 define i32 @assertsext_sub_1(i1 signext %cond, i32 %y) { ; CHECK-LABEL: assertsext_sub_1: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal (%rdi,%rsi), %eax ; CHECK-NEXT: retq %e = zext i1 %cond to i32 %r = sub i32 %y, %e diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll index 0f252c8..8ab603d3 100644 --- a/llvm/test/CodeGen/X86/select_const.ll +++ b/llvm/test/CodeGen/X86/select_const.ll @@ -193,9 +193,8 @@ define i32 @select_C_Cplus1_zeroext(i1 zeroext %cond) { define i32 @select_C_Cplus1_signext(i1 signext %cond) { ; CHECK-LABEL: select_C_Cplus1_signext: ; CHECK: # %bb.0: -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: subl %edi, %eax +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 42(%rdi), %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 41, i32 42 ret i32 %sel -- 2.7.4