From: Sanjay Patel
Date: Sun, 17 Mar 2019 14:57:40 +0000 (+0000)
Subject: [TargetLowering] improve the default expansion of uaddsat/usubsat
X-Git-Tag: llvmorg-9.0.0-rc1~9850
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6a6e808b699ba71e02b9a54748551e87f279d52a;p=platform%2Fupstream%2Fllvm.git

[TargetLowering] improve the default expansion of uaddsat/usubsat

This is a subset of what was proposed in:
D59006
...and may overlap with test changes from:
D59174
...but it seems like a good general optimization to turn selects into
bitwise-logic when possible because we never know exactly what can happen
at this stage of DAG combining depending on how the target has defined things.

Differential Revision: https://reviews.llvm.org/D59066

llvm-svn: 356332
---

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index fcda0e513eca..8bddebd75e55 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5426,9 +5426,20 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
   SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
 
   if (Opcode == ISD::UADDSAT) {
+    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+      // (LHS + RHS) | OverflowMask
+      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
+    }
     // Overflow ? 0xffff.... : (LHS + RHS)
     return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
   } else if (Opcode == ISD::USUBSAT) {
+    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+      // (LHS - RHS) & ~OverflowMask
+      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
+      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
+    }
     // Overflow ? 0 : (LHS - RHS)
     return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
   } else {
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index efa99e80976b..55b42e790532 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -404,8 +404,7 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: add v1.2d, v0.2d, v1.2d
 ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT: ret
   %z = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
   ret <2 x i64> %z
@@ -418,10 +417,8 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; CHECK-NEXT: add v3.2d, v1.2d, v3.2d
 ; CHECK-NEXT: cmhi v0.2d, v0.2d, v2.2d
 ; CHECK-NEXT: cmhi v1.2d, v1.2d, v3.2d
-; CHECK-NEXT: bic v2.16b, v2.16b, v0.16b
-; CHECK-NEXT: bic v3.16b, v3.16b, v1.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-NEXT: orr v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: orr v1.16b, v3.16b, v1.16b
 ; CHECK-NEXT: ret
   %z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
   ret <4 x i64> %z
@@ -438,14 +435,10 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
 ; CHECK-NEXT: cmhi v1.2d, v1.2d, v5.2d
 ; CHECK-NEXT: cmhi v2.2d, v2.2d, v6.2d
 ; CHECK-NEXT: cmhi v3.2d, v3.2d, v7.2d
-; CHECK-NEXT: bic v4.16b, v4.16b, v0.16b
-; CHECK-NEXT: bic v5.16b, v5.16b, v1.16b
-; CHECK-NEXT: bic v6.16b, v6.16b, v2.16b
-; CHECK-NEXT: bic v7.16b, v7.16b, v3.16b
-; CHECK-NEXT: orr v0.16b, v0.16b, v4.16b
-; CHECK-NEXT: orr v1.16b, v1.16b, v5.16b
-; CHECK-NEXT: orr v2.16b, v2.16b, v6.16b
-; CHECK-NEXT: orr v3.16b, v3.16b, v7.16b
+; CHECK-NEXT: orr v0.16b, v4.16b, v0.16b
+; CHECK-NEXT: orr v1.16b, v5.16b, v1.16b
+; CHECK-NEXT: orr v2.16b, v6.16b, v2.16b
+; CHECK-NEXT: orr v3.16b, v7.16b, v3.16b
 ; CHECK-NEXT: ret
   %z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y)
   ret <8 x i64> %z