From: Craig Topper Date: Thu, 23 Feb 2023 17:16:54 +0000 (-0800) Subject: [LegalizeTypes][RISCV] Add a special case to ExpandIntRes_UADDSUBO for (uaddo X, 1). X-Git-Tag: upstream/17.0.6~16652 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2fc5a5117c613b580be321187555976e4e60872e;p=platform%2Fupstream%2Fllvm.git [LegalizeTypes][RISCV] Add a special case to ExpandIntRes_UADDSUBO for (uaddo X, 1). On targets that lack ADDCARRY support we split a wide uaddo into an ADD and a SETCC that both need to be split. For (uaddo X, 1) we can observe that when the add overflows the result will be 0. We can emit (seteq (or Lo, Hi), 0) to detect this. This improves D142071. There is an alternative here. We could use either ~(lo(X) & hi(X)) == 0 or (lo(X) & hi(X)) == -1 before the addition. That would be closer to the code before D142071. Reviewed By: liaolucy Differential Revision: https://reviews.llvm.org/D144614 --- diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index c9ce907..22d997b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3137,9 +3137,17 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS); SplitInteger(Sum, Lo, Hi); - // Calculate the overflow: addition overflows iff a + b < a, and subtraction - // overflows iff a - b > a. - Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond); + if (N->getOpcode() == ISD::UADDO && isOneConstant(RHS)) { + // Special case: uaddo X, 1 overflowed if X+1 == 0. We can detect this + // with (Lo | Hi) == 0. + SDValue Or = DAG.getNode(ISD::OR, dl, Lo.getValueType(), Lo, Hi); + Ovf = DAG.getSetCC(dl, N->getValueType(1), Or, + DAG.getConstant(0, dl, Lo.getValueType()), ISD::SETEQ); + } else { + // Calculate the overflow: addition overflows iff a + b < a, and + // subtraction overflows iff a - b > a. + Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond); + } } // Legalized the flag result - switch anything that used the old flag to diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index a9124e4..c7fab17 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -519,16 +519,13 @@ define i1 @uaddo6_xor_op_after_XOR(i32 %a, ptr %b.ptr) { define i1 @uaddo_i64_increment(i64 %x, ptr %p) { ; RV32-LABEL: uaddo_i64_increment: ; RV32: # %bb.0: -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: addi a4, a0, 1 -; RV32-NEXT: sltu a0, a4, a0 -; RV32-NEXT: add a5, a1, a0 -; RV32-NEXT: bgeu a4, a3, .LBB12_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: sltu a0, a5, a1 -; RV32-NEXT: .LBB12_2: -; RV32-NEXT: sw a4, 0(a2) -; RV32-NEXT: sw a5, 4(a2) +; RV32-NEXT: addi a3, a0, 1 +; RV32-NEXT: sltu a0, a3, a0 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: or a0, a3, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: sw a3, 0(a2) +; RV32-NEXT: sw a1, 4(a2) ; RV32-NEXT: ret ; ; RV64-LABEL: uaddo_i64_increment: diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll index 6c47479..4e26ab2 100644 --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -499,16 +499,13 @@ entry: define zeroext i1 @uaddo.i64.constant_one(i64 %v1, ptr %res) { ; RV32-LABEL: uaddo.i64.constant_one: ; RV32: # %bb.0: # %entry -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: addi a4, a0, 1 -; RV32-NEXT: sltu a0, a4, a0 -; RV32-NEXT: add a5, a1, a0 -; RV32-NEXT: bgeu a4, a3, .LBB11_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: sltu a0, a5, a1 -; RV32-NEXT: .LBB11_2: # %entry -; RV32-NEXT: sw a4, 0(a2) -; RV32-NEXT: sw a5, 4(a2) +; RV32-NEXT: addi a3, a0, 1 +; RV32-NEXT: sltu a0, a3, a0 +; RV32-NEXT: add a1, a1, a0 +; RV32-NEXT: or a0, a3, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: sw a3, 0(a2) +; RV32-NEXT: sw a1, 4(a2) ; RV32-NEXT: ret ; ; RV64-LABEL: uaddo.i64.constant_one: @@ -520,16 +517,13 @@ define zeroext i1 @uaddo.i64.constant_one(i64 %v1, ptr %res) { ; ; RV32ZBA-LABEL: uaddo.i64.constant_one: ; RV32ZBA: # %bb.0: # %entry -; RV32ZBA-NEXT: mv a3, a0 -; RV32ZBA-NEXT: addi a4, a0, 1 -; RV32ZBA-NEXT: sltu a0, a4, a0 -; RV32ZBA-NEXT: add a5, a1, a0 -; RV32ZBA-NEXT: bgeu a4, a3, .LBB11_2 -; RV32ZBA-NEXT: # %bb.1: # %entry -; RV32ZBA-NEXT: sltu a0, a5, a1 -; RV32ZBA-NEXT: .LBB11_2: # %entry -; RV32ZBA-NEXT: sw a4, 0(a2) -; RV32ZBA-NEXT: sw a5, 4(a2) +; RV32ZBA-NEXT: addi a3, a0, 1 +; RV32ZBA-NEXT: sltu a0, a3, a0 +; RV32ZBA-NEXT: add a1, a1, a0 +; RV32ZBA-NEXT: or a0, a3, a1 +; RV32ZBA-NEXT: seqz a0, a0 +; RV32ZBA-NEXT: sw a3, 0(a2) +; RV32ZBA-NEXT: sw a1, 4(a2) ; RV32ZBA-NEXT: ret ; ; RV64ZBA-LABEL: uaddo.i64.constant_one: