From 24847a90aaf5842041a2cfa977a4167997307b50 Mon Sep 17 00:00:00 2001 From: LiaoChunyu Date: Fri, 24 Mar 2023 09:04:59 +0800 Subject: [PATCH] [LegalizeTypes][RISCV] Add a special case for (add X, -1) to ExpandIntRes_ADDSUB On targets without ADDCARRY or ADDE, we need to emit a separate SETCC to determine the carry from the low half to the high half. The high half is calculated by a series of ADDs. When RHSLo and RHSHi are both -1, without this patch, we get: Hi = (add (add LHSHi, (setult Lo, LHSLo)), -1) whereas with the patch we get: Hi = (sub LHSHi, (seteq LHSLo, 0)) When only RHSLo is -1, we can instead compute the carry as (setne LHSLo, 0). Similar to gcc: https://godbolt.org/z/M83f6rz39 Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D146635 --- .../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 14 ++++- llvm/test/CodeGen/RISCV/alu64.ll | 73 ++++++++++++++++++++++ .../test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 10 ++- llvm/test/CodeGen/RISCV/overflow-intrinsics.ll | 39 ++++++------ llvm/test/CodeGen/RISCV/sext-zext-trunc.ll | 14 ++--- 5 files changed, 113 insertions(+), 37 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f741ee4..c4f2fbc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3026,7 +3026,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (isOneConstant(LoOps[1])) Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, DAG.getConstant(0, dl, NVT), ISD::SETEQ); - else + else if (isAllOnesConstant(LoOps[1])) { + if (isAllOnesConstant(HiOps[1])) + Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0], + DAG.getConstant(0, dl, NVT), ISD::SETEQ); + else + Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0], + DAG.getConstant(0, dl, NVT), ISD::SETNE); + } else Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); @@ -3037,7 +3044,10 @@ void 
DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Carry = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), DAG.getConstant(0, dl, NVT)); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); + if (isAllOnesConstant(LoOps[1]) && isAllOnesConstant(HiOps[1])) + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps[0], Carry); + else + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2)); diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll index 5349c82..29eb12f 100644 --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -530,3 +530,76 @@ define signext i32 @sraw(i64 %a, i32 zeroext %b) nounwind { %2 = ashr i32 %1, %b ret i32 %2 } + +define i64 @add_hi_and_lo_negone(i64 %0) { +; RV64I-LABEL: add_hi_and_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_hi_and_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: seqz a2, a0 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add nsw i64 %0, -1 + ret i64 %2 +} + +define i64 @add_hi_zero_lo_negone(i64 %0) { +; RV64I-LABEL: add_hi_zero_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_hi_zero_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add i64 %0, 4294967295 + ret i64 %2 +} + +define i64 @add_lo_negone(i64 %0) { +; RV64I-LABEL: add_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: addi a1, a1, -2 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add 
nsw i64 %0, -4294967297 + ret i64 %2 +} + +define i64 @add_hi_one_lo_negone(i64 %0) { +; RV64I-LABEL: add_hi_one_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: srli a1, a1, 31 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_hi_one_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: addi a1, a1, 1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add nsw i64 %0, 8589934591 + ret i64 %2 +} diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index f1528e9..dc4b502 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -1263,10 +1263,9 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: bnez a0, .LBB7_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32I-NEXT: seqz a0, a4 +; RV32I-NEXT: sub a3, a5, a0 ; RV32I-NEXT: addi a2, a4, -1 -; RV32I-NEXT: sltu a0, a2, a4 -; RV32I-NEXT: add a0, a5, a0 -; RV32I-NEXT: addi a3, a0, -1 ; RV32I-NEXT: j .LBB7_1 ; RV32I-NEXT: .LBB7_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 @@ -1327,10 +1326,9 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: bnez a0, .LBB7_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32IA-NEXT: seqz a0, a4 +; RV32IA-NEXT: sub a3, a5, a0 ; RV32IA-NEXT: addi a2, a4, -1 -; RV32IA-NEXT: sltu a0, a2, a4 -; RV32IA-NEXT: add a0, a5, a0 -; RV32IA-NEXT: addi a3, a0, -1 ; RV32IA-NEXT: j .LBB7_1 ; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index 543c17f..acad770 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -666,14 +666,13 @@ define i1 
@uaddo_i64_increment_alt_dom(i64 %x, ptr %p) { define i1 @uaddo_i64_decrement_alt(i64 %x, ptr %p) { ; RV32-LABEL: uaddo_i64_decrement_alt: ; RV32: # %bb.0: -; RV32-NEXT: addi a3, a0, -1 -; RV32-NEXT: sltu a4, a3, a0 -; RV32-NEXT: add a4, a1, a4 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: sw a3, 0(a2) +; RV32-NEXT: seqz a3, a0 +; RV32-NEXT: sub a3, a1, a3 +; RV32-NEXT: addi a4, a0, -1 +; RV32-NEXT: sw a4, 0(a2) ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: snez a0, a0 -; RV32-NEXT: sw a4, 4(a2) +; RV32-NEXT: sw a3, 4(a2) ; RV32-NEXT: ret ; ; RV64-LABEL: uaddo_i64_decrement_alt: @@ -695,12 +694,11 @@ define i1 @uaddo_i64_decrement_alt_dom(i64 %x, ptr %p) { ; RV32: # %bb.0: ; RV32-NEXT: or a3, a0, a1 ; RV32-NEXT: snez a3, a3 -; RV32-NEXT: addi a4, a0, -1 -; RV32-NEXT: sltu a0, a4, a0 -; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: sub a1, a1, a4 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a4, 0(a2) -; RV32-NEXT: sw a0, 4(a2) +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: sw a1, 4(a2) ; RV32-NEXT: mv a0, a3 ; RV32-NEXT: ret ; @@ -1222,22 +1220,21 @@ define i64 @foo2(ptr %p) { define void @PR41129(ptr %p64) { ; RV32-LABEL: PR41129: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lw a1, 4(a0) -; RV32-NEXT: lw a2, 0(a0) -; RV32-NEXT: or a3, a2, a1 +; RV32-NEXT: lw a2, 4(a0) +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: or a3, a1, a2 ; RV32-NEXT: beqz a3, .LBB36_2 ; RV32-NEXT: # %bb.1: # %false -; RV32-NEXT: andi a2, a2, 7 +; RV32-NEXT: andi a1, a1, 7 ; RV32-NEXT: sw zero, 4(a0) -; RV32-NEXT: sw a2, 0(a0) +; RV32-NEXT: sw a1, 0(a0) ; RV32-NEXT: ret ; RV32-NEXT: .LBB36_2: # %true -; RV32-NEXT: addi a3, a2, -1 -; RV32-NEXT: sltu a2, a3, a2 -; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: seqz a3, a1 +; RV32-NEXT: sub a2, a2, a3 ; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: sw a3, 0(a0) -; RV32-NEXT: sw a1, 4(a0) +; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sw a2, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: PR41129: diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll 
b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll index 58469ca..6be6785 100644 --- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -454,10 +454,9 @@ define i32 @sext_of_not_i32(i1 %x) { define i64 @sext_of_not_i64(i1 %x) { ; RV32I-LABEL: sext_of_not_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a1, a0, 1 -; RV32I-NEXT: addi a0, a1, -1 -; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sext_of_not_i64: @@ -541,10 +540,9 @@ define i64 @dec_of_zexted_cmp_i64(i64 %x) { ; RV32I: # %bb.0: ; RV32I-NEXT: xori a0, a0, 7 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: seqz a1, a0 -; RV32I-NEXT: addi a0, a1, -1 -; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: dec_of_zexted_cmp_i64: -- 2.7.4