From d4f4a1ba626d7c3e4442d6f68feb79d56eba9601 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 25 Jun 2021 12:58:31 -0700
Subject: [PATCH] [RISCV] Add DAG combine to detect opportunities to replace
 (i64 (any_extend (i32 X)) with sign_extend.

If type legalization is going to insert a sign_extend for other users
of X and we can fold the sign_extend into ADDW/MULW/SUBW, it is better
to replace the ANY_EXTEND so we don't end up with a separate
ADD/MUL/SUB instruction for the users of the ANY_EXTEND.

I'm only handling setcc uses right now, but there are other
instructions that force sign_extends like ashr. There are probably
other *W instructions we could use in addition to ADDW/SUBW/MULW.

My motivating case was a loop terminating compare and a phi use as
seen in the new test file.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D104581
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 80 +++++++++++++++++++++++++++++
 llvm/test/CodeGen/RISCV/aext-to-sext.ll     | 48 +++++++++++++++++
 llvm/test/CodeGen/RISCV/rv64zbb.ll          |  5 +-
 3 files changed, 130 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/aext-to-sext.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8a2e96d..7f9d0cf 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -836,6 +836,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::XOR);
+  setTargetDAGCombine(ISD::ANY_EXTEND);
   if (Subtarget.hasStdExtV()) {
     setTargetDAGCombine(ISD::FCOPYSIGN);
     setTargetDAGCombine(ISD::MGATHER);
@@ -5606,6 +5607,83 @@ static SDValue performXORCombine(SDNode *N,
   return combineSelectCCAndUseCommutative(N, DAG, false);
 }
 
+// Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
+// has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free
+// by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be
+// removed during type legalization leaving an ADD/SUB/MUL use that won't use
+// ADDW/SUBW/MULW.
+static SDValue performANY_EXTENDCombine(SDNode *N,
+                                        TargetLowering::DAGCombinerInfo &DCI,
+                                        const RISCVSubtarget &Subtarget) {
+  if (!Subtarget.is64Bit())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+
+  SDValue Src = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
+    return SDValue();
+
+  // The opcode must be one that can implicitly sign_extend.
+  // FIXME: Additional opcodes.
+  switch (Src.getOpcode()) {
+  default:
+    return SDValue();
+  case ISD::MUL:
+    if (!Subtarget.hasStdExtM())
+      return SDValue();
+    LLVM_FALLTHROUGH;
+  case ISD::ADD:
+  case ISD::SUB:
+    break;
+  }
+
+  SmallVector<SDNode *, 4> SetCCs;
+  for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
+                            UE = Src.getNode()->use_end();
+       UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User == N)
+      continue;
+    if (UI.getUse().getResNo() != Src.getResNo())
+      continue;
+    // All i32 setccs are legalized by sign extending operands.
+    if (User->getOpcode() == ISD::SETCC) {
+      SetCCs.push_back(User);
+      continue;
+    }
+    // We don't know if we can extend this user.
+    break;
+  }
+
+  // If we don't have any SetCCs, this isn't worthwhile.
+  if (SetCCs.empty())
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
+  DCI.CombineTo(N, SExt);
+
+  // Promote all the setccs.
+  for (SDNode *SetCC : SetCCs) {
+    SmallVector<SDValue, 4> Ops;
+
+    for (unsigned j = 0; j != 2; ++j) {
+      SDValue SOp = SetCC->getOperand(j);
+      if (SOp == Src)
+        Ops.push_back(SExt);
+      else
+        Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp));
+    }
+
+    Ops.push_back(SetCC->getOperand(2));
+    DCI.CombineTo(SetCC,
+                  DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
+  }
+  return SDValue(N, 0);
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -5830,6 +5908,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     return performORCombine(N, DCI, Subtarget);
   case ISD::XOR:
     return performXORCombine(N, DCI, Subtarget);
+  case ISD::ANY_EXTEND:
+    return performANY_EXTENDCombine(N, DCI, Subtarget);
   case RISCVISD::SELECT_CC: {
     // Transform
     SDValue LHS = N->getOperand(0);
diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
new file mode 100644
index 0000000..45b30dd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+
+; Make sure we don't generate an addi in the loop in
+; addition to the addiw. Previously we type legalize the
+; setcc use using signext and the phi use using anyext.
+; We now detect when it would be beneficial to replace
+; anyext with signext.
+
+define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
+; RV64I-LABEL: quux:
+; RV64I:       # %bb.0: # %bb
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    beq a0, a1, .LBB0_3
+; RV64I-NEXT:  # %bb.1: # %bb2.preheader
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:  .LBB0_2: # %bb2
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    call hoge@plt
+; RV64I-NEXT:    addiw s1, s1, 1
+; RV64I-NEXT:    bne s1, s0, .LBB0_2
+; RV64I-NEXT:  .LBB0_3: # %bb6
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+bb:
+  %tmp = icmp eq i32 %arg, %arg1
+  br i1 %tmp, label %bb6, label %bb2
+
+bb2:                                              ; preds = %bb2, %bb
+  %tmp3 = phi i32 [ %tmp4, %bb2 ], [ %arg, %bb ]
+  tail call void @hoge()
+  %tmp4 = add nsw i32 %tmp3, 1
+  %tmp5 = icmp eq i32 %tmp4, %arg1
+  br i1 %tmp5, label %bb6, label %bb2
+
+bb6:                                              ; preds = %bb2, %bb
+  ret void
+}
+
+declare void @hoge()
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index 97a89c3..7a20bf0 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -193,12 +193,11 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    addiw a2, a0, -1
+; RV64I-NEXT:    addiw a0, a0, -1
 ; RV64I-NEXT:    addi s0, zero, 32
 ; RV64I-NEXT:    addi a1, zero, 32
-; RV64I-NEXT:    beqz a2, .LBB2_2
+; RV64I-NEXT:    beqz a0, .LBB2_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
-; RV64I-NEXT:    addi a0, a0, -1
 ; RV64I-NEXT:    srliw a1, a0, 1
 ; RV64I-NEXT:    slli a0, a0, 32
 ; RV64I-NEXT:    srli a0, a0, 32
-- 
2.7.4