From dee5a8ca325f3c752534442233cb601910e918c4 Mon Sep 17 00:00:00 2001
From: Ben Shi
Date: Mon, 30 Aug 2021 21:14:12 +0800
Subject: [PATCH] [RISCV] Optimize (add (shl x, c0), (shl y, c1)) with SH*ADD

Optimize (add (shl x, c0), (shl y, c1)) ->
         (SLLI (SH*ADD x, y), c1), if c0-c1 == 1/2/3.

Reviewed By: craig.topper, luismarques

Differential Revision: https://reviews.llvm.org/D108916
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 55 +++++++++++++++++++++++++++--
 llvm/test/CodeGen/RISCV/rv32zba.ll          | 18 ++++------
 llvm/test/CodeGen/RISCV/rv64zba.ll          | 36 +++++++------------
 3 files changed, 71 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e9ac4bb..f7969bd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5913,6 +5913,52 @@ static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
                      DAG.getConstant(Match1->ShAmt, DL, VT));
 }
 
+// Optimize (add (shl x, c0), (shl y, c1)) ->
+//          (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
+static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
+                                  const RISCVSubtarget &Subtarget) {
+  // Perform this optimization only in the zba extension.
+  if (!Subtarget.hasStdExtZba())
+    return SDValue();
+
+  // Skip for vector types and larger types.
+  EVT VT = N->getValueType(0);
+  if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
+    return SDValue();
+
+  // The two operand nodes must be SHL and have no other use.
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
+      !N0->hasOneUse() || !N1->hasOneUse())
+    return SDValue();
+
+  // Check c0 and c1.
+  auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+  auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+  if (!N0C || !N1C)
+    return SDValue();
+  int64_t C0 = N0C->getSExtValue();
+  int64_t C1 = N1C->getSExtValue();
+  if (C0 <= 0 || C1 <= 0)
+    return SDValue();
+
+  // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
+  int64_t Bits = std::min(C0, C1);
+  int64_t Diff = std::abs(C0 - C1);
+  if (Diff != 1 && Diff != 2 && Diff != 3)
+    return SDValue();
+
+  // Build nodes.
+  SDLoc DL(N);
+  SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
+  SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
+  SDValue NA0 =
+      DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
+  SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
+  return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
+}
+
 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
@@ -6017,7 +6063,12 @@ static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
+                                 const RISCVSubtarget &Subtarget) {
+  // Fold (add (shl x, c0), (shl y, c1)) ->
+  //      (SLLI (SH*ADD x, y), c0), if c1-c0 equals to [1|2|3].
+  if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
+    return V;
   // fold (add (select lhs, rhs, cc, 0, y), x) ->
   //      (select lhs, rhs, cc, x, (add x, y))
   return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
@@ -6330,7 +6381,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                        DAG.getConstant(~SignBit, DL, VT));
   }
   case ISD::ADD:
-    return performADDCombine(N, DAG);
+    return performADDCombine(N, DAG, Subtarget);
   case ISD::SUB:
     return performSUBCombine(N, DAG);
   case ISD::AND:
diff --git a/llvm/test/CodeGen/RISCV/rv32zba.ll b/llvm/test/CodeGen/RISCV/rv32zba.ll
index 2423691..9e1eb48 100644
--- a/llvm/test/CodeGen/RISCV/rv32zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zba.ll
@@ -796,16 +796,14 @@ define i32 @addshl_5_6(i32 %a, i32 %b) {
 ;
 ; RV32B-LABEL: addshl_5_6:
 ; RV32B:       # %bb.0:
+; RV32B-NEXT:    sh1add a0, a1, a0
 ; RV32B-NEXT:    slli a0, a0, 5
-; RV32B-NEXT:    slli a1, a1, 6
-; RV32B-NEXT:    add a0, a0, a1
 ; RV32B-NEXT:    ret
 ;
 ; RV32ZBA-LABEL: addshl_5_6:
 ; RV32ZBA:       # %bb.0:
+; RV32ZBA-NEXT:    sh1add a0, a1, a0
 ; RV32ZBA-NEXT:    slli a0, a0, 5
-; RV32ZBA-NEXT:    slli a1, a1, 6
-; RV32ZBA-NEXT:    add a0, a0, a1
 ; RV32ZBA-NEXT:    ret
   %c = shl i32 %a, 5
   %d = shl i32 %b, 6
@@ -823,16 +821,14 @@ define i32 @addshl_5_7(i32 %a, i32 %b) {
 ;
 ; RV32B-LABEL: addshl_5_7:
 ; RV32B:       # %bb.0:
+; RV32B-NEXT:    sh2add a0, a1, a0
 ; RV32B-NEXT:    slli a0, a0, 5
-; RV32B-NEXT:    slli a1, a1, 7
-; RV32B-NEXT:    add a0, a0, a1
 ; RV32B-NEXT:    ret
 ;
 ; RV32ZBA-LABEL: addshl_5_7:
 ; RV32ZBA:       # %bb.0:
+; RV32ZBA-NEXT:    sh2add a0, a1, a0
 ; RV32ZBA-NEXT:    slli a0, a0, 5
-; RV32ZBA-NEXT:    slli a1, a1, 7
-; RV32ZBA-NEXT:    add a0, a0, a1
 ; RV32ZBA-NEXT:    ret
   %c = shl i32 %a, 5
   %d = shl i32 %b, 7
@@ -850,16 +846,14 @@ define i32 @addshl_5_8(i32 %a, i32 %b) {
 ;
 ; RV32B-LABEL: addshl_5_8:
 ; RV32B:       # %bb.0:
+; RV32B-NEXT:    sh3add a0, a1, a0
 ; RV32B-NEXT:    slli a0, a0, 5
-; RV32B-NEXT:    slli a1, a1, 8
-; RV32B-NEXT:    add a0, a0, a1
 ; RV32B-NEXT:    ret
 ;
 ; RV32ZBA-LABEL: addshl_5_8:
 ; RV32ZBA:       # %bb.0:
+; RV32ZBA-NEXT:    sh3add a0, a1, a0
 ; RV32ZBA-NEXT:    slli a0, a0, 5
-; RV32ZBA-NEXT:    slli a1, a1, 8
-; RV32ZBA-NEXT:    add a0, a0, a1
 ; RV32ZBA-NEXT:    ret
   %c = shl i32 %a, 5
   %d = shl i32 %b, 8
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index c418ce6..32689d2 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1407,16 +1407,14 @@ define signext i32 @addshl32_5_6(i32 signext %a, i32 signext %b) {
 ;
 ; RV64B-LABEL: addshl32_5_6:
 ; RV64B:       # %bb.0:
+; RV64B-NEXT:    sh1add a0, a1, a0
 ; RV64B-NEXT:    slliw a0, a0, 5
-; RV64B-NEXT:    slliw a1, a1, 6
-; RV64B-NEXT:    addw a0, a0, a1
 ; RV64B-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: addshl32_5_6:
 ; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
 ; RV64ZBA-NEXT:    slliw a0, a0, 5
-; RV64ZBA-NEXT:    slliw a1, a1, 6
-; RV64ZBA-NEXT:    addw a0, a0, a1
 ; RV64ZBA-NEXT:    ret
   %c = shl i32 %a, 5
   %d = shl i32 %b, 6
@@ -1434,16 +1432,14 @@ define i64 @addshl64_5_6(i64 %a, i64 %b) {
 ;
 ; RV64B-LABEL: addshl64_5_6:
 ; RV64B:       # %bb.0:
+; RV64B-NEXT:    sh1add a0, a1, a0
 ; RV64B-NEXT:    slli a0, a0, 5
-; RV64B-NEXT:    slli a1, a1, 6
-; RV64B-NEXT:    add a0, a0, a1
 ; RV64B-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: addshl64_5_6:
 ; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
 ; RV64ZBA-NEXT:    slli a0, a0, 5
-; RV64ZBA-NEXT:    slli a1, a1, 6
-; RV64ZBA-NEXT:    add a0, a0, a1
 ; RV64ZBA-NEXT:    ret
   %c = shl i64 %a, 5
   %d = shl i64 %b, 6
@@ -1461,16 +1457,14 @@ define signext i32 @addshl32_5_7(i32 signext %a, i32 signext %b) {
 ;
 ; RV64B-LABEL: addshl32_5_7:
 ; RV64B:       # %bb.0:
+; RV64B-NEXT:    sh2add a0, a1, a0
 ; RV64B-NEXT:    slliw a0, a0, 5
-; RV64B-NEXT:    slliw a1, a1, 7
-; RV64B-NEXT:    addw a0, a0, a1
 ; RV64B-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: addshl32_5_7:
 ; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
 ; RV64ZBA-NEXT:    slliw a0, a0, 5
-; RV64ZBA-NEXT:    slliw a1, a1, 7
-; RV64ZBA-NEXT:    addw a0, a0, a1
 ; RV64ZBA-NEXT:    ret
   %c = shl i32 %a, 5
   %d = shl i32 %b, 7
@@ -1488,16 +1482,14 @@ define i64 @addshl64_5_7(i64 %a, i64 %b) {
 ;
 ; RV64B-LABEL: addshl64_5_7:
 ; RV64B:       # %bb.0:
+; RV64B-NEXT:    sh2add a0, a1, a0
 ; RV64B-NEXT:    slli a0, a0, 5
-; RV64B-NEXT:    slli a1, a1, 7
-; RV64B-NEXT:    add a0, a0, a1
 ; RV64B-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: addshl64_5_7:
 ; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
 ; RV64ZBA-NEXT:    slli a0, a0, 5
-; RV64ZBA-NEXT:    slli a1, a1, 7
-; RV64ZBA-NEXT:    add a0, a0, a1
 ; RV64ZBA-NEXT:    ret
   %c = shl i64 %a, 5
   %d = shl i64 %b, 7
@@ -1515,16 +1507,14 @@ define signext i32 @addshl32_5_8(i32 signext %a, i32 signext %b) {
 ;
 ; RV64B-LABEL: addshl32_5_8:
 ; RV64B:       # %bb.0:
+; RV64B-NEXT:    sh3add a0, a1, a0
 ; RV64B-NEXT:    slliw a0, a0, 5
-; RV64B-NEXT:    slliw a1, a1, 8
-; RV64B-NEXT:    addw a0, a0, a1
 ; RV64B-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: addshl32_5_8:
 ; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    slliw a0, a0, 5
-; RV64ZBA-NEXT:    slliw a1, a1, 8
-; RV64ZBA-NEXT:    addw a0, a0, a1
 ; RV64ZBA-NEXT:    ret
   %c = shl i32 %a, 5
   %d = shl i32 %b, 8
@@ -1542,16 +1532,14 @@ define i64 @addshl64_5_8(i64 %a, i64 %b) {
 ;
 ; RV64B-LABEL: addshl64_5_8:
 ; RV64B:       # %bb.0:
+; RV64B-NEXT:    sh3add a0, a1, a0
 ; RV64B-NEXT:    slli a0, a0, 5
-; RV64B-NEXT:    slli a1, a1, 8
-; RV64B-NEXT:    add a0, a0, a1
 ; RV64B-NEXT:    ret
 ;
 ; RV64ZBA-LABEL: addshl64_5_8:
 ; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    sh3add a0, a1, a0
 ; RV64ZBA-NEXT:    slli a0, a0, 5
-; RV64ZBA-NEXT:    slli a1, a1, 8
-; RV64ZBA-NEXT:    add a0, a0, a1
 ; RV64ZBA-NEXT:    ret
   %c = shl i64 %a, 5
   %d = shl i64 %b, 8
-- 
2.7.4
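
Appendix (not part of the patch): a minimal, self-contained C++ sketch of the arithmetic identity the combine relies on, namely (x << c0) + (y << c1) == ((x + (y << (c1 - c0))) << c0) when c1 - c0 is 1, 2, or 3, which is what one SH1ADD/SH2ADD/SH3ADD followed by a single SLLI computes. The helper name shxaddThenSlli and the test ranges are illustrative assumptions, not anything from the LLVM sources.

// Illustrative sketch only -- not taken from the patch above.
// Checks the identity behind the combine:
//   (x << c0) + (y << c1) == ((x + (y << (c1 - c0))) << c0),  c1 - c0 in {1, 2, 3}
#include <cassert>
#include <cstdint>

// Hypothetical helper mirroring the rewritten DAG: one shift-add, one shift.
static uint64_t shxaddThenSlli(uint64_t x, uint64_t y, unsigned c0, unsigned c1) {
  unsigned diff = c1 - c0;           // must be 1, 2, or 3 to map onto SH*ADD
  return (x + (y << diff)) << c0;    // (SH*ADD x, y) then SLLI by c0
}

int main() {
  // Mirrors the addshl*_5_6 / _5_7 / _5_8 tests: c0 = 5, c1 = 6, 7, 8.
  for (unsigned c1 = 6; c1 <= 8; ++c1)
    for (uint64_t x = 0; x < 64; ++x)
      for (uint64_t y = 0; y < 64; ++y)
        assert((x << 5) + (y << c1) == shxaddThenSlli(x, y, 5, c1));
  return 0;
}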