From b2e9684fe4d10dbdd7679a29c6f971dc59ede3dd Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 2 Jul 2022 00:57:35 -0700
Subject: [PATCH] [RISCV] isel (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C).

Where C2 has 32 leading zeros and C3 trailing zeros.

When the shl is used by an add and C is 1, 2, or 3, we end up matching
(add (shl X, C), Y) first. This leaves an and with a constant that is
harder to materialize.
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 30 ++++++++++++
 llvm/test/CodeGen/RISCV/rv64zba.ll          | 75 +++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index b065170..f7161e9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -728,6 +728,36 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     ReplaceNode(Node, ADDI);
     return;
   }
+  case ISD::SHL: {
+    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+    if (!N1C)
+      break;
+    SDValue N0 = Node->getOperand(0);
+    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+        !isa<ConstantSDNode>(N0.getOperand(1)))
+      break;
+    unsigned ShAmt = N1C->getZExtValue();
+    uint64_t Mask = N0.getConstantOperandVal(1);
+
+    // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
+    // 32 leading zeros and C3 trailing zeros.
+    if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
+      unsigned XLen = Subtarget->getXLen();
+      unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask));
+      unsigned TrailingZeros = countTrailingZeros(Mask);
+      if (TrailingZeros > 0 && LeadingZeros == 32) {
+        SDNode *SRLIW = CurDAG->getMachineNode(
+            RISCV::SRLIW, DL, VT, N0->getOperand(0),
+            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
+        SDNode *SLLI = CurDAG->getMachineNode(
+            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+            CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
+        ReplaceNode(Node, SLLI);
+        return;
+      }
+    }
+    break;
+  }
   case ISD::SRL: {
     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
     if (!N1C)
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
index 659ba92..c6af0b7 100644
--- a/llvm/test/CodeGen/RISCV/rv64zba.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zba.ll
@@ -1309,3 +1309,78 @@ define i64 @srliw_3_sh3add(i64* %0, i32 signext %1) {
   %6 = load i64, i64* %5, align 8
   ret i64 %6
 }
+
+; FIXME: This should select sh2add with Zba.
+define signext i32 @srliw_1_sh2add(i32* %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh2add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 1
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_1_sh2add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    li a2, -2
+; RV64ZBA-NEXT:    zext.w a2, a2
+; RV64ZBA-NEXT:    and a1, a1, a2
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    lw a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 1
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i32, i32* %0, i64 %4
+  %6 = load i32, i32* %5, align 4
+  ret i32 %6
+}
+
+; FIXME: This should select sh3add with Zba.
+define i64 @srliw_1_sh3add(i64* %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 1
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_1_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    li a2, -2
+; RV64ZBA-NEXT:    zext.w a2, a2
+; RV64ZBA-NEXT:    and a1, a1, a2
+; RV64ZBA-NEXT:    sh2add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 1
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, i64* %0, i64 %4
+  %6 = load i64, i64* %5, align 8
+  ret i64 %6
+}
+
+; FIXME: This should select sh3add with Zba.
+define i64 @srliw_2_sh3add(i64* %0, i32 signext %1) {
+; RV64I-LABEL: srliw_2_sh3add:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a1, 2
+; RV64I-NEXT:    slli a1, a1, 3
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ret
+;
+; RV64ZBA-LABEL: srliw_2_sh3add:
+; RV64ZBA:       # %bb.0:
+; RV64ZBA-NEXT:    li a2, -4
+; RV64ZBA-NEXT:    zext.w a2, a2
+; RV64ZBA-NEXT:    and a1, a1, a2
+; RV64ZBA-NEXT:    sh1add a0, a1, a0
+; RV64ZBA-NEXT:    ld a0, 0(a0)
+; RV64ZBA-NEXT:    ret
+  %3 = lshr i32 %1, 2
+  %4 = zext i32 %3 to i64
+  %5 = getelementptr inbounds i64, i64* %0, i64 %4
+  %6 = load i64, i64* %5, align 8
+  ret i64 %6
+}
-- 
2.7.4
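
Note (not part of the patch): the sketch below is a standalone illustration of the mask check the new ISD::SHL case performs. It substitutes the C++20 <bit> helpers std::countl_zero/std::countr_zero/std::has_single_bit for LLVM's countLeadingZeros/countTrailingZeros/isShiftedMask_64 so it compiles on its own, and the concrete Mask/ShAmt values are illustrative assumptions chosen to mirror the srliw_1_sh2add test, not values taken from the patch.

// Build with: c++ -std=c++20 shl_and_fold_sketch.cpp
#include <bit>
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // Illustrative inputs (assumptions): C2 = 0xfffffffe, C = 1.
  const uint64_t Mask = 0xfffffffeULL; // C2: 32 leading zeros, 1 trailing zero
  const unsigned ShAmt = 1;            // C: the shl amount

  unsigned LeadingZeros = std::countl_zero(Mask);
  unsigned TrailingZeros = std::countr_zero(Mask);

  // Rough stand-in for isShiftedMask_64: the set bits form one contiguous run.
  assert(Mask != 0 && std::has_single_bit((Mask >> TrailingZeros) + 1));

  if (ShAmt <= 32 && TrailingZeros > 0 && LeadingZeros == 32) {
    // (shl (and X, C2), C) becomes (slli (srliw X, C3), C3 + C),
    // where C3 is the number of trailing zeros in C2.
    std::printf("srliw x, x, %u\n", TrailingZeros);
    std::printf("slli  x, x, %u\n", TrailingZeros + ShAmt);
  }
  return 0;
}

With these inputs the program prints the srliw/slli pair with shift amounts 1 and 2, matching the RV64I output of srliw_1_sh2add, instead of requiring the shifted-out mask constant to be materialized.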