From 43f668b98e8d87290fc6bbf5ed13c3ab542e3497 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 11 Mar 2022 17:10:03 -0800
Subject: [PATCH] [RISCV] Move GORCIW/GREVIW formation to isel patterns.

Type legalize narrow RISCVISD::GREV/GORC with a constant to a larger
type without switching to W. Detect sext_inreg+gorci/grevi with a
uimm5 immediate during isel to emit GREVIW/GORCIW.

This allows us to better propagate known-bits information through the
extended bits after type legalization. It will also simplify a change
I'm considering for BREV8 with Zbkb.

A future patch will add computeKnownBits support for GORC. A further
improvement here would be to use hasAllWUsers and doPeepholeSExtW like
we do for SLLIW, but I don't think we have the test coverage for that
yet.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp  | 92 +++++++++++++++-------------
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td    | 10 ++-
 llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll |  2 +-
 llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll |  4 +-
 llvm/test/CodeGen/RISCV/rv64zbp.ll           | 20 +++---
 5 files changed, 67 insertions(+), 61 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7a3d973..dc345b6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6394,10 +6394,6 @@ static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) {
   switch (IntNo) {
   default:
     llvm_unreachable("Unexpected Intrinsic");
-  case Intrinsic::riscv_grev:
-    return RISCVISD::GREVW;
-  case Intrinsic::riscv_gorc:
-    return RISCVISD::GORCW;
   case Intrinsic::riscv_bcompress:
     return RISCVISD::BCOMPRESSW;
   case Intrinsic::riscv_bdecompress:
@@ -6445,10 +6441,6 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
     return RISCVISD::ROLW;
   case ISD::ROTR:
     return RISCVISD::RORW;
-  case RISCVISD::GREV:
-    return RISCVISD::GREVW;
-  case RISCVISD::GORC:
-    return RISCVISD::GORCW;
   }
 }
 
@@ -6778,15 +6770,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
            "Unexpected custom legalisation");
     assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
-    // This is similar to customLegalizeToWOp, except that we pass the second
-    // operand (a TargetConstant) straight through: it is already of type
-    // XLenVT.
-    RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
     SDValue NewOp0 =
         DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
     SDValue NewOp1 =
-        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
-    SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
+        DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
+    SDValue NewRes = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
     // ReplaceNodeResults requires we maintain the same type for the return
     // value.
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
@@ -6819,9 +6807,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
     if (N->getOpcode() == ISD::BSWAP)
       Imm &= ~0x7U;
-    unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV;
-    SDValue GREVI =
-        DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT));
+    SDValue GREVI = DAG.getNode(RISCVISD::GREV, DL, XLenVT, NewOp0,
+                                DAG.getConstant(Imm, DL, XLenVT));
     // ReplaceNodeResults requires we maintain the same type for the return
     // value.
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
@@ -6921,7 +6908,27 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
       llvm_unreachable(
           "Don't know how to custom type legalize this intrinsic!");
     case Intrinsic::riscv_grev:
-    case Intrinsic::riscv_gorc:
+    case Intrinsic::riscv_gorc: {
+      assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+             "Unexpected custom legalisation");
+      SDValue NewOp1 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+      SDValue NewOp2 =
+          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+      unsigned Opc =
+          IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW;
+      // If the control is a constant, promote the node by clearing any extra
+      // bits in the control. isel will form greviw/gorciw if the result is
+      // sign extended.
+      if (isa<ConstantSDNode>(NewOp2)) {
+        NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
+                             DAG.getConstant(0x1f, DL, MVT::i64));
+        Opc = IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
+      }
+      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
+      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+      break;
+    }
     case Intrinsic::riscv_bcompress:
     case Intrinsic::riscv_bdecompress:
     case Intrinsic::riscv_bfp: {
@@ -6949,10 +6956,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
       // Lower to the GORCI encoding for orc.b with the operand extended.
       SDValue NewOp =
           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
-      // If Zbp is enabled, use GORCIW which will sign extend the result.
-      unsigned Opc =
-          Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
-      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
+      SDValue Res = DAG.getNode(RISCVISD::GORC, DL, MVT::i64, NewOp,
                                 DAG.getConstant(7, DL, MVT::i64));
       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
       return;
@@ -7392,12 +7396,12 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
 }
 
 // Combine
-// ROTR ((GREV x, 24), 16) -> (GREVI x, 8) for RV32
-// ROTL ((GREV x, 24), 16) -> (GREVI x, 8) for RV32
-// ROTR ((GREV x, 56), 32) -> (GREVI x, 24) for RV64
-// ROTL ((GREV x, 56), 32) -> (GREVI x, 24) for RV64
-// RORW ((GREVW x, 24), 16) -> (GREVIW x, 8) for RV64
-// ROLW ((GREVW x, 24), 16) -> (GREVIW x, 8) for RV64
+// ROTR ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32
+// ROTL ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32
+// ROTR ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64
+// ROTL ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64
+// RORW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64
+// ROLW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64
 // The grev patterns represent BSWAP.
 // FIXME: This can be generalized to any GREV. We just need to toggle the MSB
 // off the grev.
@@ -7412,11 +7416,7 @@ static SDValue combineROTR_ROTL_RORW_ROLW(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
-  if (!Subtarget.hasStdExtZbp())
-    return SDValue();
-
-  unsigned GrevOpc = IsWInstruction ? RISCVISD::GREVW : RISCVISD::GREV;
-  if (Src.getOpcode() != GrevOpc)
+  if (!Subtarget.hasStdExtZbp() || Src.getOpcode() != RISCVISD::GREV)
     return SDValue();
 
   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
@@ -7430,7 +7430,7 @@ static SDValue combineROTR_ROTL_RORW_ROLW(SDNode *N, SelectionDAG &DAG,
   // RORW/ROLW. And the grev should be the encoding for bswap for this width.
   unsigned ShAmt1 = N->getConstantOperandVal(1);
   unsigned ShAmt2 = Src.getConstantOperandVal(1);
-  if (BitWidth < 16 || ShAmt1 != (BitWidth / 2) || ShAmt2 != (BitWidth - 8))
+  if (BitWidth < 32 || ShAmt1 != (BitWidth / 2) || ShAmt2 != (BitWidth - 8))
     return SDValue();
 
   Src = Src.getOperand(0);
@@ -7440,9 +7440,16 @@ static SDValue combineROTR_ROTL_RORW_ROLW(SDNode *N, SelectionDAG &DAG,
   if (CombinedShAmt == 0)
     return Src;
 
-  return DAG.getNode(
-      GrevOpc, DL, VT, Src,
+  SDValue Res = DAG.getNode(
+      RISCVISD::GREV, DL, VT, Src,
       DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
+  if (!IsWInstruction)
+    return Res;
+
+  // Sign extend the result to match the behavior of the rotate. This will be
+  // selected to GREVIW in isel.
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Res,
+                     DAG.getValueType(MVT::i32));
 }
 
 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
@@ -7450,6 +7457,8 @@ static SDValue combineROTR_ROTL_RORW_ROLW(SDNode *N, SelectionDAG &DAG,
 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
 // not undo itself, but they are redundant.
 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
+  bool IsGORC = N->getOpcode() == RISCVISD::GORC;
+  assert((IsGORC || N->getOpcode() == RISCVISD::GREV) && "Unexpected opcode");
   SDValue Src = N->getOperand(0);
 
   if (Src.getOpcode() != N->getOpcode())
@@ -7464,7 +7473,7 @@ static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
   Src = Src.getOperand(0);
 
   unsigned CombinedShAmt;
-  if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
+  if (IsGORC)
     CombinedShAmt = ShAmt1 | ShAmt2;
   else
     CombinedShAmt = ShAmt1 ^ ShAmt2;
@@ -8158,7 +8167,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
         SimplifyDemandedLowBitsHelper(1, 5))
       return SDValue(N, 0);
 
-    return combineGREVI_GORCI(N, DAG);
+    break;
   }
   case RISCVISD::SHFL:
   case RISCVISD::UNSHFL: {
@@ -8823,17 +8832,12 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     Known.Zero.setBitsFrom(LowBits);
     break;
   }
-  case RISCVISD::GREV:
-  case RISCVISD::GREVW: {
+  case RISCVISD::GREV: {
     if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
       Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
-      if (Opc == RISCVISD::GREVW)
-        Known = Known.trunc(32);
       unsigned ShAmt = C->getZExtValue();
       computeGREV(Known.Zero, ShAmt);
       computeGREV(Known.One, ShAmt);
-      if (Opc == RISCVISD::GREVW)
-        Known = Known.sext(BitWidth);
     }
     break;
   }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 1608715..d8512de 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -919,8 +919,14 @@ def : PatGprGpr<riscv_rorw, RORW>;
 let Predicates = [HasStdExtZbp, IsRV64] in {
 def : PatGprGpr<riscv_grevw, GREVW>;
 def : PatGprGpr<riscv_gorcw, GORCW>;
-def : PatGprImm<riscv_grevw, GREVIW, uimm5>;
-def : PatGprImm<riscv_gorcw, GORCIW, uimm5>;
+
+// Select GREVIW/GORCIW when the immediate doesn't have bit 5 set and the result
+// is sign extended.
+// FIXME: Use binop_allwusers and doPeepholeSExtW instead?
+def : Pat<(i64 (sext_inreg (binop_oneuse<riscv_grev> GPR:$rs1, uimm5:$imm), i32)),
+          (GREVIW GPR:$rs1, uimm5:$imm)>;
+def : Pat<(i64 (sext_inreg (binop_oneuse<riscv_gorc> GPR:$rs1, uimm5:$imm), i32)),
+          (GORCIW GPR:$rs1, uimm5:$imm)>;
 
 def : PatGprGpr<riscv_shflw, SHFLW>;
 def : PatGprGpr<riscv_unshflw, UNSHFLW>;
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
index 0f1ace1..53f1758 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-intrinsic.ll
@@ -31,7 +31,7 @@ define zeroext i32 @orcb32_zext(i32 zeroext %a) nounwind {
 ;
 ; RV64ZBP-LABEL: orcb32_zext:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    gorciw a0, a0, 7
+; RV64ZBP-NEXT:    orc.b a0, a0
 ; RV64ZBP-NEXT:    slli a0, a0, 32
 ; RV64ZBP-NEXT:    srli a0, a0, 32
 ; RV64ZBP-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll
index 5c9e164..5bb2420 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp-intrinsic.ll
@@ -37,9 +37,7 @@ define signext i32 @grevi32(i32 signext %a) nounwind {
 define zeroext i32 @grevi32_zext(i32 zeroext %a) nounwind {
 ; RV64ZBP-LABEL: grevi32_zext:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    greviw a0, a0, 13
-; RV64ZBP-NEXT:    slli a0, a0, 32
-; RV64ZBP-NEXT:    srli a0, a0, 32
+; RV64ZBP-NEXT:    grevi a0, a0, 13
 ; RV64ZBP-NEXT:    ret
   %tmp = call i32 @llvm.riscv.grev.i32(i32 %a, i32 13)
   ret i32 %tmp
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 7c551e0..a14053a 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -853,7 +853,7 @@ define i32 @gorc16_rotl_i32(i32 %a) nounwind {
 ;
 ; RV64ZBP-LABEL: gorc16_rotl_i32:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    gorciw a0, a0, 16
+; RV64ZBP-NEXT:    orc16.w a0, a0
 ; RV64ZBP-NEXT:    ret
   %rot = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 16)
   %or = or i32 %rot, %a
@@ -871,7 +871,7 @@ define i32 @gorc16_rotr_i32(i32 %a) nounwind {
 ;
 ; RV64ZBP-LABEL: gorc16_rotr_i32:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    gorciw a0, a0, 16
+; RV64ZBP-NEXT:    orc16.w a0, a0
 ; RV64ZBP-NEXT:    ret
   %rot = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 16)
   %or = or i32 %rot, %a
@@ -1649,9 +1649,7 @@ define zeroext i32 @grev7_i32_zext(i32 zeroext %a) nounwind {
 ;
 ; RV64ZBP-LABEL: grev7_i32_zext:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    greviw a0, a0, 7
-; RV64ZBP-NEXT:    slli a0, a0, 32
-; RV64ZBP-NEXT:    srli a0, a0, 32
+; RV64ZBP-NEXT:    rev.b a0, a0
 ; RV64ZBP-NEXT:    ret
   %and1 = shl i32 %a, 1
   %shl1 = and i32 %and1, -1431655766
@@ -2416,7 +2414,7 @@ define zeroext i16 @bswap_i16(i16 zeroext %a) nounwind {
 ;
 ; RV64ZBP-LABEL: bswap_i16:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    greviw a0, a0, 8
+; RV64ZBP-NEXT:    rev8.h a0, a0
 ; RV64ZBP-NEXT:    ret
   %1 = tail call i16 @llvm.bswap.i16(i16 %a)
   ret i16 %1
@@ -2470,7 +2468,7 @@ define void @bswap_i32_nosext(i32 signext %a, i32* %x) nounwind {
 ;
 ; RV64ZBP-LABEL: bswap_i32_nosext:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    greviw a0, a0, 24
+; RV64ZBP-NEXT:    rev8.w a0, a0
 ; RV64ZBP-NEXT:    sw a0, 0(a1)
 ; RV64ZBP-NEXT:    ret
   %1 = tail call i32 @llvm.bswap.i32(i32 %a)
@@ -2544,7 +2542,7 @@ define zeroext i8 @bitreverse_i8(i8 zeroext %a) nounwind {
 ;
 ; RV64ZBP-LABEL: bitreverse_i8:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    greviw a0, a0, 7
+; RV64ZBP-NEXT:    rev.b a0, a0
 ; RV64ZBP-NEXT:    ret
   %1 = tail call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %1
@@ -2583,7 +2581,7 @@ define zeroext i16 @bitreverse_i16(i16 zeroext %a) nounwind {
 ;
 ; RV64ZBP-LABEL: bitreverse_i16:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    greviw a0, a0, 15
+; RV64ZBP-NEXT:    rev.h a0, a0
 ; RV64ZBP-NEXT:    ret
   %1 = tail call i16 @llvm.bitreverse.i16(i16 %a)
   ret i16 %1
@@ -2679,7 +2677,7 @@ define void @bitreverse_i32_nosext(i32 signext %a, i32* %x) nounwind {
 ;
 ; RV64ZBP-LABEL: bitreverse_i32_nosext:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    greviw a0, a0, 31
+; RV64ZBP-NEXT:    rev.w a0, a0
 ; RV64ZBP-NEXT:    sw a0, 0(a1)
 ; RV64ZBP-NEXT:    ret
   %1 = tail call i32 @llvm.bitreverse.i32(i32 %a)
@@ -2921,7 +2919,7 @@ define i32 @bitreverse_bswap_i32(i32 %a) {
 ;
 ; RV64ZBP-LABEL: bitreverse_bswap_i32:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    greviw a0, a0, 7
+; RV64ZBP-NEXT:    rev.b a0, a0
 ; RV64ZBP-NEXT:    ret
   %1 = call i32 @llvm.bitreverse.i32(i32 %a)
   %2 = call i32 @llvm.bswap.i32(i32 %1)
-- 
2.7.4
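
As a quick illustration of the new isel behavior described in the commit
message (a sketch, not part of the patch: the function name is hypothetical,
and the RUN line mirrors the existing rv64zbp-intrinsic.ll tests, where Zbp
was still an experimental extension), a constant-control grev whose result
is sign extended should now select greviw through the new sext_inreg pattern
rather than through the removed GREVW custom legalization:

  ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \
  ; RUN:   | FileCheck %s
  declare i32 @llvm.riscv.grev.i32(i32, i32)

  ; The i32 intrinsic is type legalized to a 64-bit RISCVISD::GREV with the
  ; control masked to 0x1f; the signext return adds the sext_inreg that the
  ; new pattern matches, so a single greviw should be emitted.
  define signext i32 @grevi32_example(i32 signext %a) nounwind {
  ; CHECK-LABEL: grevi32_example:
  ; CHECK:         greviw a0, a0, 13
    %tmp = call i32 @llvm.riscv.grev.i32(i32 %a, i32 13)
    ret i32 %tmp
  }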