From 10b7cd660c73f704ad8610828e80b33c241451df Mon Sep 17 00:00:00 2001 From: Philipp Tomsich Date: Fri, 17 Feb 2023 21:11:51 +0100 Subject: [PATCH] [RISCV] Select signed and unsigned bitfield extracts for XTHeadBb The XTHeadBb extension has both signed and unsigned bitfield extraction instructions (TH.EXT and TH.EXTU, respectively) which have previously only been supported for sign extension on byte, halfword, and word-boundaries. This adds the infrastructure to use TH.EXT and TH.EXTU for arbitrary bitfield extraction. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D144229 --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 113 +++++++++++++++++++++++++- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 1 + llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td | 4 - llvm/test/CodeGen/RISCV/bitextract-mac.ll | 28 +++---- llvm/test/CodeGen/RISCV/rotl-rotr.ll | 6 +- llvm/test/CodeGen/RISCV/rv32xtheadbb.ll | 36 ++++++++ llvm/test/CodeGen/RISCV/rv64xtheadbb.ll | 65 +++++++++++++++ 7 files changed, 224 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index cd9831e..4a7ba8b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -661,6 +661,73 @@ bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) { return true; } +bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) { + // Only supported with XTHeadBb at the moment. 
+ if (!Subtarget->hasVendorXTHeadBb()) + return false; + + auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); + if (!N1C) + return false; + + SDValue N0 = Node->getOperand(0); + if (!N0.hasOneUse()) + return false; + + auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL, + MVT VT) { + return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0), + CurDAG->getTargetConstant(Msb, DL, VT), + CurDAG->getTargetConstant(Lsb, DL, VT)); + }; + + SDLoc DL(Node); + MVT VT = Node->getSimpleValueType(0); + const unsigned RightShAmt = N1C->getZExtValue(); + + // Transform (sra (shl X, C1) C2) with C1 < C2 + // -> (TH.EXT X, msb, lsb) + if (N0.getOpcode() == ISD::SHL) { + auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1)); + if (!N01C) + return false; + + const unsigned LeftShAmt = N01C->getZExtValue(); + // Make sure that this is a bitfield extraction (i.e., the shift-right + // amount can not be less than the left-shift). + if (LeftShAmt > RightShAmt) + return false; + + const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt; + const unsigned Msb = MsbPlusOne - 1; + const unsigned Lsb = RightShAmt - LeftShAmt; + + SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT); + ReplaceNode(Node, TH_EXT); + return true; + } + + // Transform (sra (sext_inreg X, _), C) -> + // (TH.EXT X, msb, lsb) + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { + unsigned ExtSize = + cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits(); + + // ExtSize of 32 should use sraiw via tablegen pattern. + if (ExtSize == 32) + return false; + + const unsigned Msb = ExtSize - 1; + const unsigned Lsb = RightShAmt; + + SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT); + ReplaceNode(Node, TH_EXT); + return true; + } + + return false; +} + void RISCVDAGToDAGISel::Select(SDNode *Node) { // If we have a custom node, we have already selected. 
if (Node->isMachineOpcode()) { @@ -846,6 +913,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { return; } case ISD::SRA: { + if (trySignedBitfieldExtract(Node)) + return; + // Optimize (sra (sext_inreg X, i16), C) -> // (srai (slli X, (XLen-16), (XLen-16) + C) // And (sra (sext_inreg X, i8), C) -> @@ -886,9 +956,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); if (!N1C) break; + uint64_t C1 = N1C->getZExtValue(); + const bool isC1Mask = isMask_64(C1); + const bool isC1ANDI = isInt<12>(C1); SDValue N0 = Node->getOperand(0); + auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT, + SDValue X, unsigned Msb, + unsigned Lsb) { + if (!Subtarget->hasVendorXTHeadBb()) + return false; + + SDNode *TH_EXTU = CurDAG->getMachineNode( + RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT), + CurDAG->getTargetConstant(Lsb, DL, VT)); + ReplaceNode(Node, TH_EXTU); + return true; + }; + bool LeftShift = N0.getOpcode() == ISD::SHL; if (LeftShift || N0.getOpcode() == ISD::SRL) { auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); @@ -898,8 +984,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { unsigned XLen = Subtarget->getXLen(); assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!"); - uint64_t C1 = N1C->getZExtValue(); - // Keep track of whether this is a c.andi. If we can't use c.andi, the // shift pair might offer more compression opportunities. // TODO: We could check for C extension here, but we don't have many lit @@ -922,7 +1006,7 @@ // Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask // with c3 leading zeros. - if (!LeftShift && isMask_64(C1)) { + if (!LeftShift && isC1Mask) { unsigned Leading = XLen - llvm::bit_width(C1); if (C2 < Leading) { // If the number of leading zeros is C2+32 this can be SRLIW. 
@@ -951,6 +1035,18 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { return; } + // Try to use an unsigned bitfield extract (e.g., th.extu) if + // available. + // Transform (and (srl x, C2), C1) + // -> (<bfextract> x, msb, lsb) + // + // Make sure to keep this below the SRLIW cases, as we always want to + // prefer the more common instruction. + const unsigned Msb = llvm::bit_width(C1) + C2 - 1; + const unsigned Lsb = C2; + if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb)) + return; + // (srli (slli x, c3-c2), c3). // Skip if we could use (zext.w (sraiw X, C2)). bool Skip = Subtarget->hasStdExtZba() && Leading == 32 && @@ -1068,6 +1164,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { } } + // If C1 masks off the upper bits only (but can't be formed as an + // ANDI), use an unsigned bitfield extract (e.g., th.extu), if + // available. + // Transform (and x, C1) + // -> (<bfextract> x, msb, lsb) + if (isC1Mask && !isC1ANDI) { + const unsigned Msb = llvm::bit_width(C1) - 1; + if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0)) + return; + } + if (tryShrinkShlLogicImm(Node)) return; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 8195bce..223282e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -51,6 +51,7 @@ public: bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); bool tryShrinkShlLogicImm(SDNode *Node); + bool trySignedBitfieldExtract(SDNode *Node); bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td index e3dbc67..21fb748 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td @@ -315,10 +315,6 @@ def : Pat<(rotr GPR:$rs1, GPR:$rs2), def : Pat<(rotl GPR:$rs1, GPR:$rs2), (OR (SLL GPR:$rs1, GPR:$rs2), (SRL 
GPR:$rs1, (SUB X0, GPR:$rs2)))>; -//def : Pat<(and GPR:$rs1, 1), (TH_EXTU GPR:$rs1, 0, 0)>; -//def : Pat<(and GPR:$rs1, 0xff), (TH_EXTU GPR:$rs1, 7, 0)>; -def : Pat<(and GPR:$rs1, 0xffff), (TH_EXTU GPR:$rs1, 15, 0)>; -def : Pat<(and GPR:$rs1, 0xffffffff), (TH_EXTU GPR:$rs1, 31, 0)>; def : Pat<(sext_inreg GPR:$rs1, i32), (TH_EXT GPR:$rs1, 31, 0)>; def : Pat<(sext_inreg GPR:$rs1, i16), (TH_EXT GPR:$rs1, 15, 0)>; def : Pat<(sext_inreg GPR:$rs1, i8), (TH_EXT GPR:$rs1, 7, 0)>; diff --git a/llvm/test/CodeGen/RISCV/bitextract-mac.ll b/llvm/test/CodeGen/RISCV/bitextract-mac.ll index 0e5cc8c..f0abc37 100644 --- a/llvm/test/CodeGen/RISCV/bitextract-mac.ll +++ b/llvm/test/CodeGen/RISCV/bitextract-mac.ll @@ -46,10 +46,8 @@ define i32 @f(i32 %A, i32 %B, i32 %C) { ; RV32XTHEADBB-LABEL: f: ; RV32XTHEADBB: # %bb.0: # %entry ; RV32XTHEADBB-NEXT: mul a0, a1, a0 -; RV32XTHEADBB-NEXT: slli a1, a0, 26 -; RV32XTHEADBB-NEXT: srli a1, a1, 28 -; RV32XTHEADBB-NEXT: slli a0, a0, 20 -; RV32XTHEADBB-NEXT: srli a0, a0, 25 +; RV32XTHEADBB-NEXT: th.extu a1, a0, 5, 2 +; RV32XTHEADBB-NEXT: th.extu a0, a0, 11, 5 ; RV32XTHEADBB-NEXT: mul a0, a1, a0 ; RV32XTHEADBB-NEXT: add a0, a0, a2 ; RV32XTHEADBB-NEXT: ret @@ -68,10 +66,8 @@ define i32 @f(i32 %A, i32 %B, i32 %C) { ; RV32XTHEAD-LABEL: f: ; RV32XTHEAD: # %bb.0: # %entry ; RV32XTHEAD-NEXT: mul a0, a1, a0 -; RV32XTHEAD-NEXT: slli a1, a0, 26 -; RV32XTHEAD-NEXT: srli a1, a1, 28 -; RV32XTHEAD-NEXT: slli a0, a0, 20 -; RV32XTHEAD-NEXT: srli a0, a0, 25 +; RV32XTHEAD-NEXT: th.extu a1, a0, 5, 2 +; RV32XTHEAD-NEXT: th.extu a0, a0, 11, 5 ; RV32XTHEAD-NEXT: th.mulah a2, a1, a0 ; RV32XTHEAD-NEXT: mv a0, a2 ; RV32XTHEAD-NEXT: ret @@ -111,22 +107,18 @@ define i32 @f(i32 %A, i32 %B, i32 %C) { ; ; RV64XTHEADBB-LABEL: f: ; RV64XTHEADBB: # %bb.0: # %entry -; RV64XTHEADBB-NEXT: mulw a0, a1, a0 -; RV64XTHEADBB-NEXT: slli a1, a0, 58 -; RV64XTHEADBB-NEXT: srli a1, a1, 60 -; RV64XTHEADBB-NEXT: slli a0, a0, 52 -; RV64XTHEADBB-NEXT: srli a0, a0, 57 +; RV64XTHEADBB-NEXT: mul 
a0, a1, a0 +; RV64XTHEADBB-NEXT: th.extu a1, a0, 5, 2 +; RV64XTHEADBB-NEXT: th.extu a0, a0, 11, 5 ; RV64XTHEADBB-NEXT: mulw a0, a1, a0 ; RV64XTHEADBB-NEXT: addw a0, a0, a2 ; RV64XTHEADBB-NEXT: ret ; ; RV64XTHEAD-LABEL: f: ; RV64XTHEAD: # %bb.0: # %entry -; RV64XTHEAD-NEXT: mulw a0, a1, a0 -; RV64XTHEAD-NEXT: slli a1, a0, 58 -; RV64XTHEAD-NEXT: srli a1, a1, 60 -; RV64XTHEAD-NEXT: slli a0, a0, 52 -; RV64XTHEAD-NEXT: srli a0, a0, 57 +; RV64XTHEAD-NEXT: mul a0, a1, a0 +; RV64XTHEAD-NEXT: th.extu a1, a0, 5, 2 +; RV64XTHEAD-NEXT: th.extu a0, a0, 11, 5 ; RV64XTHEAD-NEXT: th.mulah a2, a1, a0 ; RV64XTHEAD-NEXT: mv a0, a2 ; RV64XTHEAD-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll index 132e73e..a33cbdf 100644 --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -1623,8 +1623,7 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; ; RV32XTHEADBB-LABEL: rotl_64_mask_shared: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: slli a5, a4, 26 -; RV32XTHEADBB-NEXT: srli a5, a5, 31 +; RV32XTHEADBB-NEXT: th.extu a5, a4, 5, 5 ; RV32XTHEADBB-NEXT: mv a7, a0 ; RV32XTHEADBB-NEXT: bnez a5, .LBB17_2 ; RV32XTHEADBB-NEXT: # %bb.1: @@ -2098,8 +2097,7 @@ define i64 @rotl_64_mask_multiple(i64 %a, i64 %b, i64 %amt) nounwind { ; ; RV32XTHEADBB-LABEL: rotl_64_mask_multiple: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: slli a5, a4, 26 -; RV32XTHEADBB-NEXT: srli a5, a5, 31 +; RV32XTHEADBB-NEXT: th.extu a5, a4, 5, 5 ; RV32XTHEADBB-NEXT: mv a6, a1 ; RV32XTHEADBB-NEXT: bnez a5, .LBB21_2 ; RV32XTHEADBB-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll index 2e3156d..321d9c6 100644 --- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll @@ -338,6 +338,23 @@ define i32 @sexth_i32(i32 %a) nounwind { ret i32 %shr } +define i32 @no_sexth_i32(i32 %a) nounwind { +; RV32I-LABEL: no_sexth_i32: +; 
RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 17 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: ret +; +; RV32XTHEADBB-LABEL: no_sexth_i32: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: slli a0, a0, 17 +; RV32XTHEADBB-NEXT: srai a0, a0, 16 +; RV32XTHEADBB-NEXT: ret + %shl = shl i32 %a, 17 + %shr = ashr exact i32 %shl, 16 + ret i32 %shr +} + define i64 @sexth_i64(i64 %a) nounwind { ; RV32I-LABEL: sexth_i64: ; RV32I: # %bb.0: @@ -356,6 +373,25 @@ define i64 @sexth_i64(i64 %a) nounwind { ret i64 %shr } +define i64 @no_sexth_i64(i64 %a) nounwind { +; RV32I-LABEL: no_sexth_i64: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a0, 17 +; RV32I-NEXT: srai a0, a1, 16 +; RV32I-NEXT: srai a1, a1, 31 +; RV32I-NEXT: ret +; +; RV32XTHEADBB-LABEL: no_sexth_i64: +; RV32XTHEADBB: # %bb.0: +; RV32XTHEADBB-NEXT: slli a1, a0, 17 +; RV32XTHEADBB-NEXT: srai a0, a1, 16 +; RV32XTHEADBB-NEXT: srai a1, a1, 31 +; RV32XTHEADBB-NEXT: ret + %shl = shl i64 %a, 49 + %shr = ashr exact i64 %shl, 48 + ret i64 %shr +} + define i32 @zexth_i32(i32 %a) nounwind { ; RV32I-LABEL: zexth_i32: ; RV32I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll index 6b032d3..f5dd546 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadbb.ll @@ -627,6 +627,23 @@ define signext i32 @sexth_i32(i32 signext %a) nounwind { ret i32 %shr } +define signext i32 @no_sexth_i32(i32 signext %a) nounwind { +; RV64I-LABEL: no_sexth_i32: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 49 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: no_sexth_i32: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: slli a0, a0, 49 +; RV64XTHEADBB-NEXT: srai a0, a0, 48 +; RV64XTHEADBB-NEXT: ret + %shl = shl i32 %a, 17 + %shr = ashr exact i32 %shl, 16 + ret i32 %shr +} + define i64 @sexth_i64(i64 %a) nounwind { ; RV64I-LABEL: sexth_i64: ; RV64I: # %bb.0: @@ -643,6 +660,23 @@ define i64 @sexth_i64(i64 %a) nounwind { ret i64 %shr } +define i64 
@no_sexth_i64(i64 %a) nounwind { +; RV64I-LABEL: no_sexth_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 49 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: no_sexth_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: slli a0, a0, 49 +; RV64XTHEADBB-NEXT: srai a0, a0, 48 +; RV64XTHEADBB-NEXT: ret + %shl = shl i64 %a, 49 + %shr = ashr exact i64 %shl, 48 + ret i64 %shr +} + define i32 @zexth_i32(i32 %a) nounwind { ; RV64I-LABEL: zexth_i32: ; RV64I: # %bb.0: @@ -673,6 +707,37 @@ define i64 @zexth_i64(i64 %a) nounwind { ret i64 %and } +define i64 @zext_bf_i64(i64 %a) nounwind { +; RV64I-LABEL: zext_bf_i64: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 47 +; RV64I-NEXT: srli a0, a0, 48 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: zext_bf_i64: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: th.extu a0, a0, 16, 1 +; RV64XTHEADBB-NEXT: ret + %1 = lshr i64 %a, 1 + %and = and i64 %1, 65535 + ret i64 %and +} + +define i64 @zext_i64_srliw(i64 %a) nounwind { +; RV64I-LABEL: zext_i64_srliw: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a0, a0, 16 +; RV64I-NEXT: ret +; +; RV64XTHEADBB-LABEL: zext_i64_srliw: +; RV64XTHEADBB: # %bb.0: +; RV64XTHEADBB-NEXT: srliw a0, a0, 16 +; RV64XTHEADBB-NEXT: ret + %1 = lshr i64 %a, 16 + %and = and i64 %1, 65535 + ret i64 %and +} + declare i32 @llvm.bswap.i32(i32) define signext i32 @bswap_i32(i32 signext %a) nounwind { -- 2.7.4