return true;
}
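+// Attempt to select a signed bitfield extract (TH.EXT) for an arithmetic
+// right-shift whose operand is a left-shift or a sext_inreg. Returns true
+// and replaces Node on success.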
+bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
+ // Only supported with XTHeadBb at the moment.
+ if (!Subtarget->hasVendorXTHeadBb())
+ return false;
+
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ return false;
+
+ SDValue N0 = Node->getOperand(0);
+ if (!N0.hasOneUse())
+ return false;
+
+ auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
+ MVT VT) {
+ return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Msb, DL, VT),
+ CurDAG->getTargetConstant(Lsb, DL, VT));
+ };
+
+ SDLoc DL(Node);
+ MVT VT = Node->getSimpleValueType(0);
+ const unsigned RightShAmt = N1C->getZExtValue();
+
+  // Transform (sra (shl X, C1), C2) with C1 <= C2
+  //        -> (TH.EXT X, msb, lsb)
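+  // For example, with a 64-bit type, (sra (shl X, 32), 48) becomes
+  // (TH.EXT X, 31, 16): msb = 64 - 32 - 1, lsb = 48 - 32, i.e. a
+  // sign-extending extract of bits X[31:16].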
+ if (N0.getOpcode() == ISD::SHL) {
+ auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!N01C)
+ return false;
+
+ const unsigned LeftShAmt = N01C->getZExtValue();
+    // Make sure that this is a bitfield extraction (i.e., the right-shift
+    // amount cannot be less than the left-shift amount).
+ if (LeftShAmt > RightShAmt)
+ return false;
+
+ const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
+ const unsigned Msb = MsbPlusOne - 1;
+ const unsigned Lsb = RightShAmt - LeftShAmt;
+
+ SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
+ ReplaceNode(Node, TH_EXT);
+ return true;
+ }
+
+ // Transform (sra (sext_inreg X, _), C) ->
+ // (TH.EXT X, msb, lsb)
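+  // For example, (sra (sext_inreg X, i16), 4) becomes (TH.EXT X, 15, 4),
+  // i.e. a sign-extending extract of bits X[15:4].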
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ unsigned ExtSize =
+ cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
+
+ // ExtSize of 32 should use sraiw via tablegen pattern.
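+    // (sraiw is a base RV64I instruction, so it is preferable to the
+    // vendor instruction here.)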
+ if (ExtSize == 32)
+ return false;
+
+ const unsigned Msb = ExtSize - 1;
+ const unsigned Lsb = RightShAmt;
+
+ SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
+ ReplaceNode(Node, TH_EXT);
+ return true;
+ }
+
+ return false;
+}
+
void RISCVDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we have already selected.
if (Node->isMachineOpcode()) {
return;
}
case ISD::SRA: {
+ if (trySignedBitfieldExtract(Node))
+ return;
+
// Optimize (sra (sext_inreg X, i16), C) ->
    //          (srai (slli X, (XLen-16)), (XLen-16) + C)
// And (sra (sext_inreg X, i8), C) ->
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
if (!N1C)
break;
+ uint64_t C1 = N1C->getZExtValue();
+ const bool isC1Mask = isMask_64(C1);
+ const bool isC1ANDI = isInt<12>(C1);
SDValue N0 = Node->getOperand(0);
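+    // Helper that selects an unsigned bitfield extract (TH.EXTU), which
+    // zero-extends bits X[Msb:Lsb] of its operand, provided XTHeadBb is
+    // available.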
+ auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
+ SDValue X, unsigned Msb,
+ unsigned Lsb) {
+ if (!Subtarget->hasVendorXTHeadBb())
+ return false;
+
+ SDNode *TH_EXTU = CurDAG->getMachineNode(
+ RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
+ CurDAG->getTargetConstant(Lsb, DL, VT));
+ ReplaceNode(Node, TH_EXTU);
+ return true;
+ };
+
bool LeftShift = N0.getOpcode() == ISD::SHL;
if (LeftShift || N0.getOpcode() == ISD::SRL) {
auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
unsigned XLen = Subtarget->getXLen();
assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
- uint64_t C1 = N1C->getZExtValue();
-
// Keep track of whether this is a c.andi. If we can't use c.andi, the
// shift pair might offer more compression opportunities.
// TODO: We could check for C extension here, but we don't have many lit
// Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
// with c3 leading zeros.
- if (!LeftShift && isMask_64(C1)) {
+ if (!LeftShift && isC1Mask) {
unsigned Leading = XLen - llvm::bit_width(C1);
if (C2 < Leading) {
// If the number of leading zeros is C2+32 this can be SRLIW.
return;
}
+ // Try to use an unsigned bitfield extract (e.g., th.extu) if
+ // available.
+ // Transform (and (srl x, C2), C1)
+ // -> (<bfextract> x, msb, lsb)
+ //
+ // Make sure to keep this below the SRLIW cases, as we always want to
+ // prefer the more common instruction.
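+        // For example, (and (srl X, 2), 0xf) becomes (th.extu X, 5, 2):
+        // msb = 4 + 2 - 1, lsb = 2.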
+ const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
+ const unsigned Lsb = C2;
+ if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
+ return;
+
// (srli (slli x, c3-c2), c3).
// Skip if we could use (zext.w (sraiw X, C2)).
bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
}
}
+    // If C1 is a mask that only clears the upper bits (but cannot be
+    // encoded as an ANDI immediate), use an unsigned bitfield extract
+    // (e.g., th.extu), if available.
+ // Transform (and x, C1)
+ // -> (<bfextract> x, msb, lsb)
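+    // For example, (and X, 0xffff) becomes (th.extu X, 15, 0), and
+    // (and X, 0xffffffff) becomes (th.extu X, 31, 0).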
+ if (isC1Mask && !isC1ANDI) {
+ const unsigned Msb = llvm::bit_width(C1) - 1;
+ if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
+ return;
+ }
+
if (tryShrinkShlLogicImm(Node))
return;
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
bool tryShrinkShlLogicImm(SDNode *Node);
+ bool trySignedBitfieldExtract(SDNode *Node);
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) {
def : Pat<(rotl GPR:$rs1, GPR:$rs2),
(OR (SLL GPR:$rs1, GPR:$rs2),
(SRL GPR:$rs1, (SUB X0, GPR:$rs2)))>;
-//def : Pat<(and GPR:$rs1, 1), (TH_EXTU GPR:$rs1, 0, 0)>;
-//def : Pat<(and GPR:$rs1, 0xff), (TH_EXTU GPR:$rs1, 7, 0)>;
-def : Pat<(and GPR:$rs1, 0xffff), (TH_EXTU GPR:$rs1, 15, 0)>;
-def : Pat<(and GPR:$rs1, 0xffffffff), (TH_EXTU GPR:$rs1, 31, 0)>;
def : Pat<(sext_inreg GPR:$rs1, i32), (TH_EXT GPR:$rs1, 31, 0)>;
def : Pat<(sext_inreg GPR:$rs1, i16), (TH_EXT GPR:$rs1, 15, 0)>;
def : Pat<(sext_inreg GPR:$rs1, i8), (TH_EXT GPR:$rs1, 7, 0)>;
; RV32XTHEADBB-LABEL: f:
; RV32XTHEADBB: # %bb.0: # %entry
; RV32XTHEADBB-NEXT: mul a0, a1, a0
-; RV32XTHEADBB-NEXT: slli a1, a0, 26
-; RV32XTHEADBB-NEXT: srli a1, a1, 28
-; RV32XTHEADBB-NEXT: slli a0, a0, 20
-; RV32XTHEADBB-NEXT: srli a0, a0, 25
+; RV32XTHEADBB-NEXT: th.extu a1, a0, 5, 2
+; RV32XTHEADBB-NEXT: th.extu a0, a0, 11, 5
; RV32XTHEADBB-NEXT: mul a0, a1, a0
; RV32XTHEADBB-NEXT: add a0, a0, a2
; RV32XTHEADBB-NEXT: ret
; RV32XTHEAD-LABEL: f:
; RV32XTHEAD: # %bb.0: # %entry
; RV32XTHEAD-NEXT: mul a0, a1, a0
-; RV32XTHEAD-NEXT: slli a1, a0, 26
-; RV32XTHEAD-NEXT: srli a1, a1, 28
-; RV32XTHEAD-NEXT: slli a0, a0, 20
-; RV32XTHEAD-NEXT: srli a0, a0, 25
+; RV32XTHEAD-NEXT: th.extu a1, a0, 5, 2
+; RV32XTHEAD-NEXT: th.extu a0, a0, 11, 5
; RV32XTHEAD-NEXT: th.mulah a2, a1, a0
; RV32XTHEAD-NEXT: mv a0, a2
; RV32XTHEAD-NEXT: ret
;
; RV64XTHEADBB-LABEL: f:
; RV64XTHEADBB: # %bb.0: # %entry
-; RV64XTHEADBB-NEXT: mulw a0, a1, a0
-; RV64XTHEADBB-NEXT: slli a1, a0, 58
-; RV64XTHEADBB-NEXT: srli a1, a1, 60
-; RV64XTHEADBB-NEXT: slli a0, a0, 52
-; RV64XTHEADBB-NEXT: srli a0, a0, 57
+; RV64XTHEADBB-NEXT: mul a0, a1, a0
+; RV64XTHEADBB-NEXT: th.extu a1, a0, 5, 2
+; RV64XTHEADBB-NEXT: th.extu a0, a0, 11, 5
; RV64XTHEADBB-NEXT: mulw a0, a1, a0
; RV64XTHEADBB-NEXT: addw a0, a0, a2
; RV64XTHEADBB-NEXT: ret
;
; RV64XTHEAD-LABEL: f:
; RV64XTHEAD: # %bb.0: # %entry
-; RV64XTHEAD-NEXT: mulw a0, a1, a0
-; RV64XTHEAD-NEXT: slli a1, a0, 58
-; RV64XTHEAD-NEXT: srli a1, a1, 60
-; RV64XTHEAD-NEXT: slli a0, a0, 52
-; RV64XTHEAD-NEXT: srli a0, a0, 57
+; RV64XTHEAD-NEXT: mul a0, a1, a0
+; RV64XTHEAD-NEXT: th.extu a1, a0, 5, 2
+; RV64XTHEAD-NEXT: th.extu a0, a0, 11, 5
; RV64XTHEAD-NEXT: th.mulah a2, a1, a0
; RV64XTHEAD-NEXT: mv a0, a2
; RV64XTHEAD-NEXT: ret
;
; RV32XTHEADBB-LABEL: rotl_64_mask_shared:
; RV32XTHEADBB: # %bb.0:
-; RV32XTHEADBB-NEXT: slli a5, a4, 26
-; RV32XTHEADBB-NEXT: srli a5, a5, 31
+; RV32XTHEADBB-NEXT: th.extu a5, a4, 5, 5
; RV32XTHEADBB-NEXT: mv a7, a0
; RV32XTHEADBB-NEXT: bnez a5, .LBB17_2
; RV32XTHEADBB-NEXT: # %bb.1:
;
; RV32XTHEADBB-LABEL: rotl_64_mask_multiple:
; RV32XTHEADBB: # %bb.0:
-; RV32XTHEADBB-NEXT: slli a5, a4, 26
-; RV32XTHEADBB-NEXT: srli a5, a5, 31
+; RV32XTHEADBB-NEXT: th.extu a5, a4, 5, 5
; RV32XTHEADBB-NEXT: mv a6, a1
; RV32XTHEADBB-NEXT: bnez a5, .LBB21_2
; RV32XTHEADBB-NEXT: # %bb.1:
ret i32 %shr
}
+define i32 @no_sexth_i32(i32 %a) nounwind {
+; RV32I-LABEL: no_sexth_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 17
+; RV32I-NEXT: srai a0, a0, 16
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: no_sexth_i32:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: slli a0, a0, 17
+; RV32XTHEADBB-NEXT: srai a0, a0, 16
+; RV32XTHEADBB-NEXT: ret
+ %shl = shl i32 %a, 17
+ %shr = ashr exact i32 %shl, 16
+ ret i32 %shr
+}
+
define i64 @sexth_i64(i64 %a) nounwind {
; RV32I-LABEL: sexth_i64:
; RV32I: # %bb.0:
ret i64 %shr
}
+define i64 @no_sexth_i64(i64 %a) nounwind {
+; RV32I-LABEL: no_sexth_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a1, a0, 17
+; RV32I-NEXT: srai a0, a1, 16
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: ret
+;
+; RV32XTHEADBB-LABEL: no_sexth_i64:
+; RV32XTHEADBB: # %bb.0:
+; RV32XTHEADBB-NEXT: slli a1, a0, 17
+; RV32XTHEADBB-NEXT: srai a0, a1, 16
+; RV32XTHEADBB-NEXT: srai a1, a1, 31
+; RV32XTHEADBB-NEXT: ret
+ %shl = shl i64 %a, 49
+ %shr = ashr exact i64 %shl, 48
+ ret i64 %shr
+}
+
define i32 @zexth_i32(i32 %a) nounwind {
; RV32I-LABEL: zexth_i32:
; RV32I: # %bb.0:
ret i32 %shr
}
+define signext i32 @no_sexth_i32(i32 signext %a) nounwind {
+; RV64I-LABEL: no_sexth_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 49
+; RV64I-NEXT: srai a0, a0, 48
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: no_sexth_i32:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: slli a0, a0, 49
+; RV64XTHEADBB-NEXT: srai a0, a0, 48
+; RV64XTHEADBB-NEXT: ret
+ %shl = shl i32 %a, 17
+ %shr = ashr exact i32 %shl, 16
+ ret i32 %shr
+}
+
define i64 @sexth_i64(i64 %a) nounwind {
; RV64I-LABEL: sexth_i64:
; RV64I: # %bb.0:
ret i64 %shr
}
+define i64 @no_sexth_i64(i64 %a) nounwind {
+; RV64I-LABEL: no_sexth_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 49
+; RV64I-NEXT: srai a0, a0, 48
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: no_sexth_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: slli a0, a0, 49
+; RV64XTHEADBB-NEXT: srai a0, a0, 48
+; RV64XTHEADBB-NEXT: ret
+ %shl = shl i64 %a, 49
+ %shr = ashr exact i64 %shl, 48
+ ret i64 %shr
+}
+
define i32 @zexth_i32(i32 %a) nounwind {
; RV64I-LABEL: zexth_i32:
; RV64I: # %bb.0:
ret i64 %and
}
+define i64 @zext_bf_i64(i64 %a) nounwind {
+; RV64I-LABEL: zext_bf_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 47
+; RV64I-NEXT: srli a0, a0, 48
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: zext_bf_i64:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: th.extu a0, a0, 16, 1
+; RV64XTHEADBB-NEXT: ret
+ %1 = lshr i64 %a, 1
+ %and = and i64 %1, 65535
+ ret i64 %and
+}
+
+define i64 @zext_i64_srliw(i64 %a) nounwind {
+; RV64I-LABEL: zext_i64_srliw:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a0, a0, 16
+; RV64I-NEXT: ret
+;
+; RV64XTHEADBB-LABEL: zext_i64_srliw:
+; RV64XTHEADBB: # %bb.0:
+; RV64XTHEADBB-NEXT: srliw a0, a0, 16
+; RV64XTHEADBB-NEXT: ret
+ %1 = lshr i64 %a, 16
+ %and = and i64 %1, 65535
+ ret i64 %and
+}
+
declare i32 @llvm.bswap.i32(i32)
define signext i32 @bswap_i32(i32 signext %a) nounwind {