break;
}
+ case ISD::AND: {
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ break;
+
+ SDValue N0 = Node->getOperand(0);
+
+ bool LeftShift = N0.getOpcode() == ISD::SHL;
+ if (!LeftShift && N0.getOpcode() != ISD::SRL)
+ break;
+
+ auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C)
+ break;
+ uint64_t C2 = C->getZExtValue();
+ unsigned XLen = Subtarget->getXLen();
+ if (!C2 || C2 >= XLen)
+ break;
+
+ uint64_t C1 = N1C->getZExtValue();
+
+ // Keep track of whether this is an andi, zext.h, or zext.w.
+ bool ZExtOrANDI = isInt<12>(N1C->getSExtValue());
+ if (C1 == UINT64_C(0xFFFF) &&
+ (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()))
+ ZExtOrANDI = true;
+ if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())
+ ZExtOrANDI = true;
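+ // When the AND is already selectable as a single andi/zext.h/zext.w, the
+ // two-shift replacements below are not profitable, so they are skipped.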
+
+ // Clear irrelevant bits in the mask.
+ if (LeftShift)
+ C1 &= maskTrailingZeros<uint64_t>(C2);
+ else
+ C1 &= maskTrailingOnes<uint64_t>(XLen - C2);
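+ // (shl x, C2) has zeros in its low C2 bits and (srl x, C2) has zeros in its
+ // high C2 bits, so those mask bits cannot affect the result.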
+
+ // Some transforms should only be done if the shift has a single use or
+ // the AND would become (srli (slli X, 32), 32).
+ bool OneUseOrZExtW = N0.hasOneUse() || C1 == UINT64_C(0xFFFFFFFF);
+
+ SDValue X = N0.getOperand(0);
+
+ // Turn (and (srl x, c2), c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
+ // with c3 leading zeros.
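+ // For example, with XLen == 64 and a single-use shift,
+ // (and (srl x, 4), 0x7fffffff) has c2 == 4 and c3 == 33, so it is selected
+ // as (srli (slli x, 29), 33).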
+ if (!LeftShift && isMask_64(C1)) {
+ uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
+ if (C2 < C3) {
+ // If the number of leading zeros is C2+32, this can be an SRLIW.
+ if (C2 + 32 == C3) {
+ SDNode *SRLIW =
+ CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C2, DL, XLenVT));
+ ReplaceNode(Node, SRLIW);
+ return;
+ }
+
+ // (and (srl (sexti32 Y), c2), c1) -> (srliw (sraiw Y, 31), c3 - 32) if
+ // c1 is a mask with c3 leading zeros and c2 >= 32 and c3-c2==1.
+ //
+ // This pattern occurs when (i32 (srl (sra X, 31), c3 - 32)) is type
+ // legalized and goes through DAG combine.
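+ //
+ // For example, (and (srl (sexti32 Y), 60), 7) has c2 == 60 and c3 == 61, so
+ // it is selected as (srliw (sraiw Y, 31), 29).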
+ SDValue Y;
+ if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
+ selectSExti32(X, Y)) {
+ SDNode *SRAIW =
+ CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y,
+ CurDAG->getTargetConstant(31, DL, XLenVT));
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
+ CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
+ ReplaceNode(Node, SRLIW);
+ return;
+ }
+
+ // (srli (slli x, c3-c2), c3).
+ if (OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ SDNode *SRLI =
+ CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SRLI);
+ return;
+ }
+ }
+ }
+
+ // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
+ // shifted by c2 bits with c3 leading zeros.
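+ // For example, (and (shl x, 4), 0xffffffff0) has c2 == 4 and c3 == 28; with
+ // Zba it is selected as (slli.uw x, 4), and otherwise, for a single-use
+ // shift, as (srli (slli x, 32), 28).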
+ if (LeftShift && isShiftedMask_64(C1)) {
+ uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
+
+ if (C2 + C3 < XLen &&
+ C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
+ // Use slli.uw when possible.
+ if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
+ SDNode *SLLIUW =
+ CurDAG->getMachineNode(RISCV::SLLIUW, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C2, DL, XLenVT));
+ ReplaceNode(Node, SLLIUW);
+ return;
+ }
+
+ // (srli (slli x, c2+c3), c3)
+ if (OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ SDNode *SRLI =
+ CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SRLI);
+ return;
+ }
+ }
+ }
+
+ break;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = Node->getConstantOperandVal(0);
switch (IntNo) {
return false;
}
-// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
-// on RV64).
-// SLLIUW is the same as SLLI except for the fact that it clears the bits
-// XLEN-1:32 of the input RS1 before shifting.
-// A PatFrag has already checked that it has the right structure:
-//
-// (AND (SHL RS1, VC2), VC1)
-//
-// We check that VC2, the shamt is less than 32, otherwise the pattern is
-// exactly the same as SLLI and we give priority to that.
-// Eventually we check that VC1, the mask used to clear the upper 32 bits
-// of RS1, is correct:
-//
-// VC1 == (0xFFFFFFFF << VC2)
-//
-bool RISCVDAGToDAGISel::MatchSLLIUW(SDNode *N) const {
- assert(N->getOpcode() == ISD::AND);
- assert(N->getOperand(0).getOpcode() == ISD::SHL);
- assert(isa<ConstantSDNode>(N->getOperand(1)));
- assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
- // The IsRV64 predicate is checked after PatFrag predicates so we can get
- // here even on RV32.
- if (!Subtarget->is64Bit())
- return false;
-
- SDValue Shl = N->getOperand(0);
- uint64_t VC1 = N->getConstantOperandVal(1);
- uint64_t VC2 = Shl.getConstantOperandVal(1);
-
- // Immediate range should be enforced by uimm5 predicate.
- assert(VC2 < 32 && "Unexpected immediate");
- return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
-}
-
// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose betwen VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
bool selectSExti32(SDValue N, SDValue &Val);
bool selectZExti32(SDValue N, SDValue &Val);
- bool MatchSLLIUW(SDNode *N) const;
-
bool selectVLOp(SDValue N, SDValue &VL);
bool selectVSplat(SDValue N, SDValue &SplatVal);
}]>;
def zexti32 : ComplexPattern<i64, 1, "selectZExti32">;
-// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
-// on RV64). Also used to optimize the same sequence without SLLIUW.
-def SLLIUWPat : PatFrag<(ops node:$A, node:$B),
- (and (shl node:$A, node:$B), imm), [{
- return MatchSLLIUW(N);
-}]>;
-
def add_oneuse : PatFrag<(ops node:$A, node:$B), (add node:$A, node:$B), [{
return N->hasOneUse();
}]>;
// shifts instead of 3. This can occur when unsigned is used to index an array.
def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
(SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
-// shl/and can appear in the other order too.
-def : Pat<(i64 (SLLIUWPat GPR:$rs1, uimm5:$shamt)),
- (SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
-
-// If we're shifting a value left by 0-31 bits, and then masking to 32-bits,
-// use 2 shifts instead of 3.
-def : Pat<(i64 (and (shl GPR:$rs1, uimm5:$shamt), 0xffffffff)),
- (SRLI (SLLI GPR:$rs1, (ImmPlus32 uimm5:$shamt)), 32)>;
}
let Predicates = [IsRV64] in {
} // Predicates = [HasStdExtZba]
let Predicates = [HasStdExtZba, IsRV64] in {
-def : Pat<(i64 (SLLIUWPat GPR:$rs1, uimm5:$shamt)),
- (SLLIUW GPR:$rs1, uimm5:$shamt)>;
def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)),
(SLLIUW GPR:$rs1, uimm5:$shamt)>;
def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)),
(SH3ADDUW GPR:$rs1, GPR:$rs2)>;
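+// The masks below keep the bits under the shift amount set: since those bits
+// of (shl GPR:$rs1, c) are known zero, 0x1FFFFFFFF, 0x3FFFFFFFF, and
+// 0x7FFFFFFFF zero-extend the pre-shift value exactly as (0xFFFFFFFF << c)
+// would.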
-def : Pat<(i64 (add (SLLIUWPat GPR:$rs1, (i64 1)), non_imm12:$rs2)),
+def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)),
(SH1ADDUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (SLLIUWPat GPR:$rs1, (i64 2)), non_imm12:$rs2)),
+def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)),
(SH2ADDUW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (add (SLLIUWPat GPR:$rs1, (i64 3)), non_imm12:$rs2)),
+def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)),
(SH3ADDUW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZba, IsRV64]
;
; RV64I-LABEL: sdiv_pow2:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: srli a1, a1, 60
-; RV64I-NEXT: andi a1, a1, 7
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 29
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 3
; RV64I-NEXT: ret
;
; RV64IM-LABEL: sdiv_pow2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sext.w a1, a0
-; RV64IM-NEXT: srli a1, a1, 60
-; RV64IM-NEXT: andi a1, a1, 7
+; RV64IM-NEXT: sraiw a1, a0, 31
+; RV64IM-NEXT: srliw a1, a1, 29
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: sraiw a0, a0, 3
; RV64IM-NEXT: ret
;
; RV64I-LABEL: sdiv_pow2_2:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: srli a1, a1, 47
-; RV64I-NEXT: lui a2, 16
-; RV64I-NEXT: addiw a2, a2, -1
-; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 16
; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: sraiw a0, a0, 16
; RV64I-NEXT: ret
;
; RV64IM-LABEL: sdiv_pow2_2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sext.w a1, a0
-; RV64IM-NEXT: srli a1, a1, 47
-; RV64IM-NEXT: lui a2, 16
-; RV64IM-NEXT: addiw a2, a2, -1
-; RV64IM-NEXT: and a1, a1, a2
+; RV64IM-NEXT: sraiw a1, a0, 31
+; RV64IM-NEXT: srliw a1, a1, 16
; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: sraiw a0, a0, 16
; RV64IM-NEXT: ret
; RV64IM-NEXT: lui a1, 6
; RV64IM-NEXT: addiw a1, a1, 1639
; RV64IM-NEXT: mul a0, a0, a1
-; RV64IM-NEXT: srai a1, a0, 17
-; RV64IM-NEXT: srli a0, a0, 31
-; RV64IM-NEXT: andi a0, a0, 1
-; RV64IM-NEXT: add a0, a1, a0
+; RV64IM-NEXT: srliw a1, a0, 31
+; RV64IM-NEXT: srai a0, a0, 17
+; RV64IM-NEXT: add a0, a0, a1
; RV64IM-NEXT: ret
%1 = sdiv i16 %a, 5
ret i16 %1
;
; RV64I-LABEL: srem_pow2:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: srli a1, a1, 60
-; RV64I-NEXT: andi a1, a1, 7
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 29
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: andi a1, a1, -8
; RV64I-NEXT: subw a0, a0, a1
;
; RV64IM-LABEL: srem_pow2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sext.w a1, a0
-; RV64IM-NEXT: srli a1, a1, 60
-; RV64IM-NEXT: andi a1, a1, 7
+; RV64IM-NEXT: sraiw a1, a0, 31
+; RV64IM-NEXT: srliw a1, a1, 29
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: andi a1, a1, -8
; RV64IM-NEXT: subw a0, a0, a1
;
; RV64I-LABEL: srem_pow2_2:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: srli a1, a1, 47
-; RV64I-NEXT: lui a2, 16
-; RV64I-NEXT: addiw a2, a2, -1
-; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 16
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: lui a2, 1048560
; RV64I-NEXT: and a1, a1, a2
;
; RV64IM-LABEL: srem_pow2_2:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sext.w a1, a0
-; RV64IM-NEXT: srli a1, a1, 47
-; RV64IM-NEXT: lui a2, 16
-; RV64IM-NEXT: addiw a2, a2, -1
-; RV64IM-NEXT: and a1, a1, a2
+; RV64IM-NEXT: sraiw a1, a0, 31
+; RV64IM-NEXT: srliw a1, a1, 16
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: lui a2, 1048560
; RV64IM-NEXT: and a1, a1, a2
; RV32I-LABEL: packh_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: slli a1, a1, 8
-; RV32I-NEXT: lui a2, 16
-; RV32I-NEXT: addi a2, a2, -256
-; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: slli a1, a1, 24
+; RV32I-NEXT: srli a1, a1, 16
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: ret
;
; RV32I-LABEL: packh_i64:
; RV32I: # %bb.0:
; RV32I-NEXT: andi a0, a0, 255
-; RV32I-NEXT: slli a1, a2, 8
-; RV32I-NEXT: lui a2, 16
-; RV32I-NEXT: addi a2, a2, -256
-; RV32I-NEXT: and a1, a1, a2
+; RV32I-NEXT: slli a1, a2, 24
+; RV32I-NEXT: srli a1, a1, 16
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: mv a1, zero
; RV32I-NEXT: ret
; RV64I-LABEL: zext_sraiw_aext:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: srli a0, a0, 7
-; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: slli a0, a0, 25
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
%1 = ashr i32 %a, 7
define zeroext i32 @zext_sraiw_sext(i32 signext %a) nounwind {
; RV64I-LABEL: zext_sraiw_sext:
; RV64I: # %bb.0:
-; RV64I-NEXT: srli a0, a0, 8
-; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: slli a0, a0, 24
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
%1 = ashr i32 %a, 8
; RV64I-LABEL: zext_sraiw_zext:
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: srli a0, a0, 9
-; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: slli a0, a0, 23
; RV64I-NEXT: srli a0, a0, 32
; RV64I-NEXT: ret
%1 = ashr i32 %a, 9
; RV64I-LABEL: packh_i32:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a0, a0, 255
-; RV64I-NEXT: slli a1, a1, 8
-; RV64I-NEXT: lui a2, 16
-; RV64I-NEXT: addiw a2, a2, -256
-; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: slli a1, a1, 56
+; RV64I-NEXT: srli a1, a1, 48
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
; RV64I-LABEL: packh_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a0, a0, 255
-; RV64I-NEXT: slli a1, a1, 8
-; RV64I-NEXT: lui a2, 16
-; RV64I-NEXT: addiw a2, a2, -256
-; RV64I-NEXT: and a1, a1, a2
+; RV64I-NEXT: slli a1, a1, 56
+; RV64I-NEXT: srli a1, a1, 48
; RV64I-NEXT: or a0, a1, a0
; RV64I-NEXT: ret
;
;
; RV64I-LABEL: dont_fold_srem_power_of_two:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: srli a1, a1, 57
-; RV64I-NEXT: andi a1, a1, 63
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 26
; RV64I-NEXT: add a1, a0, a1
; RV64I-NEXT: andi a1, a1, -64
; RV64I-NEXT: subw a0, a0, a1
;
; RV64IM-LABEL: dont_fold_srem_power_of_two:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sext.w a1, a0
-; RV64IM-NEXT: srli a1, a1, 57
-; RV64IM-NEXT: andi a1, a1, 63
+; RV64IM-NEXT: sraiw a1, a0, 31
+; RV64IM-NEXT: srliw a1, a1, 26
; RV64IM-NEXT: add a1, a0, a1
; RV64IM-NEXT: andi a1, a1, -64
; RV64IM-NEXT: subw a0, a0, a1
;
; RV64I-LABEL: dont_fold_srem_i32_smax:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a1, a0
-; RV64I-NEXT: srli a1, a1, 32
-; RV64I-NEXT: lui a2, 524288
-; RV64I-NEXT: addiw a3, a2, -1
-; RV64I-NEXT: and a1, a1, a3
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 1
; RV64I-NEXT: add a1, a0, a1
+; RV64I-NEXT: lui a2, 524288
; RV64I-NEXT: and a1, a1, a2
; RV64I-NEXT: addw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64IM-LABEL: dont_fold_srem_i32_smax:
; RV64IM: # %bb.0:
-; RV64IM-NEXT: sext.w a1, a0
-; RV64IM-NEXT: srli a1, a1, 32
-; RV64IM-NEXT: lui a2, 524288
-; RV64IM-NEXT: addiw a3, a2, -1
-; RV64IM-NEXT: and a1, a1, a3
+; RV64IM-NEXT: sraiw a1, a0, 31
+; RV64IM-NEXT: srliw a1, a1, 1
; RV64IM-NEXT: add a1, a0, a1
+; RV64IM-NEXT: lui a2, 524288
; RV64IM-NEXT: and a1, a1, a2
; RV64IM-NEXT: addw a0, a0, a1
; RV64IM-NEXT: ret