ReplaceNode(Node, ADDI);
return;
}
+  case ISD::SHL: {
+    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+    if (!N1C)
+      break;
+    SDValue N0 = Node->getOperand(0);
+    if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+        !isa<ConstantSDNode>(N0.getOperand(1)))
+      break;
+    unsigned ShAmt = N1C->getZExtValue();
+    uint64_t Mask = N0.getConstantOperandVal(1);
+
+    // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
+    // 32 leading zeros and C3 trailing zeros.
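+    // For example, with C2 = 0xfffffffe (so C3 = 1) and C = 1:
+    // (shl (and X, 0xfffffffe), 1) -> (slli (srliw X, 1), 2).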
+    if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
+      unsigned XLen = Subtarget->getXLen();
+      unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask));
+      unsigned TrailingZeros = countTrailingZeros(Mask);
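+      // The transform is only valid when the mask covers bits [C3, 31]
+      // exactly: SRLIW then keeps precisely the masked bits, and because
+      // TrailingZeros > 0 it shifts a zero into bit 31, so the 32-bit
+      // result is zero- rather than sign-extended.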
+      if (TrailingZeros > 0 && LeadingZeros == 32) {
+        SDNode *SRLIW = CurDAG->getMachineNode(
+            RISCV::SRLIW, DL, VT, N0->getOperand(0),
+            CurDAG->getTargetConstant(TrailingZeros, DL, VT));
+        SDNode *SLLI = CurDAG->getMachineNode(
+            RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+            CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
+        ReplaceNode(Node, SLLI);
+        return;
+      }
+    }
+    break;
+  }
case ISD::SRL: {
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
if (!N1C)
diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll
%6 = load i64, i64* %5, align 8
ret i64 %6
}
+
+; FIXME: This should select sh2add with Zba.
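+; The index is (zext (lshr %1, 1)) << 2 == (%1 & 0xfffffffe) << 1, so
+; "srliw a1, a1, 1" + "sh2add a0, a1, a0" would be two instructions shorter
+; than the li/zext.w/and/sh1add sequence below.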
+define signext i32 @srliw_1_sh2add(i32* %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh2add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a1, a1, 1
+; RV64I-NEXT: slli a1, a1, 2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lw a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srliw_1_sh2add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: li a2, -2
+; RV64ZBA-NEXT: zext.w a2, a2
+; RV64ZBA-NEXT: and a1, a1, a2
+; RV64ZBA-NEXT: sh1add a0, a1, a0
+; RV64ZBA-NEXT: lw a0, 0(a0)
+; RV64ZBA-NEXT: ret
+ %3 = lshr i32 %1, 1
+ %4 = zext i32 %3 to i64
+ %5 = getelementptr inbounds i32, i32* %0, i64 %4
+ %6 = load i32, i32* %5, align 4
+ ret i32 %6
+}
+
+; FIXME: This should select sh3add with Zba.
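+; Here the index is (zext (lshr %1, 1)) << 3 == (%1 & 0xfffffffe) << 2, so
+; "srliw a1, a1, 1" + "sh3add a0, a1, a0" would be two instructions shorter.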
+define i64 @srliw_1_sh3add(i64* %0, i32 signext %1) {
+; RV64I-LABEL: srliw_1_sh3add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a1, a1, 1
+; RV64I-NEXT: slli a1, a1, 3
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srliw_1_sh3add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: li a2, -2
+; RV64ZBA-NEXT: zext.w a2, a2
+; RV64ZBA-NEXT: and a1, a1, a2
+; RV64ZBA-NEXT: sh2add a0, a1, a0
+; RV64ZBA-NEXT: ld a0, 0(a0)
+; RV64ZBA-NEXT: ret
+ %3 = lshr i32 %1, 1
+ %4 = zext i32 %3 to i64
+ %5 = getelementptr inbounds i64, i64* %0, i64 %4
+ %6 = load i64, i64* %5, align 8
+ ret i64 %6
+}
+
+; FIXME: This should select sh3add with Zba.
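+; Here the index is (zext (lshr %1, 2)) << 3 == (%1 & 0xfffffffc) << 1, so
+; "srliw a1, a1, 2" + "sh3add a0, a1, a0" would be two instructions shorter.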
+define i64 @srliw_2_sh3add(i64* %0, i32 signext %1) {
+; RV64I-LABEL: srliw_2_sh3add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srliw a1, a1, 2
+; RV64I-NEXT: slli a1, a1, 3
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srliw_2_sh3add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: li a2, -4
+; RV64ZBA-NEXT: zext.w a2, a2
+; RV64ZBA-NEXT: and a1, a1, a2
+; RV64ZBA-NEXT: sh1add a0, a1, a0
+; RV64ZBA-NEXT: ld a0, 0(a0)
+; RV64ZBA-NEXT: ret
+ %3 = lshr i32 %1, 2
+ %4 = zext i32 %3 to i64
+ %5 = getelementptr inbounds i64, i64* %0, i64 %4
+ %6 = load i64, i64* %5, align 8
+ ret i64 %6
+}