/// SHXADD we are trying to match.
bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
SDValue &Val) {
+ if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
+ SDValue N0 = N.getOperand(0);
+
+ bool LeftShift = N0.getOpcode() == ISD::SHL;
+ if ((LeftShift || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1))) {
+ uint64_t Mask = N.getConstantOperandVal(1);
+ unsigned C2 = N0.getConstantOperandVal(1);
+
+ unsigned XLen = Subtarget->getXLen();
+ if (LeftShift)
+ Mask &= maskTrailingZeros<uint64_t>(C2);
+ else
+ Mask &= maskTrailingOnes<uint64_t>(XLen - C2);
+
+ // Look for (and (shl y, c2), c1) where c1 is a shifted mask with no
+ // leading zeros and c3 trailing zeros. We can use an SRLI by c3-c2
+ // followed by a SHXADD with c3 for the X amount.
+ if (isShiftedMask_64(Mask)) {
+ unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
+ unsigned Trailing = countTrailingZeros(Mask);
+ if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Trailing - C2, DL, VT)),
+ 0);
+ return true;
+ }
+ // Look for (and (shr y, c2), c1) where c1 is a shifted mask with c2
+ // leading zeros and c3 trailing zeros. We can use an SRLI by c2+c3
+ // followed by a SHXADD using c3 for the X amount.
+ if (!LeftShift && Leading == C2 && Trailing == ShAmt) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ Val = SDValue(
+ CurDAG->getMachineNode(
+ RISCV::SRLI, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Leading + Trailing, DL, VT)),
+ 0);
+ return true;
+ }
+ }
+ }
+ }
+
bool LeftShift = N.getOpcode() == ISD::SHL;
if ((LeftShift || N.getOpcode() == ISD::SRL) &&
isa<ConstantSDNode>(N.getOperand(1))) {
- unsigned C1 = N.getConstantOperandVal(1);
SDValue N0 = N.getOperand(0);
if (N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
uint64_t Mask = N0.getConstantOperandVal(1);
if (isShiftedMask_64(Mask)) {
+ unsigned C1 = N.getConstantOperandVal(1);
unsigned XLen = Subtarget->getXLen();
unsigned Leading = XLen - (64 - countLeadingZeros(Mask));
unsigned Trailing = countTrailingZeros(Mask);
}
define signext i32 @srli_1_sh2add(i32* %0, i64 %1) {
-; CHECK-LABEL: srli_1_sh2add:
-; CHECK: # %bb.0:
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: andi a1, a1, -4
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: lw a0, 0(a0)
-; CHECK-NEXT: ret
+; RV64I-LABEL: srli_1_sh2add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 1
+; RV64I-NEXT: andi a1, a1, -4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lw a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srli_1_sh2add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: srli a1, a1, 1
+; RV64ZBA-NEXT: sh2add a0, a1, a0
+; RV64ZBA-NEXT: lw a0, 0(a0)
+; RV64ZBA-NEXT: ret
%3 = lshr i64 %1, 1
%4 = getelementptr inbounds i32, i32* %0, i64 %3
%5 = load i32, i32* %4, align 4
}
define i64 @srli_2_sh3add(i64* %0, i64 %1) {
-; CHECK-LABEL: srli_2_sh3add:
-; CHECK: # %bb.0:
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: andi a1, a1, -8
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ld a0, 0(a0)
-; CHECK-NEXT: ret
+; RV64I-LABEL: srli_2_sh3add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a1, a1, 1
+; RV64I-NEXT: andi a1, a1, -8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srli_2_sh3add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: srli a1, a1, 2
+; RV64ZBA-NEXT: sh3add a0, a1, a0
+; RV64ZBA-NEXT: ld a0, 0(a0)
+; RV64ZBA-NEXT: ret
%3 = lshr i64 %1, 2
%4 = getelementptr inbounds i64, i64* %0, i64 %3
%5 = load i64, i64* %4, align 8
}
define signext i16 @srli_2_sh1add(i16* %0, i64 %1) {
-; CHECK-LABEL: srli_2_sh1add:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a1, 1
-; CHECK-NEXT: andi a1, a1, -2
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: lh a0, 0(a0)
-; CHECK-NEXT: ret
+; RV64I-LABEL: srli_2_sh1add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a1, 1
+; RV64I-NEXT: andi a1, a1, -2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lh a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srli_2_sh1add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: srli a1, a1, 2
+; RV64ZBA-NEXT: sh1add a0, a1, a0
+; RV64ZBA-NEXT: lh a0, 0(a0)
+; RV64ZBA-NEXT: ret
%3 = lshr i64 %1, 2
%4 = getelementptr inbounds i16, i16* %0, i64 %3
%5 = load i16, i16* %4, align 2
}
define signext i32 @srli_3_sh2add(i32* %0, i64 %1) {
-; CHECK-LABEL: srli_3_sh2add:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a1, 1
-; CHECK-NEXT: andi a1, a1, -4
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: lw a0, 0(a0)
-; CHECK-NEXT: ret
+; RV64I-LABEL: srli_3_sh2add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a1, 1
+; RV64I-NEXT: andi a1, a1, -4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: lw a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srli_3_sh2add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: srli a1, a1, 3
+; RV64ZBA-NEXT: sh2add a0, a1, a0
+; RV64ZBA-NEXT: lw a0, 0(a0)
+; RV64ZBA-NEXT: ret
%3 = lshr i64 %1, 3
%4 = getelementptr inbounds i32, i32* %0, i64 %3
%5 = load i32, i32* %4, align 4
}
define i64 @srli_4_sh3add(i64* %0, i64 %1) {
-; CHECK-LABEL: srli_4_sh3add:
-; CHECK: # %bb.0:
-; CHECK-NEXT: srli a1, a1, 1
-; CHECK-NEXT: andi a1, a1, -8
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: ld a0, 0(a0)
-; CHECK-NEXT: ret
+; RV64I-LABEL: srli_4_sh3add:
+; RV64I: # %bb.0:
+; RV64I-NEXT: srli a1, a1, 1
+; RV64I-NEXT: andi a1, a1, -8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ret
+;
+; RV64ZBA-LABEL: srli_4_sh3add:
+; RV64ZBA: # %bb.0:
+; RV64ZBA-NEXT: srli a1, a1, 4
+; RV64ZBA-NEXT: sh3add a0, a1, a0
+; RV64ZBA-NEXT: ld a0, 0(a0)
+; RV64ZBA-NEXT: ret
%3 = lshr i64 %1, 4
%4 = getelementptr inbounds i64, i64* %0, i64 %3
%5 = load i64, i64* %4, align 8