return Addr;
}
+// Return true if Val is equal to (setcc LHS, RHS, CC).
+// Return false if Val is the inverse of (setcc LHS, RHS, CC).
+// Otherwise, return std::nullopt.
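+// For example (illustrative operands a and b): matchSetCC(a, b, SETLT,
+// (setcc b, a, SETGT)) returns true, since swapping the operands yields the
+// same condition, while matchSetCC(a, b, SETLT, (setcc a, b, SETGE)) returns
+// false, since SETGE is the inverse of SETLT.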
+static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue Val) {
+ assert(Val->getOpcode() == ISD::SETCC && "Expected a SETCC node");
+ SDValue LHS2 = Val.getOperand(0);
+ SDValue RHS2 = Val.getOperand(1);
+ ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
+
+ if (LHS == LHS2 && RHS == RHS2) {
+ if (CC == CC2)
+ return true;
+ if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
+ return false;
+ } else if (LHS == RHS2 && RHS == LHS2) {
+ CC2 = ISD::getSetCCSwappedOperands(CC2);
+ if (CC == CC2)
+ return true;
+ if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
+ return false;
+ }
+
+ return std::nullopt;
+}
+
static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue CondV = N->getOperand(0);
}
}
+ // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
+ // when both truev and falsev are also setcc.
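+ // For instance (illustrative): if CondV and TrueV are the same setcc, e.g.
+ //   (select (setcc a, b, setlt), (setcc a, b, setlt), (setcc c, d, setult)),
+ // the select folds to (or (setcc a, b, setlt), (setcc c, d, setult)).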
+ if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
+ FalseV.getOpcode() == ISD::SETCC) {
+ SDValue LHS = CondV.getOperand(0);
+ SDValue RHS = CondV.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
+
+ // (select x, x, y) -> x | y
+ // (select !x, x, y) -> x & y
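+ // (When the condition is the inverse of x, a false x selects x itself,
+ // i.e. false, and a true x selects y, which is exactly x & y.)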
+ if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
+ return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
+ FalseV);
+ }
+ // (select x, y, x) -> x & y
+ // (select !x, y, x) -> x | y
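+ // (Symmetrically, a false x here selects x itself, collapsing to x & y,
+ // while the inverted condition makes a true x select x, giving x | y.)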
+ if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
+ return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
+ FalseV);
+ }
+ }
+
return SDValue();
}
; RV32-NEXT: call __atomic_compare_exchange_8@plt
; RV32-NEXT: lw a1, 4(sp)
; RV32-NEXT: lw a4, 0(sp)
-; RV32-NEXT: bnez a0, .LBB51_6
+; RV32-NEXT: bnez a0, .LBB51_4
; RV32-NEXT: .LBB51_2: # %atomicrmw.start
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
-; RV32-NEXT: beqz a1, .LBB51_4
-; RV32-NEXT: # %bb.3: # %atomicrmw.start
-; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
; RV32-NEXT: snez a0, a1
+; RV32-NEXT: sltiu a2, a4, 2
+; RV32-NEXT: xori a2, a2, 1
+; RV32-NEXT: or a0, a2, a0
; RV32-NEXT: mv a2, a4
; RV32-NEXT: bnez a0, .LBB51_1
-; RV32-NEXT: j .LBB51_5
-; RV32-NEXT: .LBB51_4: # in Loop: Header=BB51_2 Depth=1
-; RV32-NEXT: sltiu a0, a4, 2
-; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: mv a2, a4
-; RV32-NEXT: bnez a0, .LBB51_1
-; RV32-NEXT: .LBB51_5: # %atomicrmw.start
+; RV32-NEXT: # %bb.3: # %atomicrmw.start
; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1
; RV32-NEXT: li a2, 1
; RV32-NEXT: j .LBB51_1
-; RV32-NEXT: .LBB51_6: # %atomicrmw.end
+; RV32-NEXT: .LBB51_4: # %atomicrmw.end
; RV32-NEXT: mv a0, a4
; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32IF-NEXT: .LBB20_2:
; RV32IF-NEXT: seqz a2, a0
; RV32IF-NEXT: .LBB20_3: # %entry
-; RV32IF-NEXT: lw a3, 12(sp)
-; RV32IF-NEXT: xori a4, a0, 1
-; RV32IF-NEXT: or a4, a4, a1
-; RV32IF-NEXT: seqz a4, a4
-; RV32IF-NEXT: addi a4, a4, -1
-; RV32IF-NEXT: and a2, a4, a2
-; RV32IF-NEXT: neg a4, a2
-; RV32IF-NEXT: bnez a2, .LBB20_5
+; RV32IF-NEXT: xori a3, a0, 1
+; RV32IF-NEXT: or a3, a3, a1
+; RV32IF-NEXT: seqz a3, a3
+; RV32IF-NEXT: addi a3, a3, -1
+; RV32IF-NEXT: and a3, a3, a2
+; RV32IF-NEXT: neg a2, a3
+; RV32IF-NEXT: bnez a3, .LBB20_5
; RV32IF-NEXT: # %bb.4: # %entry
; RV32IF-NEXT: li a0, 1
; RV32IF-NEXT: .LBB20_5: # %entry
-; RV32IF-NEXT: lw a5, 8(sp)
-; RV32IF-NEXT: and a2, a4, a1
-; RV32IF-NEXT: and a1, a4, a3
-; RV32IF-NEXT: beqz a2, .LBB20_8
+; RV32IF-NEXT: lw a3, 8(sp)
+; RV32IF-NEXT: lw a4, 12(sp)
+; RV32IF-NEXT: and a5, a2, a1
+; RV32IF-NEXT: beqz a5, .LBB20_7
; RV32IF-NEXT: # %bb.6: # %entry
-; RV32IF-NEXT: sgtz a3, a2
-; RV32IF-NEXT: and a4, a4, a5
-; RV32IF-NEXT: bnez a1, .LBB20_9
+; RV32IF-NEXT: sgtz a1, a5
+; RV32IF-NEXT: j .LBB20_8
; RV32IF-NEXT: .LBB20_7:
-; RV32IF-NEXT: snez a5, a4
-; RV32IF-NEXT: or a0, a0, a2
+; RV32IF-NEXT: snez a1, a0
+; RV32IF-NEXT: .LBB20_8: # %entry
+; RV32IF-NEXT: and a4, a2, a4
+; RV32IF-NEXT: or a0, a0, a5
+; RV32IF-NEXT: and a2, a2, a3
; RV32IF-NEXT: bnez a0, .LBB20_10
-; RV32IF-NEXT: j .LBB20_11
-; RV32IF-NEXT: .LBB20_8:
-; RV32IF-NEXT: snez a3, a0
-; RV32IF-NEXT: and a4, a4, a5
-; RV32IF-NEXT: beqz a1, .LBB20_7
-; RV32IF-NEXT: .LBB20_9: # %entry
-; RV32IF-NEXT: snez a5, a1
-; RV32IF-NEXT: or a0, a0, a2
-; RV32IF-NEXT: beqz a0, .LBB20_11
+; RV32IF-NEXT: # %bb.9:
+; RV32IF-NEXT: or a0, a2, a4
+; RV32IF-NEXT: snez a1, a0
; RV32IF-NEXT: .LBB20_10: # %entry
-; RV32IF-NEXT: mv a5, a3
-; RV32IF-NEXT: .LBB20_11: # %entry
-; RV32IF-NEXT: neg a2, a5
-; RV32IF-NEXT: and a0, a2, a4
-; RV32IF-NEXT: and a1, a2, a1
+; RV32IF-NEXT: neg a1, a1
+; RV32IF-NEXT: and a0, a1, a2
+; RV32IF-NEXT: and a1, a1, a4
; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IF-NEXT: addi sp, sp, 32
; RV32IF-NEXT: ret
; RV32IFD-NEXT: .LBB20_2:
; RV32IFD-NEXT: seqz a2, a0
; RV32IFD-NEXT: .LBB20_3: # %entry
-; RV32IFD-NEXT: lw a3, 12(sp)
-; RV32IFD-NEXT: xori a4, a0, 1
-; RV32IFD-NEXT: or a4, a4, a1
-; RV32IFD-NEXT: seqz a4, a4
-; RV32IFD-NEXT: addi a4, a4, -1
-; RV32IFD-NEXT: and a2, a4, a2
-; RV32IFD-NEXT: neg a4, a2
-; RV32IFD-NEXT: bnez a2, .LBB20_5
+; RV32IFD-NEXT: xori a3, a0, 1
+; RV32IFD-NEXT: or a3, a3, a1
+; RV32IFD-NEXT: seqz a3, a3
+; RV32IFD-NEXT: addi a3, a3, -1
+; RV32IFD-NEXT: and a3, a3, a2
+; RV32IFD-NEXT: neg a2, a3
+; RV32IFD-NEXT: bnez a3, .LBB20_5
; RV32IFD-NEXT: # %bb.4: # %entry
; RV32IFD-NEXT: li a0, 1
; RV32IFD-NEXT: .LBB20_5: # %entry
-; RV32IFD-NEXT: lw a5, 8(sp)
-; RV32IFD-NEXT: and a2, a4, a1
-; RV32IFD-NEXT: and a1, a4, a3
-; RV32IFD-NEXT: beqz a2, .LBB20_8
+; RV32IFD-NEXT: lw a3, 8(sp)
+; RV32IFD-NEXT: lw a4, 12(sp)
+; RV32IFD-NEXT: and a5, a2, a1
+; RV32IFD-NEXT: beqz a5, .LBB20_7
; RV32IFD-NEXT: # %bb.6: # %entry
-; RV32IFD-NEXT: sgtz a3, a2
-; RV32IFD-NEXT: and a4, a4, a5
-; RV32IFD-NEXT: bnez a1, .LBB20_9
+; RV32IFD-NEXT: sgtz a1, a5
+; RV32IFD-NEXT: j .LBB20_8
; RV32IFD-NEXT: .LBB20_7:
-; RV32IFD-NEXT: snez a5, a4
-; RV32IFD-NEXT: or a0, a0, a2
+; RV32IFD-NEXT: snez a1, a0
+; RV32IFD-NEXT: .LBB20_8: # %entry
+; RV32IFD-NEXT: and a4, a2, a4
+; RV32IFD-NEXT: or a0, a0, a5
+; RV32IFD-NEXT: and a2, a2, a3
; RV32IFD-NEXT: bnez a0, .LBB20_10
-; RV32IFD-NEXT: j .LBB20_11
-; RV32IFD-NEXT: .LBB20_8:
-; RV32IFD-NEXT: snez a3, a0
-; RV32IFD-NEXT: and a4, a4, a5
-; RV32IFD-NEXT: beqz a1, .LBB20_7
-; RV32IFD-NEXT: .LBB20_9: # %entry
-; RV32IFD-NEXT: snez a5, a1
-; RV32IFD-NEXT: or a0, a0, a2
-; RV32IFD-NEXT: beqz a0, .LBB20_11
+; RV32IFD-NEXT: # %bb.9:
+; RV32IFD-NEXT: or a0, a2, a4
+; RV32IFD-NEXT: snez a1, a0
; RV32IFD-NEXT: .LBB20_10: # %entry
-; RV32IFD-NEXT: mv a5, a3
-; RV32IFD-NEXT: .LBB20_11: # %entry
-; RV32IFD-NEXT: neg a2, a5
-; RV32IFD-NEXT: and a0, a2, a4
-; RV32IFD-NEXT: and a1, a2, a1
+; RV32IFD-NEXT: neg a1, a1
+; RV32IFD-NEXT: and a0, a1, a2
+; RV32IFD-NEXT: and a1, a1, a4
; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32IFD-NEXT: addi sp, sp, 32
; RV32IFD-NEXT: ret
; RV32-NEXT: .LBB23_2:
; RV32-NEXT: seqz a2, a0
; RV32-NEXT: .LBB23_3: # %entry
-; RV32-NEXT: lw a3, 12(sp)
-; RV32-NEXT: xori a4, a0, 1
-; RV32-NEXT: or a4, a4, a1
-; RV32-NEXT: seqz a4, a4
-; RV32-NEXT: addi a4, a4, -1
-; RV32-NEXT: and a2, a4, a2
-; RV32-NEXT: neg a4, a2
-; RV32-NEXT: bnez a2, .LBB23_5
+; RV32-NEXT: xori a3, a0, 1
+; RV32-NEXT: or a3, a3, a1
+; RV32-NEXT: seqz a3, a3
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a3, a3, a2
+; RV32-NEXT: neg a2, a3
+; RV32-NEXT: bnez a3, .LBB23_5
; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: li a0, 1
; RV32-NEXT: .LBB23_5: # %entry
-; RV32-NEXT: lw a5, 8(sp)
-; RV32-NEXT: and a2, a4, a1
-; RV32-NEXT: and a1, a4, a3
-; RV32-NEXT: beqz a2, .LBB23_8
+; RV32-NEXT: lw a3, 8(sp)
+; RV32-NEXT: lw a4, 12(sp)
+; RV32-NEXT: and a5, a2, a1
+; RV32-NEXT: beqz a5, .LBB23_7
; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: sgtz a3, a2
-; RV32-NEXT: and a4, a4, a5
-; RV32-NEXT: bnez a1, .LBB23_9
+; RV32-NEXT: sgtz a1, a5
+; RV32-NEXT: j .LBB23_8
; RV32-NEXT: .LBB23_7:
-; RV32-NEXT: snez a5, a4
-; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: snez a1, a0
+; RV32-NEXT: .LBB23_8: # %entry
+; RV32-NEXT: and a4, a2, a4
+; RV32-NEXT: or a0, a0, a5
+; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: bnez a0, .LBB23_10
-; RV32-NEXT: j .LBB23_11
-; RV32-NEXT: .LBB23_8:
-; RV32-NEXT: snez a3, a0
-; RV32-NEXT: and a4, a4, a5
-; RV32-NEXT: beqz a1, .LBB23_7
-; RV32-NEXT: .LBB23_9: # %entry
-; RV32-NEXT: snez a5, a1
-; RV32-NEXT: or a0, a0, a2
-; RV32-NEXT: beqz a0, .LBB23_11
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: or a0, a2, a4
+; RV32-NEXT: snez a1, a0
; RV32-NEXT: .LBB23_10: # %entry
-; RV32-NEXT: mv a5, a3
-; RV32-NEXT: .LBB23_11: # %entry
-; RV32-NEXT: neg a2, a5
-; RV32-NEXT: and a0, a2, a4
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: neg a1, a1
+; RV32-NEXT: and a0, a1, a2
+; RV32-NEXT: and a1, a1, a4
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
; RV32-NEXT: .LBB26_2:
; RV32-NEXT: seqz a2, a0
; RV32-NEXT: .LBB26_3: # %entry
-; RV32-NEXT: lw a3, 12(sp)
-; RV32-NEXT: xori a4, a0, 1
-; RV32-NEXT: or a4, a4, a1
-; RV32-NEXT: seqz a4, a4
-; RV32-NEXT: addi a4, a4, -1
-; RV32-NEXT: and a2, a4, a2
-; RV32-NEXT: neg a4, a2
-; RV32-NEXT: bnez a2, .LBB26_5
+; RV32-NEXT: xori a3, a0, 1
+; RV32-NEXT: or a3, a3, a1
+; RV32-NEXT: seqz a3, a3
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a3, a3, a2
+; RV32-NEXT: neg a2, a3
+; RV32-NEXT: bnez a3, .LBB26_5
; RV32-NEXT: # %bb.4: # %entry
; RV32-NEXT: li a0, 1
; RV32-NEXT: .LBB26_5: # %entry
-; RV32-NEXT: lw a5, 8(sp)
-; RV32-NEXT: and a2, a4, a1
-; RV32-NEXT: and a1, a4, a3
-; RV32-NEXT: beqz a2, .LBB26_8
+; RV32-NEXT: lw a3, 8(sp)
+; RV32-NEXT: lw a4, 12(sp)
+; RV32-NEXT: and a5, a2, a1
+; RV32-NEXT: beqz a5, .LBB26_7
; RV32-NEXT: # %bb.6: # %entry
-; RV32-NEXT: sgtz a3, a2
-; RV32-NEXT: and a4, a4, a5
-; RV32-NEXT: bnez a1, .LBB26_9
+; RV32-NEXT: sgtz a1, a5
+; RV32-NEXT: j .LBB26_8
; RV32-NEXT: .LBB26_7:
-; RV32-NEXT: snez a5, a4
-; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: snez a1, a0
+; RV32-NEXT: .LBB26_8: # %entry
+; RV32-NEXT: and a4, a2, a4
+; RV32-NEXT: or a0, a0, a5
+; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: bnez a0, .LBB26_10
-; RV32-NEXT: j .LBB26_11
-; RV32-NEXT: .LBB26_8:
-; RV32-NEXT: snez a3, a0
-; RV32-NEXT: and a4, a4, a5
-; RV32-NEXT: beqz a1, .LBB26_7
-; RV32-NEXT: .LBB26_9: # %entry
-; RV32-NEXT: snez a5, a1
-; RV32-NEXT: or a0, a0, a2
-; RV32-NEXT: beqz a0, .LBB26_11
+; RV32-NEXT: # %bb.9:
+; RV32-NEXT: or a0, a2, a4
+; RV32-NEXT: snez a1, a0
; RV32-NEXT: .LBB26_10: # %entry
-; RV32-NEXT: mv a5, a3
-; RV32-NEXT: .LBB26_11: # %entry
-; RV32-NEXT: neg a2, a5
-; RV32-NEXT: and a0, a2, a4
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: neg a1, a1
+; RV32-NEXT: and a0, a1, a2
+; RV32-NEXT: and a1, a1, a4
; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
define i128 @abs128(i128 %x) {
; RV32I-LABEL: abs128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a2, 4(a1)
-; RV32I-NEXT: lw a4, 12(a1)
-; RV32I-NEXT: snez a5, a3
-; RV32I-NEXT: mv a6, a5
-; RV32I-NEXT: beqz a2, .LBB8_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: snez a6, a2
-; RV32I-NEXT: .LBB8_2:
+; RV32I-NEXT: lw a2, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw a4, 0(a1)
; RV32I-NEXT: lw a1, 8(a1)
-; RV32I-NEXT: bgez a4, .LBB8_4
-; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: neg a7, a1
-; RV32I-NEXT: sltu t0, a7, a6
+; RV32I-NEXT: bgez a2, .LBB8_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: neg a5, a1
+; RV32I-NEXT: or a6, a4, a3
+; RV32I-NEXT: snez a6, a6
+; RV32I-NEXT: sltu a7, a5, a6
; RV32I-NEXT: snez a1, a1
-; RV32I-NEXT: add a1, a4, a1
-; RV32I-NEXT: add a1, a1, t0
-; RV32I-NEXT: neg a4, a1
-; RV32I-NEXT: sub a1, a7, a6
-; RV32I-NEXT: add a2, a2, a5
-; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: sub a2, a1, a7
+; RV32I-NEXT: sub a1, a5, a6
+; RV32I-NEXT: snez a5, a4
; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: .LBB8_4:
-; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sub a3, a3, a5
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: .LBB8_2:
+; RV32I-NEXT: sw a4, 0(a0)
; RV32I-NEXT: sw a1, 8(a0)
-; RV32I-NEXT: sw a2, 4(a0)
-; RV32I-NEXT: sw a4, 12(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: abs128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw a2, 4(a1)
-; RV32ZBB-NEXT: lw a4, 12(a1)
-; RV32ZBB-NEXT: snez a5, a3
-; RV32ZBB-NEXT: mv a6, a5
-; RV32ZBB-NEXT: beqz a2, .LBB8_2
-; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: snez a6, a2
-; RV32ZBB-NEXT: .LBB8_2:
+; RV32ZBB-NEXT: lw a2, 12(a1)
+; RV32ZBB-NEXT: lw a3, 4(a1)
+; RV32ZBB-NEXT: lw a4, 0(a1)
; RV32ZBB-NEXT: lw a1, 8(a1)
-; RV32ZBB-NEXT: bgez a4, .LBB8_4
-; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: neg a7, a1
-; RV32ZBB-NEXT: sltu t0, a7, a6
+; RV32ZBB-NEXT: bgez a2, .LBB8_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: neg a5, a1
+; RV32ZBB-NEXT: or a6, a4, a3
+; RV32ZBB-NEXT: snez a6, a6
+; RV32ZBB-NEXT: sltu a7, a5, a6
; RV32ZBB-NEXT: snez a1, a1
-; RV32ZBB-NEXT: add a1, a4, a1
-; RV32ZBB-NEXT: add a1, a1, t0
-; RV32ZBB-NEXT: neg a4, a1
-; RV32ZBB-NEXT: sub a1, a7, a6
-; RV32ZBB-NEXT: add a2, a2, a5
-; RV32ZBB-NEXT: neg a2, a2
+; RV32ZBB-NEXT: add a1, a2, a1
+; RV32ZBB-NEXT: neg a1, a1
+; RV32ZBB-NEXT: sub a2, a1, a7
+; RV32ZBB-NEXT: sub a1, a5, a6
+; RV32ZBB-NEXT: snez a5, a4
; RV32ZBB-NEXT: neg a3, a3
-; RV32ZBB-NEXT: .LBB8_4:
-; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sub a3, a3, a5
+; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: .LBB8_2:
+; RV32ZBB-NEXT: sw a4, 0(a0)
; RV32ZBB-NEXT: sw a1, 8(a0)
-; RV32ZBB-NEXT: sw a2, 4(a0)
-; RV32ZBB-NEXT: sw a4, 12(a0)
+; RV32ZBB-NEXT: sw a3, 4(a0)
+; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: abs128:
define i128 @select_abs128(i128 %x) {
; RV32I-LABEL: select_abs128:
; RV32I: # %bb.0:
-; RV32I-NEXT: lw a3, 0(a1)
-; RV32I-NEXT: lw a2, 4(a1)
-; RV32I-NEXT: lw a4, 12(a1)
-; RV32I-NEXT: snez a5, a3
-; RV32I-NEXT: mv a6, a5
-; RV32I-NEXT: beqz a2, .LBB9_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: snez a6, a2
-; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: lw a2, 12(a1)
+; RV32I-NEXT: lw a3, 4(a1)
+; RV32I-NEXT: lw a4, 0(a1)
; RV32I-NEXT: lw a1, 8(a1)
-; RV32I-NEXT: bgez a4, .LBB9_4
-; RV32I-NEXT: # %bb.3:
-; RV32I-NEXT: neg a7, a1
-; RV32I-NEXT: sltu t0, a7, a6
+; RV32I-NEXT: bgez a2, .LBB9_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: neg a5, a1
+; RV32I-NEXT: or a6, a4, a3
+; RV32I-NEXT: snez a6, a6
+; RV32I-NEXT: sltu a7, a5, a6
; RV32I-NEXT: snez a1, a1
-; RV32I-NEXT: add a1, a4, a1
-; RV32I-NEXT: add a1, a1, t0
-; RV32I-NEXT: neg a4, a1
-; RV32I-NEXT: sub a1, a7, a6
-; RV32I-NEXT: add a2, a2, a5
-; RV32I-NEXT: neg a2, a2
+; RV32I-NEXT: add a1, a2, a1
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: sub a2, a1, a7
+; RV32I-NEXT: sub a1, a5, a6
+; RV32I-NEXT: snez a5, a4
; RV32I-NEXT: neg a3, a3
-; RV32I-NEXT: .LBB9_4:
-; RV32I-NEXT: sw a3, 0(a0)
+; RV32I-NEXT: sub a3, a3, a5
+; RV32I-NEXT: neg a4, a4
+; RV32I-NEXT: .LBB9_2:
+; RV32I-NEXT: sw a4, 0(a0)
; RV32I-NEXT: sw a1, 8(a0)
-; RV32I-NEXT: sw a2, 4(a0)
-; RV32I-NEXT: sw a4, 12(a0)
+; RV32I-NEXT: sw a3, 4(a0)
+; RV32I-NEXT: sw a2, 12(a0)
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: select_abs128:
; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a3, 0(a1)
-; RV32ZBB-NEXT: lw a2, 4(a1)
-; RV32ZBB-NEXT: lw a4, 12(a1)
-; RV32ZBB-NEXT: snez a5, a3
-; RV32ZBB-NEXT: mv a6, a5
-; RV32ZBB-NEXT: beqz a2, .LBB9_2
-; RV32ZBB-NEXT: # %bb.1:
-; RV32ZBB-NEXT: snez a6, a2
-; RV32ZBB-NEXT: .LBB9_2:
+; RV32ZBB-NEXT: lw a2, 12(a1)
+; RV32ZBB-NEXT: lw a3, 4(a1)
+; RV32ZBB-NEXT: lw a4, 0(a1)
; RV32ZBB-NEXT: lw a1, 8(a1)
-; RV32ZBB-NEXT: bgez a4, .LBB9_4
-; RV32ZBB-NEXT: # %bb.3:
-; RV32ZBB-NEXT: neg a7, a1
-; RV32ZBB-NEXT: sltu t0, a7, a6
+; RV32ZBB-NEXT: bgez a2, .LBB9_2
+; RV32ZBB-NEXT: # %bb.1:
+; RV32ZBB-NEXT: neg a5, a1
+; RV32ZBB-NEXT: or a6, a4, a3
+; RV32ZBB-NEXT: snez a6, a6
+; RV32ZBB-NEXT: sltu a7, a5, a6
; RV32ZBB-NEXT: snez a1, a1
-; RV32ZBB-NEXT: add a1, a4, a1
-; RV32ZBB-NEXT: add a1, a1, t0
-; RV32ZBB-NEXT: neg a4, a1
-; RV32ZBB-NEXT: sub a1, a7, a6
-; RV32ZBB-NEXT: add a2, a2, a5
-; RV32ZBB-NEXT: neg a2, a2
+; RV32ZBB-NEXT: add a1, a2, a1
+; RV32ZBB-NEXT: neg a1, a1
+; RV32ZBB-NEXT: sub a2, a1, a7
+; RV32ZBB-NEXT: sub a1, a5, a6
+; RV32ZBB-NEXT: snez a5, a4
; RV32ZBB-NEXT: neg a3, a3
-; RV32ZBB-NEXT: .LBB9_4:
-; RV32ZBB-NEXT: sw a3, 0(a0)
+; RV32ZBB-NEXT: sub a3, a3, a5
+; RV32ZBB-NEXT: neg a4, a4
+; RV32ZBB-NEXT: .LBB9_2:
+; RV32ZBB-NEXT: sw a4, 0(a0)
; RV32ZBB-NEXT: sw a1, 8(a0)
-; RV32ZBB-NEXT: sw a2, 4(a0)
-; RV32ZBB-NEXT: sw a4, 12(a0)
+; RV32ZBB-NEXT: sw a3, 4(a0)
+; RV32ZBB-NEXT: sw a2, 12(a0)
; RV32ZBB-NEXT: ret
;
; RV64I-LABEL: select_abs128:
; RV32I-NEXT: add a2, a0, a2
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: addi a0, a0, -1
-; RV32I-NEXT: li a1, -1
-; RV32I-NEXT: beq a0, a1, .LBB10_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltiu a0, a0, -1
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB10_2:
-; RV32I-NEXT: lui a0, 1048560
-; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: lui a1, 1048560
+; RV32I-NEXT: sltu a1, a2, a1
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: add_ultcmp_i64_i16:
; RV32I-NEXT: addi a2, a0, -128
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: add a0, a1, a0
-; RV32I-NEXT: addi a0, a0, -1
-; RV32I-NEXT: li a1, -1
-; RV32I-NEXT: beq a0, a1, .LBB11_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: sltiu a0, a0, -1
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB11_2:
-; RV32I-NEXT: sltiu a0, a2, -256
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: sltiu a1, a2, -256
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: ret
;
; RV64-LABEL: add_ultcmp_i64_i8:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, 128
; RV32I-NEXT: sltu a0, a2, a0
-; RV32I-NEXT: add a1, a1, a0
-; RV32I-NEXT: beqz a1, .LBB18_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: snez a0, a1
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB18_2:
-; RV32I-NEXT: sltiu a0, a2, 256
-; RV32I-NEXT: xori a0, a0, 1
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: snez a0, a0
+; RV32I-NEXT: sltiu a1, a2, 256
+; RV32I-NEXT: xori a1, a1, 1
+; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: ret
;
; RV64-LABEL: add_ugecmp_i64_i8:
define zeroext i1 @uaddo.i64.constant(i64 %v1, ptr %res) {
; RV32-LABEL: uaddo.i64.constant:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: mv a3, a0
-; RV32-NEXT: addi a4, a0, 2
-; RV32-NEXT: sltu a0, a4, a0
-; RV32-NEXT: add a5, a1, a0
-; RV32-NEXT: bgeu a4, a3, .LBB66_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: sltu a0, a5, a1
-; RV32-NEXT: .LBB66_2: # %entry
-; RV32-NEXT: sw a4, 0(a2)
-; RV32-NEXT: sw a5, 4(a2)
+; RV32-NEXT: addi a3, a0, 2
+; RV32-NEXT: sltu a0, a3, a0
+; RV32-NEXT: add a4, a1, a0
+; RV32-NEXT: sltu a1, a4, a1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: sw a3, 0(a2)
+; RV32-NEXT: sw a4, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo.i64.constant:
;
; RV32ZBA-LABEL: uaddo.i64.constant:
; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: mv a3, a0
-; RV32ZBA-NEXT: addi a4, a0, 2
-; RV32ZBA-NEXT: sltu a0, a4, a0
-; RV32ZBA-NEXT: add a5, a1, a0
-; RV32ZBA-NEXT: bgeu a4, a3, .LBB66_2
-; RV32ZBA-NEXT: # %bb.1: # %entry
-; RV32ZBA-NEXT: sltu a0, a5, a1
-; RV32ZBA-NEXT: .LBB66_2: # %entry
-; RV32ZBA-NEXT: sw a4, 0(a2)
-; RV32ZBA-NEXT: sw a5, 4(a2)
+; RV32ZBA-NEXT: addi a3, a0, 2
+; RV32ZBA-NEXT: sltu a0, a3, a0
+; RV32ZBA-NEXT: add a4, a1, a0
+; RV32ZBA-NEXT: sltu a1, a4, a1
+; RV32ZBA-NEXT: and a0, a0, a1
+; RV32ZBA-NEXT: sw a3, 0(a2)
+; RV32ZBA-NEXT: sw a4, 4(a2)
; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: uaddo.i64.constant:
define zeroext i1 @uaddo.i64.constant_2048(i64 %v1, ptr %res) {
; RV32-LABEL: uaddo.i64.constant_2048:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: mv a3, a0
-; RV32-NEXT: addi a4, a0, 2047
-; RV32-NEXT: addi a4, a4, 1
-; RV32-NEXT: sltu a0, a4, a0
-; RV32-NEXT: add a5, a1, a0
-; RV32-NEXT: bgeu a4, a3, .LBB67_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: sltu a0, a5, a1
-; RV32-NEXT: .LBB67_2: # %entry
-; RV32-NEXT: sw a4, 0(a2)
-; RV32-NEXT: sw a5, 4(a2)
+; RV32-NEXT: addi a3, a0, 2047
+; RV32-NEXT: addi a3, a3, 1
+; RV32-NEXT: sltu a0, a3, a0
+; RV32-NEXT: add a4, a1, a0
+; RV32-NEXT: sltu a1, a4, a1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: sw a3, 0(a2)
+; RV32-NEXT: sw a4, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo.i64.constant_2048:
;
; RV32ZBA-LABEL: uaddo.i64.constant_2048:
; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: mv a3, a0
-; RV32ZBA-NEXT: addi a4, a0, 2047
-; RV32ZBA-NEXT: addi a4, a4, 1
-; RV32ZBA-NEXT: sltu a0, a4, a0
-; RV32ZBA-NEXT: add a5, a1, a0
-; RV32ZBA-NEXT: bgeu a4, a3, .LBB67_2
-; RV32ZBA-NEXT: # %bb.1: # %entry
-; RV32ZBA-NEXT: sltu a0, a5, a1
-; RV32ZBA-NEXT: .LBB67_2: # %entry
-; RV32ZBA-NEXT: sw a4, 0(a2)
-; RV32ZBA-NEXT: sw a5, 4(a2)
+; RV32ZBA-NEXT: addi a3, a0, 2047
+; RV32ZBA-NEXT: addi a3, a3, 1
+; RV32ZBA-NEXT: sltu a0, a3, a0
+; RV32ZBA-NEXT: add a4, a1, a0
+; RV32ZBA-NEXT: sltu a1, a4, a1
+; RV32ZBA-NEXT: and a0, a0, a1
+; RV32ZBA-NEXT: sw a3, 0(a2)
+; RV32ZBA-NEXT: sw a4, 4(a2)
; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: uaddo.i64.constant_2048:
define zeroext i1 @uaddo.i64.constant_2049(i64 %v1, ptr %res) {
; RV32-LABEL: uaddo.i64.constant_2049:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: mv a3, a0
-; RV32-NEXT: addi a4, a0, 2047
-; RV32-NEXT: addi a4, a4, 2
-; RV32-NEXT: sltu a0, a4, a0
-; RV32-NEXT: add a5, a1, a0
-; RV32-NEXT: bgeu a4, a3, .LBB68_2
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: sltu a0, a5, a1
-; RV32-NEXT: .LBB68_2: # %entry
-; RV32-NEXT: sw a4, 0(a2)
-; RV32-NEXT: sw a5, 4(a2)
+; RV32-NEXT: addi a3, a0, 2047
+; RV32-NEXT: addi a3, a3, 2
+; RV32-NEXT: sltu a0, a3, a0
+; RV32-NEXT: add a4, a1, a0
+; RV32-NEXT: sltu a1, a4, a1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: sw a3, 0(a2)
+; RV32-NEXT: sw a4, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo.i64.constant_2049:
;
; RV32ZBA-LABEL: uaddo.i64.constant_2049:
; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: mv a3, a0
-; RV32ZBA-NEXT: addi a4, a0, 2047
-; RV32ZBA-NEXT: addi a4, a4, 2
-; RV32ZBA-NEXT: sltu a0, a4, a0
-; RV32ZBA-NEXT: add a5, a1, a0
-; RV32ZBA-NEXT: bgeu a4, a3, .LBB68_2
-; RV32ZBA-NEXT: # %bb.1: # %entry
-; RV32ZBA-NEXT: sltu a0, a5, a1
-; RV32ZBA-NEXT: .LBB68_2: # %entry
-; RV32ZBA-NEXT: sw a4, 0(a2)
-; RV32ZBA-NEXT: sw a5, 4(a2)
+; RV32ZBA-NEXT: addi a3, a0, 2047
+; RV32ZBA-NEXT: addi a3, a3, 2
+; RV32ZBA-NEXT: sltu a0, a3, a0
+; RV32ZBA-NEXT: add a4, a1, a0
+; RV32ZBA-NEXT: sltu a1, a4, a1
+; RV32ZBA-NEXT: and a0, a0, a1
+; RV32ZBA-NEXT: sw a3, 0(a2)
+; RV32ZBA-NEXT: sw a4, 4(a2)
; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: uaddo.i64.constant_2049:
define i64 @uaddo.i64.constant_setcc_on_overflow_flag(ptr %p) {
; RV32-LABEL: uaddo.i64.constant_setcc_on_overflow_flag:
; RV32: # %bb.0: # %entry
-; RV32-NEXT: lw a4, 0(a0)
-; RV32-NEXT: lw a3, 4(a0)
-; RV32-NEXT: addi a0, a4, 2
-; RV32-NEXT: sltu a2, a0, a4
-; RV32-NEXT: add a1, a3, a2
-; RV32-NEXT: bltu a0, a4, .LBB69_3
-; RV32-NEXT: # %bb.1: # %entry
-; RV32-NEXT: beqz a2, .LBB69_4
-; RV32-NEXT: .LBB69_2: # %IfNoOverflow
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB69_3: # %entry
-; RV32-NEXT: sltu a2, a1, a3
+; RV32-NEXT: lw a1, 0(a0)
+; RV32-NEXT: lw a2, 4(a0)
+; RV32-NEXT: addi a0, a1, 2
+; RV32-NEXT: sltu a3, a0, a1
+; RV32-NEXT: add a1, a2, a3
+; RV32-NEXT: sltu a2, a1, a2
+; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: bnez a2, .LBB69_2
-; RV32-NEXT: .LBB69_4: # %IfOverflow
+; RV32-NEXT: # %bb.1: # %IfOverflow
; RV32-NEXT: li a0, 0
; RV32-NEXT: li a1, 0
+; RV32-NEXT: .LBB69_2: # %IfNoOverflow
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo.i64.constant_setcc_on_overflow_flag:
;
; RV32ZBA-LABEL: uaddo.i64.constant_setcc_on_overflow_flag:
; RV32ZBA: # %bb.0: # %entry
-; RV32ZBA-NEXT: lw a4, 0(a0)
-; RV32ZBA-NEXT: lw a3, 4(a0)
-; RV32ZBA-NEXT: addi a0, a4, 2
-; RV32ZBA-NEXT: sltu a2, a0, a4
-; RV32ZBA-NEXT: add a1, a3, a2
-; RV32ZBA-NEXT: bltu a0, a4, .LBB69_3
-; RV32ZBA-NEXT: # %bb.1: # %entry
-; RV32ZBA-NEXT: beqz a2, .LBB69_4
-; RV32ZBA-NEXT: .LBB69_2: # %IfNoOverflow
-; RV32ZBA-NEXT: ret
-; RV32ZBA-NEXT: .LBB69_3: # %entry
-; RV32ZBA-NEXT: sltu a2, a1, a3
+; RV32ZBA-NEXT: lw a1, 0(a0)
+; RV32ZBA-NEXT: lw a2, 4(a0)
+; RV32ZBA-NEXT: addi a0, a1, 2
+; RV32ZBA-NEXT: sltu a3, a0, a1
+; RV32ZBA-NEXT: add a1, a2, a3
+; RV32ZBA-NEXT: sltu a2, a1, a2
+; RV32ZBA-NEXT: and a2, a3, a2
; RV32ZBA-NEXT: bnez a2, .LBB69_2
-; RV32ZBA-NEXT: .LBB69_4: # %IfOverflow
+; RV32ZBA-NEXT: # %bb.1: # %IfOverflow
; RV32ZBA-NEXT: li a0, 0
; RV32ZBA-NEXT: li a1, 0
+; RV32ZBA-NEXT: .LBB69_2: # %IfNoOverflow
; RV32ZBA-NEXT: ret
;
; RV64ZBA-LABEL: uaddo.i64.constant_setcc_on_overflow_flag: